From 5089175aa42591edc8a22249be4e6b9b6864c42c Mon Sep 17 00:00:00 2001 From: Raphael Date: Tue, 17 Jan 2023 14:39:25 -0800 Subject: [PATCH 1/5] Chore(Other):add v22.0.2 in releases list (#405) latest version in V22.0.x serie is v22.0.2 update table and link --- content/releases/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/releases/index.md b/content/releases/index.md index 23b7e8d3..f1c4052c 100644 --- a/content/releases/index.md +++ b/content/releases/index.md @@ -20,7 +20,7 @@ To learn about the latest releases and other important announcements, watch the Dgraph Release Series | Current Release | Supported? | First Release Date | End of life -----------------------|-----------------|------------|--------------------|-------------- - v22.0.x | [v22.0.0][] | Yes | October 2022 | N/A + v22.0.x | [v22.0.2][] | Yes | October 2022 | N/A v21.12.x(discontinued)| [v21.12.0][] | No | December 2021 | December 2022 v21.03.x | [v21.03.0][] | Yes | March 2021 | June 2023 v20.11.x | [v20.11.0][] | No | December 2020 | December 2021 @@ -30,11 +30,11 @@ To learn about the latest releases and other important announcements, watch the v1.1.x | [v1.1.1][] | No | January 2020 | January 2021 v1.0.x | [v1.0.18][] | No | December 2017 | March 2020 -[v22.0.0]: https://discuss.dgraph.io/t/dgraph-v22-0-0-is-now-ga/17889 +[v22.0.2]: https://discuss.dgraph.io/t/dgraph-release-v22-0-2-is-now-generally-available/18117 [v21.03.0]: https://discuss.dgraph.io/t/release-notes-v21-03-0-resilient-rocket/13587 [v20.11.0]: https://discuss.dgraph.io/t/release-notes-v20-11-0-tenacious-tchalla/11942 [v20.07.3]: https://discuss.dgraph.io/t/dgraph-v20-07-3-release/12107 [v20.03.7]: https://discuss.dgraph.io/t/dgraph-v20-03-7-release/12077 [v1.2.8]: https://discuss.dgraph.io/t/dgraph-v1-2-8-release/11183 [v1.1.1]: https://discuss.dgraph.io/t/dgraph-v1-1-1-release/5664 -[v1.0.18]: https://discuss.dgraph.io/t/dgraph-v1-0-18-release/5663 \ No newline at end of file +[v1.0.18]: https://discuss.dgraph.io/t/dgraph-v1-0-18-release/5663 From 665660feee53fe3e5cad083ca34fa4da5e5ebc24 Mon Sep 17 00:00:00 2001 From: Rajakavitha Kodhandapani Date: Wed, 18 Jan 2023 11:27:34 +0530 Subject: [PATCH 2/5] [docs/revamp] Installation (#402) resolves #393 Partially addresses: https://github.com/dgraph-io/dgraph-docs/issues/302 Note: Create and edit docs in the `main` branch when you can, so that we only cherry-pick out of `main`, not into `main`. 
--> Co-authored-by: Raphael --- content/_index.md | 11 +- content/deploy/_index.md | 9 - content/deploy/download.md | 119 ------------ content/deploy/single-host-setup.md | 160 ---------------- content/howto/commandline/create-cli.md | 2 +- content/installation/_index.md | 66 +++++++ content/installation/download.md | 60 ++++++ .../{deploy => installation}/kubernetes.md | 6 +- .../multi-host-setup.md | 6 +- content/installation/single-host-setup.md | 174 ++++++++++++++++++ content/migration/migrate-tool.md | 1 + 11 files changed, 319 insertions(+), 295 deletions(-) delete mode 100644 content/deploy/download.md delete mode 100644 content/deploy/single-host-setup.md create mode 100644 content/installation/_index.md create mode 100644 content/installation/download.md rename content/{deploy => installation}/kubernetes.md (99%) rename content/{deploy => installation}/multi-host-setup.md (96%) create mode 100644 content/installation/single-host-setup.md diff --git a/content/_index.md b/content/_index.md index 4dd1dcf0..9b7faba9 100644 --- a/content/_index.md +++ b/content/_index.md @@ -15,7 +15,7 @@ aliases = ["/contribute"]

Designed from the ground up to be run in production, Dgraph is the native GraphQL database with a graph backend. It is open-source, scalable, distributed, highly available and lightning fast.

-

Tip: New to Dgraph? Take the Dgraph Tour to run live queries in your browser. Then, try Dgraph as a cloud service, or download Dgraph to deploy it yourself.

+

Tip: New to Dgraph? Take the Dgraph Tour to run live queries in your browser. Then, try Dgraph as a cloud service, or download Dgraph to deploy it yourself.

@@ -45,6 +45,15 @@ aliases = ["/contribute"]

+ -
}}"> diff --git a/content/deploy/download.md b/content/deploy/download.md deleted file mode 100644 index 70a1b443..00000000 --- a/content/deploy/download.md +++ /dev/null @@ -1,119 +0,0 @@ -+++ -date = "2017-03-20T22:25:17+11:00" -title = "Download" -weight = 2 -[menu.main] - parent = "deploy" -+++ - -{{% notice "tip" %}} -For a single server setup, recommended for new users, please see [Get Started]({{< relref "get-started/index.md" >}}) page. -{{% /notice %}} - -## Docker - -```sh -docker pull dgraph/dgraph:{{< version >}} - -# You can test that it worked fine, by running: -docker run -it dgraph/dgraph:{{< version >}} dgraph -``` - -## Automatic download - -Running - -```sh -curl https://get.dgraph.io -sSf | bash - -# Test that it worked fine, by running: -dgraph -``` - -would install the `dgraph` binary into your system. - -Other installation options: - -> Add `-s --` before the flags.() -`-y | --accept-license`: Automatically agree to the terms of the Dgraph Community License (default: "n"). - -`-s | --systemd`: Automatically create Dgraph's installation as Systemd services (default: "n"). - -`-v | --version`: Choose Dgraph's version manually (default: The latest stable release, you can do tag combinations e.g {{< version >}}-beta1 or -rc1). - ->Installing Dgraph and requesting the automatic creation of systemd service. e.g: - -```sh -curl https://get.dgraph.io -sSf | bash -s -- --systemd -``` - -Using Environment variables: - -`ACCEPT_LICENSE`: Automatically agree to the terms of the Dgraph Community License (default: "n"). - -`INSTALL_IN_SYSTEMD`: Automatically create Dgraph's installation as Systemd services (default: "n"). - -`VERSION`: Choose Dgraph's version manually (default: The latest stable release). - -```sh -curl https://get.dgraph.io -sSf | VERSION={{< version >}}-beta1 bash -``` - -{{% notice "note" %}} -Be aware that using this script will overwrite the installed version and can lead to compatibility problems. For example, if you were using version v1.0.5 and forced the installation of {{< version >}}-Beta, the existing data won't be compatible with the new version. The data must be [exported]({{< relref "deploy/dgraph-administration.md#exporting-database" >}}) before running this script and reimported to the new cluster running the updated version. -{{% /notice %}} - -## Manual download [optional] - -If you don't want to follow the automatic installation method, you could manually download the appropriate tar for your platform from **[Dgraph releases](https://github.com/dgraph-io/dgraph/releases)**. After downloading the tar for your platform from Github, extract the binary to `/usr/local/bin` like so. - -```sh -# For Linux -$ sudo tar -C /usr/local/bin -xzf dgraph-linux-amd64-VERSION.tar.gz - -# For Mac -$ sudo tar -C /usr/local/bin -xzf dgraph-darwin-amd64-VERSION.tar.gz - -# Test that it worked fine, by running: -dgraph -``` - -## Install from Source - -{{% notice "note" %}} -You can build the Ratel UI from source separately following its build -[instructions](https://github.com/dgraph-io/ratel/blob/master/INSTRUCTIONS.md). -Ratel UI is distributed via Dgraph releases using any of the download methods -listed above. You can also use https://play.dgraph.io/ to run Ratel. 
-{{% /notice %}} - -If you want to install from source, install Go 1.13+ or later and the following dependencies: - -#### Ubuntu - -```bash -sudo apt-get update -sudo apt-get install build-essential -``` - -#### macOS - -As a prerequisite, first install [XCode](https://apps.apple.com/us/app/xcode/id497799835?mt=12) (or the [XCode Command-line Tools](https://developer.apple.com/downloads/)) and [Homebrew](https://brew.sh/). - -Next, install the required dependencies: - -```bash -brew update -brew install jemalloc go -``` - -### Build and Install - -Then clone the Dgraph repository and use `make install` to install the Dgraph binary in the directory named by the GOBIN environment variable, which defaults to $GOPATH/bin or $HOME/go/bin if the GOPATH environment variable is not set. - - -```bash -git clone https://github.com/dgraph-io/dgraph.git -cd dgraph -make install -``` diff --git a/content/deploy/single-host-setup.md b/content/deploy/single-host-setup.md deleted file mode 100644 index ae5d91df..00000000 --- a/content/deploy/single-host-setup.md +++ /dev/null @@ -1,160 +0,0 @@ -+++ -date = "2017-03-20T22:25:17+11:00" -title = "Single Host Setup" -weight = 6 -[menu.main] - parent = "deploy" -+++ - -## Run directly on the host - -You can run Dgraph directly on a single Linux host. As of release v21.03, Dgraph -no longer supports installation on Windows or macOS. To run Dgraph on Windows -and macOS, use the [standalone Docker image]({{}}). - -### Run Dgraph zero - -```sh -dgraph zero --my=IPADDR:5080 -``` -The `--my` flag is the connection that Dgraph alphas would dial to talk to -zero. So, the port `5080` and the IP address must be visible to all the Dgraph alphas. - -For all other various flags, run `dgraph zero --help`. - -### Run Dgraph Alpha - -```sh -dgraph alpha --my=IPADDR:7080 --zero=localhost:5080 -dgraph alpha --my=IPADDR:7081 --zero=localhost:5080 -o=1 -``` - -Notice the use of `-o` for the second Alpha to add offset to the default ports used. Dgraph Zero automatically assigns a unique ID to each Alpha, which persists in the [Write Ahead Logs]({{< relref "design-concepts/concepts#write-ahead-logs" >}}) (**wal**) directory; users can specify the index using the `--raft` superflag's `idx` option. -Dgraph Alpha nodes use two directories to persist data and WAL logs, and these -directories must be different for each Alpha if they are running on the same -host. You can use `-p` and `-w` to change the location of the data and WAL directories. -To learn more about other flags, run the following command: - -`dgraph alpha --help`. - -### Run Dgraph's Ratel UI - -Ratel provides data visualization and cluster management for Dgraph. To get started with Ratel, use it online with the [Dgraph Ratel Dashboard](https://play.dgraph.io) or clone and build Ratel using the [instructions -from the Ratel repository on GitHub](https://github.com/dgraph-io/ratel/blob/master/INSTRUCTIONS.md). To learn more, see [Ratel Overview]({{< relref "/ratel/overview" >}}). - -Ratel's binary now has its own Docker image https://hub.docker.com/r/dgraph/ratel/tags?page=1&ordering=last_updated - -{{% notice "note" %}} -Pay attention that this is an HTTPS site. Google has removed communication between non-HTTPS applications. That is, a local Dgraph will hardly connect to a Ratel using TLS/SSL. In case of a local cluster without HTTPS, use the docker image locally as well. -{{% /notice %}} - -## Run using Docker - -{{% notice "note" %}} -As of release v21.03, Dgraph no longer supports installation on Windows or macOS. 
-Windows and macOS users who want to evaluate Dgraph can use the [standalone Docker image]({{}}). -{{% /notice %}} - -Dgraph cluster can be setup running as containers on a single host. First, you'd want to figure out the host IP address. You can typically do that via - -```sh -ip addr # On Arch Linux -ifconfig # On Ubuntu/Mac -``` -We'll refer to the host IP address via `HOSTIPADDR`. - -### Create Docker network - -```sh -docker network create dgraph_default -``` - -### Run Dgraph Zero - -```sh -mkdir ~/zero # Or any other directory where data should be stored. - -docker run -it -p 5080:5080 --network dgraph_default -p 6080:6080 -v ~/zero:/dgraph dgraph/dgraph:{{< version >}} dgraph zero --my=HOSTIPADDR:5080 -``` - -### Run Dgraph Alpha -```sh -mkdir ~/server1 # Or any other directory where data should be stored. - -docker run -it -p 7080:7080 --network dgraph_default -p 8080:8080 -p 9080:9080 -v ~/server1:/dgraph dgraph/dgraph:{{< version >}} dgraph alpha --zero=HOSTIPADDR:5080 --my=HOSTIPADDR:7080 -``` -```sh -mkdir ~/server2 # Or any other directory where data should be stored. - -docker run -it -p 7081:7081 --network dgraph_default -p 8081:8081 -p 9081:9081 -v ~/server2:/dgraph dgraph/dgraph:{{< version >}} dgraph alpha --zero=HOSTIPADDR:5080 --my=HOSTIPADDR:7081 -o=1 -``` -Notice the use of -o for server2 to override the default ports for server2. - -## Run using Docker Compose (On single AWS instance) - -We will use [Docker Machine](https://docs.docker.com/machine/overview/). It is a tool that lets you install Docker Engine on virtual machines and easily deploy applications. - -* [Install Docker Machine](https://docs.docker.com/machine/install-machine/) on your machine. - -{{% notice "note" %}}These instructions are for running Dgraph Alpha without TLS config. -Instructions for running with TLS refer [TLS instructions]({{< relref "deploy/tls-configuration.md" >}}).{{% /notice %}} - -Here we'll go through an example of deploying Dgraph Zero, Alpha and Ratel on an AWS instance. - -* Make sure you have Docker Machine installed by following [instructions](https://docs.docker.com/machine/install-machine/), provisioning an instance on AWS is just one step away. You'll have to [configure your AWS credentials](http://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/setup-credentials.html) for programmatic access to the Amazon API. - -* Create a new docker machine. - -```sh -docker-machine create --driver amazonec2 aws01 -``` - -Your output should look like - -```sh -Running pre-create checks... -Creating machine... -(aws01) Launching instance... -... -... -Docker is up and running! -To see how to connect your Docker Client to the Docker Engine running on this virtual machine, run: docker-machine env aws01 -``` - -The command would provision a `t2-micro` instance with a security group called `docker-machine` -(allowing inbound access on 2376 and 22). You can either edit the security group to allow inbound access to '5080`, `8080`, `9080` (default ports for Dgraph Zero & Alpha) or you can provide your own security -group which allows inbound access on port 22, 2376 (required by Docker Machine), 5080, 8080 and 9080. Remember port *5080* is only required if you are running Dgraph Live Loader or Dgraph Bulk Loader from outside. - -[Here](https://docs.docker.com/machine/drivers/aws/#options) is a list of full options for the `amazonec2` driver which allows you choose the instance type, security group, AMI among many other things. 
- -{{% notice "tip" %}}Docker machine supports [other drivers](https://docs.docker.com/machine/drivers/gce/) like GCE, Azure etc.{{% /notice %}} - -* Install and run Dgraph using docker-compose - -Docker Compose is a tool for running multi-container Docker applications. You can follow the -instructions [here](https://docs.docker.com/compose/install/) to install it. - -Run the command below to download the `docker-compose.yml` file on your machine. - -```sh -wget https://github.com/dgraph-io/dgraph/raw/main/contrib/config/docker/docker-compose.yml -``` - -{{% notice "note" %}}The config mounts `/data`(you could mount something else) on the instance to `/dgraph` within the -container for persistence.{{% /notice %}} - -* Connect to the Docker Engine running on the machine. - -Running `docker-machine env aws01` tells us to run the command below to configure -our shell. -``` -eval $(docker-machine env aws01) -``` -This configures our Docker client to talk to the Docker engine running on the AWS Machine. - -Finally run the command below to start the Zero and Alpha. -``` -docker-compose up -d -``` -This would start 3 Docker containers running Dgraph Zero, Alpha and Ratel on the same machine. Docker would restart the containers in case there is any error. -You can look at the logs using `docker-compose logs`. diff --git a/content/howto/commandline/create-cli.md b/content/howto/commandline/create-cli.md index ae01d9c6..c8e6e4d4 100644 --- a/content/howto/commandline/create-cli.md +++ b/content/howto/commandline/create-cli.md @@ -11,7 +11,7 @@ The completion script is code that uses the builtin bash command complete to def ## Before you begin -* [Install Dgraph]({{< relref "/deploy/download" >}}#buil-and-install). +* [Install Dgraph]({{< relref "/installation/download" >}}#buil-and-install). * Determine the shell you are running: ```bash echo $0 diff --git a/content/installation/_index.md b/content/installation/_index.md new file mode 100644 index 00000000..e4381c2a --- /dev/null +++ b/content/installation/_index.md @@ -0,0 +1,66 @@ ++++ +title = "Installation" +[menu.main] + identifier = "installation" + weight = 4 ++++ + +
+
+

+ This section covers installing Dgraph in development or hobbyist environments, as well as in production environments with high availability (HA) and horizontal scalability using multiple Alpha nodes in a cluster. +

+ +
+
+ + + + + + + + + diff --git a/content/installation/download.md b/content/installation/download.md new file mode 100644 index 00000000..f9c5c6e1 --- /dev/null +++ b/content/installation/download.md @@ -0,0 +1,60 @@ ++++ +date = "2017-03-20T22:25:17+11:00" +title = "Download" +weight = 1 +[menu.main] + parent = "installation" ++++ + +{{% notice "tip" %}} +If you are new to Dgraph, the easiest way to get Dgraph up and running is using the [Dgraph Cloud](https://cloud.dgraph.io/) or to use Dgraph standalone Docker image. +{{% /notice %}} + + +You can obtain Dgraph binary for the latest version as well as previous releases using automatic install script, manual download, through Docker images or by building the binary from the open source code. + + +{{% tabs %}} {{< tab "Docker" >}} +1. Install Docker. + +1. Pull the latest Dgraph image using docker: + ```sh + docker pull dgraph/dgraph:latest + ``` +1. Verify that the image is downloaded: + + ```sh + docker images + ``` +{{< /tab >}} + +{{% tab "Automatic" %}} +On linux system, you can get the binary using the automatic script: +1. Download the Dgraph installation script to install Dgraph automatically: + ```sh + curl https://get.dgraph.io -sSf | bash + ``` + +1. Verify that it works fine, by running: + ``` + dgraph version + ``` + For more information about the various installation scripts that you can use, see [install scripts](https://github.com/dgraph-io/Install-Dgraph). +{{< /tab >}} +{{% tab "Manual" %}} +On linux system, you can download a tar file and install manually. +Download the appropriate tar for your platform from **[Dgraph releases](https://github.com/dgraph-io/dgraph/releases)**. After downloading the tar for your platform from Github, extract the binary to `/usr/local/bin` like so. + +1. Download the installation file: + ``` + $ sudo tar -C /usr/local/bin -xzf dgraph-linux-amd64-VERSION.tar.gz + ``` +1. Verify that it works fine, by running: + ``` + dgraph version + ``` +{{% /tab %}} +{{% tab "Source" %}} +You can also build **Dgraph** and **Ratel UI** from the source code by following the instructions from [Contributing to Dgraph](https://github.com/dgraph-io/dgraph/blob/master/CONTRIBUTING.md) or [Building and running ratel](https://github.com/dgraph-io/ratel/blob/master/INSTRUCTIONS.md). +{{% /tab %}}{{% /tabs %}} + diff --git a/content/deploy/kubernetes.md b/content/installation/kubernetes.md similarity index 99% rename from content/deploy/kubernetes.md rename to content/installation/kubernetes.md index 05afe0c4..29388c93 100644 --- a/content/deploy/kubernetes.md +++ b/content/installation/kubernetes.md @@ -1,9 +1,9 @@ +++ date = "2017-03-20T22:25:17+11:00" -title = "Using Kubernetes" -weight = 8 +title = "Kubernetes" +weight = 2 [menu.main] - parent = "deploy" + parent = "installation" +++ The following section covers running Dgraph with Kubernetes. We have tested Dgraph with Kubernetes versions 1.14 to 1.16 on [GKE](https://cloud.google.com/kubernetes-engine) and versions 1.14 to 1.17 on [EKS](https://aws.amazon.com/eks/). 
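For orientation, a minimal single-server deployment sketch (assuming the evaluation manifest shipped in the Dgraph repository under `contrib/config/kubernetes`; the exact path can vary between releases, so treat it as an assumption to verify):

```sh
# Confirm kubectl can reach the cluster
kubectl version

# Apply the single-node evaluation manifest (Zero + Alpha + Ratel);
# the manifest path is illustrative -- check it against your release
kubectl apply -f https://raw.githubusercontent.com/dgraph-io/dgraph/main/contrib/config/kubernetes/dgraph-single/dgraph-single.yaml

# Watch the Dgraph pods come up
kubectl get pods --watch
```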
diff --git a/content/deploy/multi-host-setup.md b/content/installation/multi-host-setup.md similarity index 96% rename from content/deploy/multi-host-setup.md rename to content/installation/multi-host-setup.md index 3743ef9b..f5ee8d27 100644 --- a/content/deploy/multi-host-setup.md +++ b/content/installation/multi-host-setup.md @@ -1,11 +1,13 @@ +++ date = "2017-03-20T22:25:17+11:00" title = "Multi-Host Setup" -weight = 7 +weight = 4 [menu.main] - parent = "deploy" + parent = "installation" +++ +Dgraph does not recommend multi-host setup for a production environment. For a production environment you need to ensure High Availability with external persistent storage, automatic recovery of failed services, automatic recovery of failed systems such as virtual machines, and highly recommended disaster recovery such as backup/restore or export/import with automation. + ## Using Docker Swarm ### Cluster Setup Using Docker Swarm diff --git a/content/installation/single-host-setup.md b/content/installation/single-host-setup.md new file mode 100644 index 00000000..d9f86dfc --- /dev/null +++ b/content/installation/single-host-setup.md @@ -0,0 +1,174 @@ ++++ +date = "2017-03-20T22:25:17+11:00" +title = "Single Host Cluster Setup" +weight = 1 +[menu.main] + parent = "installation" ++++ + + Dgraph does not recommend single host setup for a production environment. For a production environment you need to ensure High Availability with external persistent storage, automatic recovery of failed services, automatic recovery of failed systems such as virtual machines, and highly recommended disaster recovery such as backup/restore or export/import with automation. + You can install and run Dgraph cluster on a single host using Docker, Docker Compose, or Dgraph command line. + +## Docker + +Dgraph cluster can be setup running as containers on a single host. +{{% notice "note" %}} +As of release v21.03, Dgraph no longer supports installation on Windows or macOS. +Windows and macOS users who want to evaluate Dgraph can use the [standalone Docker image]({{}}). +{{% /notice %}} + +### Before you begin + +Ensure that you have installed: + * Docker [Desktop](https://docs.docker.com/desktop/) + * Docker [Engine](https://docs.docker.com/engine/install/) + * Docker [Compose](https://docs.docker.com/compose/) + +### Using Docker +To setup a Dgraph cluster on a single host using Docker: + +1. Get the `` of the host using: + ```sh + ip addr # On Arch Linux + ifconfig # On Ubuntu/Mac + ``` +1. Pull the latest Dgraph image using docker: + ```sh + docker pull dgraph/dgraph:latest + ``` +1. Verify that the image is downloaded: + + ```sh + docker images + ``` +1. Create a `` using: + ```sh + docker network create + ``` +1. Create a directory ``to store data for Dgraph Zero and run the container: + ```sh + mkdir ~/ # Or any other directory where data should be stored. + + docker run -it -p 5080:5080 --network -p 6080:6080 -v ~/:/dgraph dgraph/dgraph:latest dgraph zero --my=:5080 + ``` +1. Create a directory `` to store for Dgraph Alpha + and run the container: + ```sh + mkdir ~/ # Or any other directory where data should be stored. + + docker run -it -p 7080:7080 --network -p 8080:8080 -p 9080:9080 -v ~/:/dgraph dgraph/dgraph:latest dgraph alpha --zero=:5080 --my=:7080 + ``` +1. Create a directory `` to store for the second Dgraph Alpha + and run the container: + ```sh + mkdir ~/ # Or any other directory where data should be stored. 
+ + docker run -it -p 7081:7081 --network -p 8081:8081 -p 9081:9081 -v ~/:/dgraph dgraph/dgraph:{{< version >}} dgraph alpha --zero=:5080 --my=:7081 -o=1 + ``` + To override the default ports for the second Alpha use `-o`. +1. Connect the Dgraph cluster that are running using https://play.dgraph.io/. For information about connecting, see [Ratel UI]({{< relref "ratel/connection.md" >}}). + +## Dgraph Command Line + +You can run Dgraph directly on a single Linux host. + +### Before you begin + +Ensure that you have: +* Installed [Dgraph]({{< relref "installation/download.md" >}}) on the Linux host. +* Made a note of the `` of the host. + +### Using Dgraph Command Line +You can start Dgraph on a single host using the dgraph command line. + +1. Run Dgraph zero + ```sh + dgraph zero --my=:5080 + ``` + The `--my` flag is the connection that Dgraph alphas dial to talk to zero. So, the port `5080` and the IP address must be visible to all the Dgraph alphas. For all other various flags, run `dgraph zero --help`. + +1. Run two Dgraph alpha nodea: + ```sh + dgraph alpha --my=:7080 --zero=localhost:5080 + dgraph alpha --my=:7081 --zero=localhost:5080 -o=1 + ``` + Dgraph alpha nodes use two directories to persist data and [WAL logs]({{< relref "design-concepts/concepts#write-ahead-logs" >}}), and these directories must be different for each alpha if they are running on the same host. You can use `-p` and `-w` to change the location of the data and WAL directories.To learn more about other flags, run `dgraph alpha --help`. + +1. Connect the Dgraph cluster that are running using https://play.dgraph.io/. For information about connecting, see [Ratel UI]({{< relref "ratel/connection.md" >}}). + +## Docker Compose + +You can install Dgraph using the Docker Compose on a system hosted on any of the cloud provider. + +### Before you begin + + * Ensure that you have installed Docker [Compose](https://docs.docker.com/compose/). + * IP address of the system on cloud ``. + * IP address of the local host ``. + +### Using Docker Compose + +1. Download the Dgraph `docker-compose.yml` file: + + wget https://github.com/dgraph-io/dgraph/raw/main/contrib/config/docker/docker-compose.yml + + By default only the localhost IP 127.0.0.1 is allowed. When you run Dgraph on Docker, the containers are assigned IPs and those IPs need to be added to the allowed list. + +1. Add a list of IPs allowed for Dgraph so that you can create the schema. Use an editor of your choice and add the `` of the local host in `docker-compose.yml` file: + ```txt + # This Docker Compose file can be used to quickly bootup Dgraph Zero + # and Alpha in different Docker containers. + # It mounts /tmp/data on the host machine to /dgraph within the + # container. You will need to change /tmp/data to a more appropriate location. + # Run `docker-compose up` to start Dgraph. + version: "3.2" + services: + zero: + image: dgraph/dgraph:latest + volumes: + - /tmp/data:/dgraph + ports: + - 5080:5080 + - 6080:6080 + restart: on-failure + command: dgraph zero --my=zero:5080 + alpha: + image: dgraph/dgraph:latest + volumes: + - /tmp/data:/dgraph + ports: + - 8080:8080 + - 9080:9080 + restart: on-failure + command: dgraph alpha --my=alpha:7080 --zero=zero:5080 --security whitelist= + ratel: + image: dgraph/ratel:latest + ports: + - 8000:8000 + + ``` + +1. 
Run the `docker-compose` command to start the Dgraph services in the docker container: + + sudo docker-compose up + + After Dgraph is installed on Docker, you can view the images and the containers running in Docker for Dgraph. + +1. View the containers running for Dgraph using: + + sudo docker ps -a + + An output similar to the following appears: + + ```bash + CONTAINER ID IMAGE COMMAND CREATED + 4b67157933b6 dgraph/dgraph:latest "dgraph zero --my=ze…" 2 days ago + 3faf9bba3a5b dgraph/ratel:latest "/usr/local/bin/dgra…" 2 days ago + a6b5823b668d dgraph/dgraph:latest "dgraph alpha --my=a…" 2 days ago + ``` + +1. To access the Ratel UI for queries, mutations, and altering schema, open your web browser and navigate to `http://:8000`. +1. Click **Launch Latest** to access the latest stable release of Ratel UI. +1. In the **Dgraph Server Connection** dialog that set the **Dgraph server URL** as `http://:8080` +1. Click **Connect** . The connection health appears green. +1. Click **Continue** to query or run mutations. diff --git a/content/migration/migrate-tool.md b/content/migration/migrate-tool.md index cd77052f..bf0db8df 100644 --- a/content/migration/migrate-tool.md +++ b/content/migration/migrate-tool.md @@ -24,6 +24,7 @@ You can run the Dgraph migrate tool using this command: ```sh dgraph migrate [flags] +``` 1. Create a `config.properties` file that has the following settings and values should not be in quotes: ```txt From 0362951eae4e90b60dc267f209f2e9e81d7109f7 Mon Sep 17 00:00:00 2001 From: rderbier Date: Mon, 23 Jan 2023 09:18:07 -0800 Subject: [PATCH 3/5] short reference to bulk loader and live loader --- content/deploy/_index.md | 6 +- content/deploy/cli-command-reference.md | 140 +++++++++--------- content/deploy/dgraph-administration.md | 4 +- content/deploy/fast-data-loading/_index.md | 7 - content/deploy/fast-data-loading/overview.md | 29 ---- content/deploy/ports-usage.md | 2 +- content/deploy/troubleshooting.md | 10 +- .../enterprise-features/encryption-at-rest.md | 43 +----- content/enterprise-features/multitenancy.md | 40 ++--- content/howto/importdata/about_import.md | 15 +- .../importdata}/bulk-loader.md | 31 +++- content/howto/importdata/import-data-cloud.md | 7 - content/howto/importdata/import-data.md | 7 - .../importdata}/live-loader.md | 49 +++++- content/installation/kubernetes.md | 6 +- content/migration/about-data-migration.md | 9 +- content/migration/loading-csv-data.md | 12 +- content/migration/migrate-tool.md | 8 +- content/mutations/batch-mutations.md | 16 -- 19 files changed, 194 insertions(+), 247 deletions(-) delete mode 100644 content/deploy/fast-data-loading/_index.md delete mode 100644 content/deploy/fast-data-loading/overview.md rename content/{deploy/fast-data-loading => howto/importdata}/bulk-loader.md (92%) delete mode 100644 content/howto/importdata/import-data-cloud.md delete mode 100644 content/howto/importdata/import-data.md rename content/{deploy/fast-data-loading => howto/importdata}/live-loader.md (86%) delete mode 100644 content/mutations/batch-mutations.md diff --git a/content/deploy/_index.md b/content/deploy/_index.md index 3f36ca79..9b7d3f1f 100644 --- a/content/deploy/_index.md +++ b/content/deploy/_index.md @@ -38,14 +38,14 @@ running multiple instances of Dgraph, over multiple servers in a cluster. }}">

TLS Configuration

- Setting up secure TLS connections between clients and servers + Setting up secure TLS connections between clients and servers

- }}"> -

Fast Data Loading

+
}}"> +

Data Import

Dgraph tools for fast data loading

diff --git a/content/deploy/cli-command-reference.md b/content/deploy/cli-command-reference.md index a59b7561..7c000da7 100644 --- a/content/deploy/cli-command-reference.md +++ b/content/deploy/cli-command-reference.md @@ -9,27 +9,27 @@ weight = 20 You can use the Dgraph command-line interface (CLI) to deploy and manage Dgraph. You use it in self-managed deployment scenarios; such as running Dgraph on on-premises servers hosted on your physical infrastructure, or running Dgraph in -the cloud on your AWS, GCP, or Azure infrastructure. +the cloud on your AWS, GCP, or Azure infrastructure. Dgraph has a root command used throughout its CLI: `dgraph`. The `dgraph` command is supported by multiple subcommands (such as `alpha` or `update`), some of which are also supported by their own subcommands. For example, the `dgraph acl` command requires you to specify one of its subcommands: `add`, `del`, `info` or `mod`. As with other CLIs, you provide command options using flags like `--help` -or `--telemetry`. +or `--telemetry`. {{% notice "tip" %}} -The term *command* is used instead of *subcommand* throughout this document, +The term *command* is used instead of *subcommand* throughout this document, except when clarifying relationships in the CLI command hierarchy. The term *command* is also used for combinations of commands and their subcommands, -such as `dgraph alpha debug`. +such as `dgraph alpha debug`. {{% /notice %}} ## Dgraph CLI superflags in release v21.03 Some flags are deprecated and replaced in release v21.03. In previous Dgraph -releases, multiple related flags are often used in a command, causing some -commands to be very long. Starting in release v21.03, Dgraph uses *superflags* +releases, multiple related flags are often used in a command, causing some +commands to be very long. Starting in release v21.03, Dgraph uses *superflags* for some flags used by the most complex commands: `alpha`, `backup`, `bulk`, `debug`, `live` and `zero`. Superflags are compound flags: they contain one or more options that let you define multiple settings in a semicolon-delimited list. @@ -76,7 +76,7 @@ dgraph alpha --ludicrous enabled=true; concurrency=16; The following table maps Dgraph CLI flags from release v20.11 and earlier that have been replaced by superflags (and their options) in release v21.03. Any flags -not shown here are unchanged in release v21.03. +not shown here are unchanged in release v21.03. ### ACL superflag @@ -128,7 +128,7 @@ The `--badger` superflag allows you to set many advanced [Badger options](https: | `--graphql_introspection` | bool | `introspection` | bool |`alpha`| Enables GraphQL schema introspection | | `--graphql_debug` | bool | `debug` | bool |`alpha`| Enables debug mode in GraphQL | | `--graphql_extensions` | bool | `extensions` | bool |`alpha`| Enables extensions in GraphQL response body | -| `--graphql_poll_interval` | time.Duration | `poll-interval` | [string](https://github.com/dgraph-io/ristretto/blob/master/z/flags.go#L80-L98) |`alpha`| The polling interval for GraphQL subscriptions | +| `--graphql_poll_interval` | time.Duration | `poll-interval` | [string](https://github.com/dgraph-io/ristretto/blob/master/z/flags.go#L80-L98) |`alpha`| The polling interval for GraphQL subscriptions | | `--graphql_lambda_url` | string | `lambda-url` | string |`alpha`| The URL of a lambda server that implements custom GraphQL JavaScript resolvers | ### Limit superflag @@ -223,7 +223,7 @@ flag. 
{{% notice "note" %}} Although many of the commands listed below have subcommands, only `dgraph` and -subcommands of `dgraph` are included in this listing. +subcommands of `dgraph` are included in this listing. {{% /notice %}} The Dgraph CLI has several commands, which are organized into the following groups: @@ -236,13 +236,13 @@ The Dgraph CLI has several commands, which are organized into the following grou The commands in these groups are shown in the following table: -|Group | Command | Note | +|Group | Command | Note | |------------------|--------------------------------|------------------------------| | (root) | [`dgraph`](#dgraph-root-command) | Root command for Dgraph CLI | | Dgraph core | [`alpha`](#dgraph-alpha) | Dgraph Alpha database node commands | | Dgraph core | [`zero`](#dgraph-zero) | Dgraph Zero management node commands | -| Data loading | [`bulk`](#dgraph-bulk) | Dgraph [Bulk Loader]({{< relref "deploy/fast-data-loading/bulk-loader.md" >}}) commands | -| Data loading | [`live`](#dgraph-live) | Dgraph [Live Loader]({{< relref "deploy/fast-data-loading/live-loader.md" >}}) commands | +| Data loading | [`bulk`](#dgraph-bulk) | Dgraph [Bulk Loader]({{< relref "bulk-loader.md" >}}) commands | +| Data loading | [`live`](#dgraph-live) | Dgraph [Live Loader]({{< relref "live-loader.md" >}}) commands | | Data loading | [`restore`](#dgraph-restore) | Command used to restore backups created using Dgraph Enterprise Edition | | Dgraph security | [`acl`](#dgraph-acl) | Dgraph [Access Control List (ACL)]({{< relref "enterprise-features/access-control-lists.md" >}}) commands | | Dgraph security | [`audit`](#dgraph-audit) | Decrypt audit files | @@ -266,17 +266,17 @@ the help listing for `dgraph --help` is shown below: ```shell Usage: - dgraph [command] + dgraph [command] Generic: help Help about any command - version Prints the dgraph version details + version Prints the dgraph version details Available Commands: Dgraph Core: alpha Run Dgraph Alpha database server - zero Run Dgraph Zero management server + zero Run Dgraph Zero management server Data Loading: bulk Run Dgraph Bulk Loader @@ -289,17 +289,17 @@ Dgraph Security: cert Dgraph TLS certificate management Dgraph Debug: - debug Debug Dgraph instance + debug Debug Dgraph instance debuginfo Generate debug information on the current node Dgraph Tools: - completion Generates shell completion scripts for bash or zsh + completion Generates shell completion scripts for bash or zsh conv Dgraph Geo file converter - decrypt Run the Dgraph decryption tool + decrypt Run the Dgraph decryption tool export_backup Export data inside single full or incremental backup increment Increment a counter transactionally - lsbackup List info on backups in a given location - migrate Run the Dgraph migration tool from a MySQL database to Dgraph + lsbackup List info on backups in a given location + migrate Run the Dgraph migration tool from a MySQL database to Dgraph raftmigrate Run the Raft migration tool upgrade Run the Dgraph upgrade tool @@ -336,9 +336,9 @@ your deployment. The following replicates the help listing for `dgraph alpha --h A Dgraph Alpha instance stores the data. Each Dgraph Alpha is responsible for storing and serving one data group. If multiple Alphas serve the same group, they form a Raft group and provide synchronous replication. - + Usage: - dgraph alpha [flags] + dgraph alpha [flags] Flags: --acl string [Enterprise Feature] ACL options @@ -466,9 +466,9 @@ your deployment. 
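For example, a minimal Zero invocation on a single host, mirroring the single-host setup earlier in this patch (`<IPADDR>` is a placeholder for the host address):

```sh
# Start Zero; port 5080 must be reachable by every Alpha in the cluster
dgraph zero --my=<IPADDR>:5080
```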
The following replicates the help listing shown when you run A Dgraph Zero instance manages the Dgraph cluster. Typically, a single Zero instance is sufficient for the cluster; however, one can run multiple Zero instances to achieve high-availability. - + Usage: - dgraph zero [flags] + dgraph zero [flags] Flags: --audit string Audit options @@ -476,7 +476,7 @@ Flags: days=10; The number of days audit logs will be preserved. encrypt-file=; The path to the key file to be used for audit log encryption. output=; [stdout, /path/to/dir] This specifies where audit logs should be output to. - "stdout" is for standard output. You can also specify the directory where audit logs + "stdout" is for standard output. You can also specify the directory where audit logs will be saved. When stdout is specified as output other fields will be ignored. size=100; The audit log max size in MB after which it will be rolled over. (default "compress=false; days=10; size=100; dir=; output=; encrypt-file=;") @@ -530,13 +530,13 @@ Use "dgraph zero [command] --help" for more information about a command. #### `dgraph bulk` This command is used to bulk load data with the Dgraph -[Bulk Loader]({{< relref "deploy/fast-data-loading/bulk-loader.md" >}}) tool. +[Bulk Loader]({{< relref "bulk-loader.md" >}}) tool. The following replicates the help listing shown when you run `dgraph bulk --help`: ```shell - Run Dgraph Bulk Loader + Run Dgraph Bulk Loader Usage: - dgraph bulk [flags] + dgraph bulk [flags] Flags: --badger string Badger options (Refer to badger documentation for all possible options) @@ -597,13 +597,13 @@ Use "dgraph bulk [command] --help" for more information about a command. #### `dgraph live` -This command is used to load live data with the Dgraph [Live Loader]({{< relref "deploy/fast-data-loading/live-loader.md" >}}) tool. +This command is used to load live data with the Dgraph [Live Loader]({{< relref "live-loader.md" >}}) tool. The following replicates the help listing shown when you run `dgraph live --help`: ```shell - Run Dgraph Live Loader + Run Dgraph Live Loader Usage: - dgraph live [flags] + dgraph live [flags] Flags: -a, --alpha string Comma-separated list of Dgraph alpha gRPC server addresses (default "127.0.0.1:9080") @@ -703,9 +703,9 @@ $ dgraph restore -p /var/db/dgraph -l s3://s3.us-west-2.amazonaws.com/srfrog/dgr # Restore from dir and update Ts: $ dgraph restore -p . -l /var/backups/dgraph -z localhost:5080 - + Usage: - dgraph restore [flags] + dgraph restore [flags] Flags: --backup_id string The ID of the backup series to restore. If empty, it will restore the latest series. @@ -755,11 +755,11 @@ This command runs the Dgraph Enterprise Edition ACL tool. The following replicat the help listing shown when you run `dgraph acl --help`: ```shell -Run the Dgraph Enterprise Edition ACL tool +Run the Dgraph Enterprise Edition ACL tool Usage: - dgraph acl [command] + dgraph acl [command] -Available Commands: +Available Commands: add Run Dgraph acl tool to add a user or group del Run Dgraph acl tool to delete a user or group info Show info about a user or group @@ -792,11 +792,11 @@ when you run the `dgraph alpha` command. 
The following replicates the help listi shown when you run `dgraph audit --help`: ```shell -Dgraph audit tool +Dgraph audit tool Usage: - dgraph audit [command] + dgraph audit [command] -Available Commands: +Available Commands: decrypt Run Dgraph Audit tool to decrypt audit files Flags: @@ -811,12 +811,12 @@ This command lets you manage [TLS certificates]({{< relref "deploy/tls-configura The following replicates the help listing shown when you run `dgraph cert --help`: ```shell -Dgraph TLS certificate management +Dgraph TLS certificate management Usage: dgraph cert [flags] - dgraph cert [command] + dgraph cert [command] -Available Commands: +Available Commands: ls lists certificates and keys Flags: @@ -845,9 +845,9 @@ This command is used to debug issues with a Dgraph database instance. The following replicates the help listing shown when you run `dgraph debug --help`: ```shell - Debug Dgraph instance + Debug Dgraph instance Usage: - dgraph debug [flags] + dgraph debug [flags] Flags: --at uint Set read timestamp for all txns. (default 18446744073709551615) @@ -886,13 +886,13 @@ Use "dgraph debug [command] --help" for more information about a command. #### `dgraph debuginfo` -This command generates information about the current node that is useful for debugging. +This command generates information about the current node that is useful for debugging. The following replicates the help listing shown when you run `dgraph debuginfo --help`: ```shell -Generate debug information on the current node +Generate debug information on the current node Usage: - dgraph debuginfo [flags] + dgraph debuginfo [flags] Flags: -a, --alpha string Address of running dgraph alpha. (default "localhost:8080") @@ -917,11 +917,11 @@ This command generates shell completion scripts for `bash` and `zsh` CLIs. The following replicates the help listing shown when you run `dgraph completion --help`: ```shell -Generates shell completion scripts for bash or zsh +Generates shell completion scripts for bash or zsh Usage: - dgraph completion [command] + dgraph completion [command] -Available Commands: +Available Commands: bash bash shell completion zsh zsh shell completion @@ -938,9 +938,9 @@ files into RDF so that they can be consumed by Dgraph. The following replicates the help listing shown when you run `dgraph conv --help`: ```shell -Dgraph Geo file converter +Dgraph Geo file converter Usage: - dgraph conv [flags] + dgraph conv [flags] Flags: --geo string Location of geo file to convert @@ -958,9 +958,9 @@ cluster. The following replicates the help listing shown when you run `dgraph decrypt --help`: ```shell - A tool to decrypt an export file created by an encrypted Dgraph cluster + A tool to decrypt an export file created by an encrypted Dgraph cluster Usage: - dgraph decrypt [flags] + dgraph decrypt [flags] Flags: --encryption string [Enterprise Feature] Encryption At Rest options @@ -990,9 +990,9 @@ created using Dgraph Enterprise Edition into an exported folder. The following replicates key information from the help listing shown when you run `dgraph export_backup --help`: ```shell - Export data inside single full or incremental backup + Export data inside single full or incremental backup Usage: - dgraph export_backup [flags] + dgraph export_backup [flags] Flags: -d, --destination string The folder to which export the backups. @@ -1017,20 +1017,20 @@ Flags: secret-id-file=; Vault SecretID file, used for AppRole authentication. 
(default "addr=http://localhost:8200; role-id-file=; secret-id-file=; path=secret/data/dgraph; acl-field=; acl-format=base64; enc-field=; enc-format=base64") -Use "dgraph export_backup [command] --help" for more information about a command. +Use "dgraph export_backup [command] --help" for more information about a command. ``` #### `dgraph increment` This command increments a counter transactionally, so that you can confirm that an Alpha node is able to handle both query and mutation requests. To learn more, -see [Using the Increment Tool]({{< relref "howto/using-increment-tool.md" >}}). +see [Using the Increment Tool]({{< relref "howto/using-increment-tool.md" >}}). The following replicates the help listing shown when you run `dgraph increment --help`: ```shell -Increment a counter transactionally +Increment a counter transactionally Usage: - dgraph increment [flags] + dgraph increment [flags] Flags: --alpha string Address of Dgraph Alpha. (default "localhost:9080") @@ -1066,9 +1066,9 @@ Edition. To learn more, see [Backup List Tool]({{< relref "enterprise-features/l The following replicates the help listing shown when you run `dgraph lsbackup --help`: ```shell -List info on backups in a given location +List info on backups in a given location Usage: - dgraph lsbackup [flags] + dgraph lsbackup [flags] Flags: -h, --help help for lsbackup @@ -1085,9 +1085,9 @@ to move data from a MySQL database to Dgraph. The following replicates the help listing shown when you run `dgraph migrate --help`: ```shell -Run the Dgraph migration tool from a MySQL database to Dgraph +Run the Dgraph migration tool from a MySQL database to Dgraph Usage: - dgraph migrate [flags] + dgraph migrate [flags] Flags: --db string The database to import @@ -1107,13 +1107,13 @@ Use "dgraph migrate [command] --help" for more information about a command. #### `dgraph raftmigrate` -This command runs the Dgraph Raft migration tool. +This command runs the Dgraph Raft migration tool. The following replicates the help listing shown when you run `dgraph raftmigrate --help`: ```shell -Run the Raft migration tool +Run the Raft migration tool Usage: - dgraph raftmigrate [flags] + dgraph raftmigrate [flags] Flags: --encryption_key_file string The file that stores the symmetric key of length 16, 24, or 32 bytes. The key size determines the chosen AES cipher (AES-128, AES-192, and AES-256 respectively). Enterprise feature. @@ -1134,13 +1134,13 @@ Use "dgraph raftmigrate [command] --help" for more information about a command. #### `dgraph upgrade` -This command helps you to upgrade from an earlier Dgraph release to a newer release. +This command helps you to upgrade from an earlier Dgraph release to a newer release. The following replicates the help listing shown when you run `dgraph upgrade --help`: ```shell -This tool is supported only for the mainstream release versions of Dgraph, not for the beta releases. +This tool is supported only for the mainstream release versions of Dgraph, not for the beta releases. Usage: - dgraph upgrade [flags] + dgraph upgrade [flags] Flags: --acl upgrade ACL from v1.2.2 to >=v20.03.0 diff --git a/content/deploy/dgraph-administration.md b/content/deploy/dgraph-administration.md index 8e60372d..f2c54027 100644 --- a/content/deploy/dgraph-administration.md +++ b/content/deploy/dgraph-administration.md @@ -189,7 +189,7 @@ Doing periodic exports is always a good idea. This is particularly useful if you 2. Ensure it is successful 3. 
[Shutdown Dgraph]({{< relref "#shut-down-database" >}}) and wait for all writes to complete 4. Start a new Dgraph cluster using new data directories (this can be done by passing empty directories to the options `-p` and `-w` for Alphas and `-w` for Zeros) -5. Reload the data via [bulk loader]({{< relref "deploy/fast-data-loading/bulk-loader.md" >}}) +5. Reload the data via [bulk loader]({{< relref "bulk-loader.md" >}}) 6. Verify the correctness of the new Dgraph cluster. If all looks good, you can delete the old directories (export serves as an insurance) These steps are necessary because Dgraph's underlying data format could have changed, and reloading the export avoids encoding incompatibilities. @@ -265,7 +265,7 @@ are affected. Then, you can drop the old types and predicates from DB. command will do. {{% notice "note" %}} -The above steps are valid for migration from a cluster in `v20.11` to a single-tenant cluster in `v21.03`, +The above steps are valid for migration from a cluster in `v20.11` to a single-tenant cluster in `v21.03`, as backup and restore are cluster-wide operations and a single namespace cannot be restored in a multi-tenant cluster. {{% /notice %}} diff --git a/content/deploy/fast-data-loading/_index.md b/content/deploy/fast-data-loading/_index.md deleted file mode 100644 index 7eaad624..00000000 --- a/content/deploy/fast-data-loading/_index.md +++ /dev/null @@ -1,7 +0,0 @@ -+++ -title = "Fast Data Loading" -[menu.main] - identifier = "fast-data-loading" - parent = "deploy" - weight = 13 -+++ \ No newline at end of file diff --git a/content/deploy/fast-data-loading/overview.md b/content/deploy/fast-data-loading/overview.md deleted file mode 100644 index 73ab4bb3..00000000 --- a/content/deploy/fast-data-loading/overview.md +++ /dev/null @@ -1,29 +0,0 @@ -+++ -date = "2017-03-20T22:25:17+11:00" -title = "Overview" -weight = 1 -[menu.main] - parent = "fast-data-loading" - identifier = "data-loading-overview" -+++ - -There are two different tools that can be used for fast data loading: - -- `dgraph live` runs the [Dgraph Live Loader]({{< relref "live-loader.md" >}}) -- `dgraph bulk` runs the [Dgraph Bulk Loader]({{< relref "bulk-loader.md" >}}) - -{{% notice "note" %}} Both tools only accept [RDF N-Quad/Triple -data](https://www.w3.org/TR/n-quads/) or JSON in plain or gzipped format. Data -in other formats must be converted.{{% /notice %}} - -## Live Loader - -[Dgraph Live Loader]({{< relref "live-loader.md" >}}) (run with `dgraph live`) is a small helper program which reads RDF N-Quads from a gzipped file, batches them up, creates mutations (using the go client) and shoots off to Dgraph. - -## Bulk Loader - -[Dgraph Bulk Loader]({{< relref "bulk-loader.md" >}}) serves a similar purpose to the Dgraph Live Loader, but can -only be used to load data into a new cluster. It cannot be run on an existing -Dgraph cluster. Dgraph Bulk Loader is **considerably faster** than the Dgraph -Live Loader and is the recommended way to perform the initial import of large -datasets into Dgraph. 
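In brief, the two loader invocations look roughly like this (a sketch reusing the flag forms shown elsewhere in this patch; file names and addresses are placeholders):

```sh
# Live Loader: import into a running cluster
dgraph live --files "<data.rdf.gz>" --schema "<schema.txt>" \
  --alpha "<alpha-host:9080>" --zero "<zero-host:5080>"

# Bulk Loader: initial offline import into a brand-new cluster,
# run before the Alpha nodes are started
dgraph bulk --files "<data.rdf.gz>" --schema "<schema.txt>" \
  --zero "<zero-host:5080>"
```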
diff --git a/content/deploy/ports-usage.md b/content/deploy/ports-usage.md index d9695e10..b5287ba0 100644 --- a/content/deploy/ports-usage.md +++ b/content/deploy/ports-usage.md @@ -36,7 +36,7 @@ follows: 1: Dgraph Zero uses port 5080 for internal communication within the - cluster, and to support the [fast data loading]({{< relref "deploy/fast-data-loading/overview.md" >}}) + cluster, and to support the [data import]({{< relref "about_import.md" >}}) tools: Dgraph Live Loader and Dgraph Bulk Loader. 2: Dgraph Zero uses port 6080 for diff --git a/content/deploy/troubleshooting.md b/content/deploy/troubleshooting.md index e2cfecf3..5fe6e057 100644 --- a/content/deploy/troubleshooting.md +++ b/content/deploy/troubleshooting.md @@ -9,7 +9,7 @@ This page provides tips on how to troubleshoot issues with running Dgraph. ### Running out of memory (OOM) -When you [bulk load]({{< relref "deploy/fast-data-loading/bulk-loader.md" >}}) +When you [bulk load]({{< relref "bulk-loader.md" >}}) or [backup]({{< relref "/enterprise-features/binary-backups.md" >}}) your data, Dgraph can consume more memory than usual due to a high volume of writes. This can cause OOM crashes. @@ -18,13 +18,13 @@ You can take the following steps to help avoid OOM crashes: * **Increase the amount of memory available**: If you run Dgraph with insufficient memory, that can result in OOM crashes. The recommended minimum RAM to run Dgraph -on desktops and laptops (single-host deployment) is 16GB. For servers in a +on desktops and laptops (single-host deployment) is 16GB. For servers in a cluster deployment, the recommended minimum is 8GB per server. This applies to EC2 and GCE instances, as well as on-premises servers. * **Reduce the number of Go routines**: You can troubleshoot OOM issues by reducing the number of Go routines (`goroutines`) used by Dgraph from the default value of eight. For example, you can reduce the `goroutines` that Dgraph uses to four -by calling the `dgraph alpha` command with the following option: +by calling the `dgraph alpha` command with the following option: `--badger "goroutines=4"` @@ -33,7 +33,7 @@ by calling the `dgraph alpha` command with the following option: If Dgraph logs "too many open files" errors, you should increase the per-process open file descriptor limit to permit more open files. During normal operations, -Dgraph must be able to open many files. Your operating system may have an open +Dgraph must be able to open many files. Your operating system may have an open file descriptor limit with a low default value that isn't adequate for a database like Dgraph. If so, you might need to increase this limit. @@ -44,7 +44,7 @@ command, as follows: * Get soft limit: `ulimit -n -S` A soft limit of `1048576` open files is the recommended minimum to use Dgraph in -production, but you can try increasing this soft limit if you continue to see +production, but you can try increasing this soft limit if you continue to see this error. To learn more, see the `ulimit` documentation for your operating system. diff --git a/content/enterprise-features/encryption-at-rest.md b/content/enterprise-features/encryption-at-rest.md index c8bdb3f2..ad46a462 100644 --- a/content/enterprise-features/encryption-at-rest.md +++ b/content/enterprise-features/encryption-at-rest.md @@ -9,8 +9,8 @@ weight = 6 {{% notice "note" %}} This feature was introduced in [v1.1.1](https://github.com/dgraph-io/dgraph/releases/tag/v1.1.1). 
For migrating unencrypted data to a new Dgraph cluster with encryption enabled, you need to -[export the database]({{< relref "deploy/dgraph-administration.md#export-database" >}}) and [fast data load]({{< relref "deploy/fast-data-loading/overview.md" >}}), -preferably using the [bulk loader]({{< relref "deploy/fast-data-loading/bulk-loader.md" >}}). +[export the database]({{< relref "deploy/dgraph-administration.md#export-database" >}}) and [import data]({{< relref "about_import.md" >}}), +preferably using the [bulk loader]({{< relref "bulk-loader.md" >}}). {{% /notice %}} Encryption at rest refers to the encryption of data that is stored physically in any @@ -38,7 +38,7 @@ desired key size): tr -dc 'a-zA-Z0-9' < /dev/urandom | dd bs=1 count=32 of=enc_key_file ``` {{% notice "note" %}} -On a macOS you may have to use `LC_CTYPE=C; tr -dc 'a-zA-Z0-9' < /dev/urandom | dd bs=1 count=32 of=enc_key_file`. To view the key use `cat enc_key_file`. +On a macOS you may have to use `LC_CTYPE=C; tr -dc 'a-zA-Z0-9' < /dev/urandom | dd bs=1 count=32 of=enc_key_file`. To view the key use `cat enc_key_file`. {{% /notice %}} Alternatively, you can use the `--vault` [superflag's]({{< relref "deploy/cli-command-reference.md" >}}) options to enable encryption, as [explained below](#example-using-dgraph-cli-with-hashicorp-vault-configuration). @@ -114,43 +114,8 @@ If the Alpha server restarts, the `--encryption key-file` or the `--vault` super ## Turn off Encryption -You can use [live loader]({{< relref "/deploy/fast-data-loading/live-loader.md" >}}) or [bulk loader]({{< relref "/deploy/fast-data-loading/bulk-loader.md" >}}) to decrypt the data while importing. +You can use [live loader]({{< relref "live-loader.md" >}}) or [bulk loader]({{< relref "bulk-loader.md" >}}) to decrypt the data while importing. -### Using live loader - -You can import your encrypted data using [live loader]({{< relref "/deploy/fast-data-loading/live-loader.md" >}}) into a new Dgraph Alpha node without encryption enabled. - -```bash -# Encryption Key from the file path -dgraph live --files "" --schema "" \ - --alpha "" --zero "" \ - --encryption key-file="" - -# Encryption Key from HashiCorp Vault -dgraph live --files "" --schema "" \ - --alpha "" --zero "" \ - --vault addr="http://localhost:8200";enc-field="enc_key";enc-format="raw";path="secret/data/dgraph/alpha";role-id-file="./role_id";secret-id-file="./secret_id" - -``` - -### Using bulk loader - -You can also use [bulk loader]({{< relref "/deploy/fast-data-loading/bulk-loader.md" >}}), to turn off encryption. This will generate a new unencrypted `p` that will be used by the Alpha process. In this, case you need to pass `--encryption key-file`, `--encrypted` and `--encrypted_out` flags. - -```bash -# Encryption Key from the file path -dgraph bulk --files "" --schema "" --zero "" \ - --encrypted="true" --encrypted_out="false" \ - --encryption key-file="" - -# Encryption Key from HashiCorp Vault -dgraph bulk --files "" --schema "" --zero "" \ - --encrypted="true" --encrypted_out="false" \ - --vault addr="http://localhost:8200";enc-field="enc_key";enc-format="raw";path="secret/data/dgraph/alpha";role-id-file="./role_id";secret-id-file="./secret_id" - -``` - -In this case, we are also passing the flag `--encrypted=true` as the exported data has been taken from an encrypted Dgraph cluster and we are also specifying the flag `--encrypted_out=false` to specify that we want the `p` directory (_that will be generated by the bulk loader process_) to be unencrypted. 
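Condensed from the examples above, the decrypt-on-import commands look like this (paths, addresses, and the key file are placeholders):

```sh
# Live Loader: decrypt an encrypted export while importing into a
# cluster that runs without encryption
dgraph live --files "<path>" --schema "<path>" \
  --alpha "<alpha:9080>" --zero "<zero:5080>" \
  --encryption key-file="<enc_key_file>"

# Bulk Loader: read encrypted input, write an unencrypted p directory
dgraph bulk --files "<path>" --schema "<path>" --zero "<zero:5080>" \
  --encrypted="true" --encrypted_out="false" \
  --encryption key-file="<enc_key_file>"
```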
## Change Encryption Key diff --git a/content/enterprise-features/multitenancy.md b/content/enterprise-features/multitenancy.md index 8ddbe88e..97480e5e 100644 --- a/content/enterprise-features/multitenancy.md +++ b/content/enterprise-features/multitenancy.md @@ -16,7 +16,7 @@ Multi-tenancy is an enterprise feature and needs [Access Control Lists]({{< relr ## Overview -Multi-tenancy is built upon [Access Control Lists]({{< relref "access-control-lists.md" >}}) (ACL), +Multi-tenancy is built upon [Access Control Lists]({{< relref "access-control-lists.md" >}}) (ACL), and enables multiple tenants to share a Dgraph cluster using unique namespaces. The tenants are logically separated, and their data lies in the same `p` directory. Each namespace has a group guardian, which has root access to that namespace. @@ -28,7 +28,7 @@ users of other namespaces. {{% notice "note" %}} Dgraph provides a timeout limit per query that's configurable using the `--limit` superflag's `query-limit` option. There's no time limit for queries by default, but you can override it when running Dgraph Alpha. -For multi-tenant environments a suggested `query-limit` value is 500ms. +For multi-tenant environments a suggested `query-limit` value is 500ms. {{% /notice %}} ## FAQ @@ -49,7 +49,7 @@ The super admin is used only for database administration operations, such as exp The access controls are applied per tenant at a predicate level. For example, the user `John Smith` belonging to the group `Data Approvers` may only have read-only access to predicates, while user `Jane Doe`, who belongs to the group `Data Editors`, can be given access to modify predicates. - All of these ACL constraints have to be configured for each tenant. + All of these ACL constraints have to be configured for each tenant. - Are tenants a physical separation or a logical one? @@ -64,7 +64,7 @@ The super admin is used only for database administration operations, such as exp ## Namespace A multi-tenancy Namespace acts as a logical silo, so data stored in one namespace is not accessible from another namespace. -Each namespace has a group guardian (with root access to that namespace), and a unique `uint64` identifier. +Each namespace has a group guardian (with root access to that namespace), and a unique `uint64` identifier. Users are members of a single namespace, and cross-namespace queries are not allowed. {{% notice "note" %}} @@ -79,13 +79,13 @@ users of other namespaces. Multi-tenancy defines certain ACL roles for the shared cluster: -- [Guardians of the Galaxy](#guardians-of-the-galaxy) (Super Admins) +- [Guardians of the Galaxy](#guardians-of-the-galaxy) (Super Admins) - Guardians of the Namespace - They can create users and groups inside their own namespace - They can assign users to groups inside their own namespace - They can assign predicates to groups inside their own namespace - They can add users to groups inside the namespace - - They can export their namespace + - They can export their namespace - They can query and mutate in their namespace - They can't query or mutate across namespaces - Normal users @@ -98,7 +98,7 @@ Multi-tenancy defines certain ACL roles for the shared cluster: A _Guardian of the Galaxy_ is a Super Admin of the default namespace (`0x00`). 
-As a super-admin, a _Guardian of the Galaxy_ can: +As a super-admin, a _Guardian of the Galaxy_ can: - [Create](#create-a-namespace) and [delete](#delete-a-namespace) namespaces - Reset the passwords - Query and mutate the default namespace (`0x00`) @@ -459,30 +459,10 @@ All other `drop` operations run at namespace level and are namespace specific. Backups are currently cluster-wide only, but [exports](#exports) can be created by namespace. Only a [Guardian of the Galaxy](#guardians-of-the-galaxy) can trigger a backup. -### Bulk Loader +### Data import -[Bulk loader]({{< relref "/deploy/fast-data-loading/bulk-loader.md" >}}) can be used to load the data in bulk. -By default, Bulk loader preserves the namespace in the data and schema files. -If there's no namespace information available, it loads the data into the default namespace. +[Initial import]({{< relref "bulk-loader.md" >}}) and [Live import]({{< relref "live-loader.md" >}}) tools support multi-tenancy. -Please refer to the [Bulk loader documentation]({{< relref "/deploy/fast-data-loading/bulk-loader.md#multi-tenancy-enterprise-feature" >}}) for examples and additional information. - -### Live Loader - -Since multi-tenancy works with ACL enabled, when using the [Live loader]({{< relref "/deploy/fast-data-loading/live-loader.md" >}}), -you must provide the login credentials using the `--creds` flag. -By default, Live loader loads the data into the user's namespace. -[Guardians of the Galaxy](#guardians-of-the-galaxy) can load the data into multiple namespaces. - -Please refer to the [Live loader documentation]({{< relref "/deploy/fast-data-loading/live-loader.md#multi-tenancy-enterprise-feature" >}}) for examples and additional information. - -{{% notice "note" %}} -The Live loader requires that the `namespace` from the data and schema files exist before loading the data. -{{% /notice %}} - -{{% notice "tip" %}} -[Live loader](#live-loader) supports loading data into specific namespaces. -{{% /notice %}} ## Exports @@ -495,7 +475,7 @@ If a _Guardian of the Galaxy_ exports the whole cluster, a single folder contain Guardians of a Namespace can trigger an Export for their namespace. {{% /notice %}} -A namespace-specific export will contain the namespace value in the generated `.rdf` file: +A namespace-specific export will contain the namespace value in the generated `.rdf` file: ```rdf <0x01> "name" "ibrahim" <0x12> . -> this belongs to namespace 0x12 diff --git a/content/howto/importdata/about_import.md b/content/howto/importdata/about_import.md index 40475df8..a11d4e10 100644 --- a/content/howto/importdata/about_import.md +++ b/content/howto/importdata/about_import.md @@ -1,7 +1,18 @@ +++ title = "Import data" -keywords = "import data," +keywords = "import data, howto, task" [menu.main] parent = "importdata" weight = 1 -+++ \ No newline at end of file ++++ + +As an `Administrator` you can initialize a new Dgraph cluster by doing an [Initial import]({{< relref "bulk-loader.md" >}}) and you can import data into a running instance by performing a [Live import]({{< relref "live-loader.md" >}}). + + +Initial import is **considerably faster** than the live import but can only be used to load data into a new cluster (without prior data) and is executed before starting the Alpha nodes. + +{{% notice "note" %}} Contact us if you need to do an initial import on a Dgraph Cloud instance.{{% /notice %}} + + +{{% notice "note" %}} Both options accept [RDF N-Quad/Triple +data](https://www.w3.org/TR/n-quads/) or JSON format. 
Refer to [data migration]({{< relref "about-data-migration.md" >}}) to see how to convert other data formats.{{% /notice %}}
diff --git a/content/deploy/fast-data-loading/bulk-loader.md b/content/howto/importdata/bulk-loader.md
similarity index 92%
rename from content/deploy/fast-data-loading/bulk-loader.md
rename to content/howto/importdata/bulk-loader.md
index b37737d7..eba18af9 100644
--- a/content/deploy/fast-data-loading/bulk-loader.md
+++ b/content/howto/importdata/bulk-loader.md
@@ -1,9 +1,9 @@
 +++
 date = "2017-03-20T22:25:17+11:00"
-title = "Bulk Loader"
-weight = 12
+title = "Initial import (Bulk Loader)"
+weight = 2
 [menu.main]
-  parent = "fast-data-loading"
+  parent = "importdata"
 +++
 
 Dgraph Bulk Loader serves a similar purpose to the Dgraph Live Loader, but can
@@ -138,8 +138,8 @@ $ tree ./out
 4 directories, 6 files
 ```
 
-Because `--reduce_shards` was set to `2`, two sets of `p` directories are generated: 
-- the `./out/0` folder 
+Because `--reduce_shards` was set to `2`, two sets of `p` directories are generated:
+- the `./out/0` folder
 - the `./out/1` folder
 
 Once the output is created, the files must be copied to all the servers that will run
@@ -149,7 +149,7 @@ Dgraph Alphas:
 - Each replica of the second group (`Alpha4`, `Alpha5`, `Alpha6`) should have a copy of `./out/1/p`, and so on.
 
 {{% notice "note" %}}
-Each Dgraph Alpha must have a copy of the group's `p` directory output. 
+Each Dgraph Alpha must have a copy of the group's `p` directory output.
 {{% /notice %}}
 
 ![Bulk Loader diagram](/images/deploy/bulk-loader.png)
@@ -164,7 +164,7 @@ You can further configure Bulk Loader using the following options:
 
 - `--badger` superflag's `compression` option: Configure the compression of data
 on disk. By default, the Snappy compression format is used, but you can also use
-Zstandard compression. Or, you can choose no compression to minimize CPU usage. 
+Zstandard compression. Or, you can choose no compression to minimize CPU usage.
 To learn more, see [Data Compression on Disk]({{< relref "/deploy/data-compression.md" >}}).
 
 - `--new_uids`: (default: false): Assign new UIDs instead of using the existing
@@ -326,6 +326,23 @@ Input is not encrypted but the output is encrypted. (This is the migration use c
 Alternatively, starting with v20.07.0, the `vault_*` options can be used instead of the `--encryption key-file=value` option above to achieve the same effect except that the keys are sitting in a Vault server.
 
+You can also use *bulk loader* to turn off encryption. This will generate a new unencrypted `p` directory that will be used by the Alpha process. In this case, you need to pass the `--encryption key-file`, `--encrypted` and `--encrypted_out` flags.
+
+```bash
+# Encryption Key from the file path
+dgraph bulk --files "" --schema "" --zero "" \
+  --encrypted="true" --encrypted_out="false" \
+  --encryption key-file=""
+
+# Encryption Key from HashiCorp Vault
+dgraph bulk --files "" --schema "" --zero "" \
+  --encrypted="true" --encrypted_out="false" \
+  --vault addr="http://localhost:8200";enc-field="enc_key";enc-format="raw";path="secret/data/dgraph/alpha";role-id-file="./role_id";secret-id-file="./secret_id"
+
+```
+
+In this case, we pass the flag `--encrypted=true` because the exported data has been taken from an encrypted Dgraph cluster, and the flag `--encrypted_out=false` to specify that the `p` directory generated by the bulk loader process should be unencrypted.
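+The example above decrypts data during an initial import; the opposite migration (unencrypted input, encrypted output) uses the same flags with the values swapped. A minimal sketch, with placeholder file names rather than fixed values:
+
+```sh
+# Unencrypted export in, encrypted p directory out:
+# --encrypted=false marks the input as plain data, --encrypted_out=true encrypts
+# the output, and the key passed via --encryption protects the generated p directory.
+dgraph bulk --files "data.rdf.gz" --schema "data.schema" --zero "localhost:5080" \
+  --encrypted="false" --encrypted_out="true" \
+  --encryption key-file="./enc_key_file"
+```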
+
 
 ## Tuning & monitoring
 
 ### Performance Tuning
diff --git a/content/howto/importdata/import-data-cloud.md b/content/howto/importdata/import-data-cloud.md
deleted file mode 100644
index 9dcf35e2..00000000
--- a/content/howto/importdata/import-data-cloud.md
+++ /dev/null
@@ -1,7 +0,0 @@
-+++
-title = "Import data to Dgraph Cloud"
-keywords = "import, data, cloud"
-[menu.main]
-  parent = "importdata"
-  weight = 2
-+++
\ No newline at end of file
diff --git a/content/howto/importdata/import-data.md b/content/howto/importdata/import-data.md
deleted file mode 100644
index 76880479..00000000
--- a/content/howto/importdata/import-data.md
+++ /dev/null
@@ -1,7 +0,0 @@
-+++
-title = "Import data to Dgraph"
-keywords = "import, data, self hosted"
-[menu.main]
-  parent = "importdata"
-  weight = 3
-+++
\ No newline at end of file
diff --git a/content/deploy/fast-data-loading/live-loader.md b/content/howto/importdata/live-loader.md
similarity index 86%
rename from content/deploy/fast-data-loading/live-loader.md
rename to content/howto/importdata/live-loader.md
index 436ec37d..f37d8d32 100644
--- a/content/deploy/fast-data-loading/live-loader.md
+++ b/content/howto/importdata/live-loader.md
@@ -1,11 +1,16 @@
 +++
 date = "2017-03-20T22:25:17+11:00"
-title = "Live Loader"
-weight = 12
+title = "Live import"
+weight = 3
 [menu.main]
-  parent = "fast-data-loading"
+  parent = "importdata"
 +++
 
+[Dgraph Live Loader]({{< relref "live-loader.md" >}}) (run with `dgraph live`) imports data on a running Dgraph instance (which may have prior data). It uses the Go client to send mutations to Dgraph and has options to handle unique ID assignment and to update existing data.
+
+{{% notice "note" %}} Both options accept [RDF N-Quad/Triple
+data](https://www.w3.org/TR/n-quads/) or JSON format. Refer to [data migration]({{< relref "about-data-migration.md" >}}) to see how to convert other data formats.{{% /notice %}}
+
 Dgraph Live Loader (run with `dgraph live`) is a small helper program which reads RDF N-Quads from a gzipped file, batches them up, creates mutations (using the go client) and shoots off to Dgraph.
 
 Dgraph Live Loader correctly handles assigning unique IDs to blank nodes across multiple files, and can optionally persist them to disk to save memory, in case the loader was re-run.
@@ -40,6 +45,28 @@ dgraph live \
   --zero 
 ```
 
+## Importing data with Live Loader
+
+It is possible to import data into a Dgraph Cloud backend using [live loader](https://dgraph.io/docs/deploy/#live-loader). In order to import data, do the following steps:
+
+1. First import your schema into your Dgraph Cloud backend, using either the [Schema API](/admin/schema) or via [the Schema Page](https://cloud.dgraph.io/_/schema).
+2. Log into Dgraph Cloud, and find your backend's `gRPC Endpoint` on the Settings page. This will look like `frozen-mango.grpc.us-west-1.aws.cloud.dgraph.io:443`
+
+{{% notice "note" %}}
+The gRPC endpoint URL must have the string `.grpc.` added after the domain prefix. Without this change, Live Loader will not be able to find the endpoint.
+{{% /notice %}}
+
+3. Run the live loader as follows:
+
+   ```
+   docker run -it --rm -v /path/to/g01.json.gz:/tmp/g01.json.gz dgraph/dgraph:v21.03-slash \
+     dgraph live --slash_grpc_endpoint=:443 -f /tmp/g01.json.gz -t 
+   ```
+
+{{% notice "note" %}}
+Running this via Docker requires you to use an unreleased tag (either `master` or `v21.03-slash`).
+{{% /notice %}} + ## Load from S3 To live load from [Amazon S3 (Simple Storage Service)](https://aws.amazon.com/s3/), you must have either permissions to access the S3 bucket from the system performing live load (see [IAM setup](#iam-setup) below) or explicitly add the following AWS credentials set via environment variables: @@ -130,6 +157,9 @@ dgraph live \ --creds="user=groot;password=password;namespace=0" \ --force-namespace 123 ``` +{{% notice "note" %}} +The Live loader requires that the `namespace` from the data and schema files exist before loading the data. +{{% /notice %}} ### Encrypted imports (Enterprise Feature) @@ -148,7 +178,20 @@ dgraph live \ --schema \ --encryption key-file= ``` +You can import your encrypted data into a new Dgraph Alpha node without encryption enabled. + +```bash +# Encryption Key from the file path +dgraph live --files "" --schema "" \ + --alpha "" --zero "" \ + --encryption key-file="" +# Encryption Key from HashiCorp Vault +dgraph live --files "" --schema "" \ + --alpha "" --zero "" \ + --vault addr="http://localhost:8200";enc-field="enc_key";enc-format="raw";path="secret/data/dgraph/alpha";role-id-file="./role_id";secret-id-file="./secret_id" + +``` ## Batch upserts With batch upserts in Live Loader, you can insert big data-sets (multiple files) into an existing cluster that might contain nodes that already exist in the graph. diff --git a/content/installation/kubernetes.md b/content/installation/kubernetes.md index 29388c93..222ec0b4 100644 --- a/content/installation/kubernetes.md +++ b/content/installation/kubernetes.md @@ -267,7 +267,7 @@ In order to expose the Alpha service and Ratel service publicly you can use Kube ##### LoadBalancer (Public Internet) -To use an external load balancer, set the service type to `LoadBalancer`. +To use an external load balancer, set the service type to `LoadBalancer`. {{% notice "note" %}}For security purposes we recommend limiting access to any public endpoints, such as using a white list.{{% /notice %}} @@ -294,7 +294,7 @@ An external load balancer can be configured to face internally to a private subn |Google Cloud|[GKE: Internal Load Balancing](https://cloud.google.com/kubernetes-engine/docs/how-to/internal-load-balancing)|`cloud.google.com/load-balancer-type: "Internal"`| -As an example, using Amazon [EKS](https://aws.amazon.com/eks/) as the provider, you could create a Helm chart configuration values like this below: +As an example, using Amazon [EKS](https://aws.amazon.com/eks/) as the provider, you could create a Helm chart configuration values like this below: ```yaml # my-config-values.yaml @@ -659,7 +659,7 @@ require you to update the StatefulSet configuration. ## Kubernetes and Bulk Loader You may want to initialize a new cluster with an existing data set such as data -from the [Dgraph Bulk Loader]({{< relref "deploy/fast-data-loading/bulk-loader.md" >}}). You can use [Init +from the [Dgraph Bulk Loader]({{< relref "bulk-loader.md" >}}). You can use [Init Containers](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) to copy the data to the pod volume before the Alpha process runs. 
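To make the Init Containers approach above concrete, here is a rough sketch of the copy step such a container could run; the staging location `/staging/out` and the `/dgraph` mount path are assumptions for illustration, not fixed paths:

```sh
# Runs inside an init container that mounts the Alpha's data volume at /dgraph.
# Copy this group's bulk-loader output into place before the Alpha process starts,
# skipping the copy if a p directory already exists (for example, after a pod restart).
if [ ! -d /dgraph/p ]; then
  cp -r /staging/out/0/p /dgraph/p
fi
```

Each Alpha replica in a group needs a copy of that group's `p` directory, as described on the Bulk Loader page referenced above.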
diff --git a/content/migration/about-data-migration.md b/content/migration/about-data-migration.md index 611258c5..72bc8b64 100644 --- a/content/migration/about-data-migration.md +++ b/content/migration/about-data-migration.md @@ -10,9 +10,6 @@ To load CSV-formatted data or SQL data into Dgraph, first convert the dataset into one of the accepted formats ([RDF N-Quad/Triple](https://www.w3.org/TR/n-quads/) or JSON) and then load the resulting dataset into Dgraph. -After you convert the `.csv` or `.sql` files to [RDF N-Quad/Triple](https://www.w3.org/TR/n-quads/) or JSON, -you can use [Dgraph Live Loader]({{< relref "/deploy/fast-data-loading/live-loader.md" >}}) or -[Dgraph Bulk Loader]({{< relref "/deploy/fast-data-loading/bulk-loader.md" >}}) to import your data. - - - +After you convert the `.csv` or `.sql` files to [RDF N-Quad/Triple](https://www.w3.org/TR/n-quads/) or JSON, +you can use [Dgraph Live Loader]({{< relref "live-loader.md" >}}) or +[Dgraph Bulk Loader]({{< relref "bulk-loader.md" >}}) to import your data. diff --git a/content/migration/loading-csv-data.md b/content/migration/loading-csv-data.md index a437ebf3..2e15394a 100644 --- a/content/migration/loading-csv-data.md +++ b/content/migration/loading-csv-data.md @@ -8,7 +8,7 @@ weight = 6 ## Convert CSV to JSON -There are many tools available to convert CSV to JSON. You can import large data sets to Dgraph using [Dgraph Live Loader]({{< relref "/deploy/fast-data-loading/live-loader.md" >}}) or [Dgraph Bulk Loader]({{< relref "/deploy/fast-data-loading/bulk-loader.md" >}}). In these examples, the `csv2json` tool is used, and the data is imported using the **Mutate** tab in Ratel. +There are many tools available to convert CSV to JSON. You can import large data sets to Dgraph using [Dgraph Live Loader]({{< relref "live-loader.md" >}}) or [Dgraph Bulk Loader]({{< relref "bulk-loader.md" >}}). In these examples, the `csv2json` tool is used, and the data is imported using the **Mutate** tab in Ratel. ### Before you begin @@ -51,7 +51,7 @@ There are many tools available to convert CSV to JSON. You can import large data ``` This JSON file follows - the [JSON Mutation Format]({{< relref "mutations/json-mutation-format.md" >}}), it can be loaded into Dgraph using [Dgraph Live Loader]({{< relref "/deploy/fast-data-loading/live-loader.md" >}}) , [Dgraph Bulk Loader]({{< relref "/deploy/fast-data-loading/bulk-loader.md" >}}) or the programmatic clients. + the [JSON Mutation Format]({{< relref "mutations/json-mutation-format.md" >}}), it can be loaded into Dgraph using [Dgraph Live Loader]({{< relref "live-loader.md" >}}) , [Dgraph Bulk Loader]({{< relref "bulk-loader.md" >}}) or the programmatic clients. 4. To load the data to Ratel and HTTP clients. The JSON data has to be stored within the `"set"` [key]({{< relref "mutations/json-mutation-format.md#json-syntax-using-raw-http-or-ratel-ui" @@ -102,14 +102,14 @@ There are many tools available to convert CSV to JSON. You can import large data ``` 2. To get the correct JSON format, you can convert the CSV into JSON and use `jq` -to transform it in the correct format where the `connects` edge is a node `uid`. -This JSON file can be loaded into Dgraph using the programmatic clients. +to transform it in the correct format where the `connects` edge is a node `uid`. +This JSON file can be loaded into Dgraph using the programmatic clients. 
```sh $ csv2json connects.csv | jq '[ .[] | { uid: .uid, connects: { uid: .connects } } ]' ``` The output is similar to: - + ```json [ { @@ -181,4 +181,4 @@ This JSON file can be loaded into Dgraph using the programmatic clients. To reuse existing integer IDs from a CSV file as UIDs in Dgraph, use Dgraph Zero's [assign endpoint]({{< relref "deploy/dgraph-zero" >}}) before loading data to allocate a range of UIDs that can be safely assigned. {{% /notice %}} -4. Paste the output in the **Mutate** tab of **Console** in Ratel, and click **Run** to import data. \ No newline at end of file +4. Paste the output in the **Mutate** tab of **Console** in Ratel, and click **Run** to import data. diff --git a/content/migration/migrate-tool.md b/content/migration/migrate-tool.md index bf0db8df..9a3038cc 100644 --- a/content/migration/migrate-tool.md +++ b/content/migration/migrate-tool.md @@ -28,7 +28,7 @@ dgraph migrate [flags] 1. Create a `config.properties` file that has the following settings and values should not be in quotes: ```txt - user = + user = password = db = ``` @@ -59,8 +59,8 @@ After the migration is complete, two new files are available: ### Importing the data -The two files can then be imported into Dgraph using the [Dgraph Live Loader]({{< relref "/deploy/fast-data-loading/live-loader.md" >}}) -or [Bulk Loader]({{< relref "/deploy/fast-data-loading/bulk-loader.md" >}}). Sometimes you might want to customize your schema. +The two files can then be imported into Dgraph using the [Dgraph Live Loader]({{< relref "live-loader.md" >}}) +or [Bulk Loader]({{< relref "bulk-loader.md" >}}). Sometimes you might want to customize your schema. For example, you might add an index to a predicate, or change an inter-object predicate (edge) from unidirectional to bidirectional by adding the `@reverse` directive. If you would like such customizations, you should do it by editing the schema file generated by the migration tool before feeding the files to the Live Loader or Bulk Loader. @@ -73,4 +73,4 @@ unidirectional to bidirectional by adding the `@reverse` directive. If you would ```sh dgraph live --slash_grpc_endpoint=:443 -f sql.rdf --format=rdf --schema schema.txt -t ``` - For detailed instructions to import data to Dgraph cloud, see [import data](https://dgraph.io/docs/cloud/admin/import-export/). \ No newline at end of file + For detailed instructions to import data to Dgraph cloud, see [import data](https://dgraph.io/docs/cloud/admin/import-export/). diff --git a/content/mutations/batch-mutations.md b/content/mutations/batch-mutations.md deleted file mode 100644 index 09df4390..00000000 --- a/content/mutations/batch-mutations.md +++ /dev/null @@ -1,16 +0,0 @@ -+++ -date = "2017-03-20T22:25:17+11:00" -title = "Batch Mutations" -weight = 6 -[menu.main] - parent = "mutations" -+++ - -Each mutation may contain multiple RDF triples. For large data uploads many such mutations can be batched in parallel. The command `dgraph live` does just this; by default batching 1000 RDF lines into a query, while running 100 such queries in parallel. - -`dgraph live` takes as input gzipped N-Quad files (that is triple lists without `{ set {`) and batches mutations for all triples in the input. The tool has documentation of options. - -```sh -dgraph live --help -``` -See also [Fast Data Loading]({{< relref "deploy/fast-data-loading/overview.md" >}}). 
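Putting the migration workflow above end to end, a minimal sketch looks like the following; the flag names are taken from the migrate tool's documented options and the file names match the examples above, so verify both against `dgraph migrate --help` for your version:

```sh
# Generate an RDF data file and a schema file from the database
# described in config.properties.
dgraph migrate --config config.properties --output_data sql.rdf --output_schema schema.txt

# Optionally edit schema.txt first (for example, to add an index or @reverse),
# then load both files into a running cluster.
dgraph live -f sql.rdf --format=rdf --schema schema.txt
```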
From 0f06a277e7719cdc2cda23a4937720b8802be455 Mon Sep 17 00:00:00 2001
From: rderbier
Date: Mon, 23 Jan 2023 16:57:31 -0800
Subject: [PATCH 4/5] Initial revamp of import data

---
 content/howto/importdata/live-loader.md | 405 +++++++-----------------
 1 file changed, 109 insertions(+), 296 deletions(-)

diff --git a/content/howto/importdata/live-loader.md b/content/howto/importdata/live-loader.md
index f37d8d32..4ae14ea9 100644
--- a/content/howto/importdata/live-loader.md
+++ b/content/howto/importdata/live-loader.md
@@ -6,68 +6,132 @@ weight = 3
 [menu.main]
   parent = "importdata"
 +++
 
-[Dgraph Live Loader]({{< relref "live-loader.md" >}}) (run with `dgraph live`) imports data on a running Dgraph instance (which may have prior data). It uses the Go client to send mutations to Dgraph and has options to handle unique ID assignment and to update existing data.
+You can import data on a running Dgraph instance (which may have prior data) using the Dgraph CLI command [dgraph live]({{< relref "cli-command-reference.md#dgraph-live" >}}), referred to as **Live Loader**.
+Live Loader sends mutations to a Dgraph cluster and has options to handle unique ID assignment and to update existing data.
 
-{{% notice "note" %}} Both options accept [RDF N-Quad/Triple
-data](https://www.w3.org/TR/n-quads/) or JSON format. Refer to [data migration]({{< relref "about-data-migration.md" >}}) to see how to convert other data formats.{{% /notice %}}
+{{% notice "note" %}} Live Loader accepts [RDF N-Quad/Triple
+data](https://www.w3.org/TR/n-quads/) or JSON in plain or gzipped format. Refer to [data migration]({{< relref "about-data-migration.md" >}}) to see how to convert other data formats.{{% /notice %}}
 
-Dgraph Live Loader (run with `dgraph live`) is a small helper program which reads RDF N-Quads from a gzipped file, batches them up, creates mutations (using the go client) and shoots off to Dgraph.
+## Before you begin
+Verify that you have a local folder `` containing
+- at least one **data file** in RDF or JSON, in plain or gzip format, with the data to import
+- an optional **schema file**.
 
-Dgraph Live Loader correctly handles assigning unique IDs to blank nodes across multiple files, and can optionally persist them to disk to save memory, in case the loader was re-run.
+These files are generated by an [export]({{< relref "about-export.md" >}}) or by a [data migration]({{< relref "about-data-migration.md" >}}) tool.
 
-{{% notice "note" %}} Dgraph Live Loader can optionally write the `xid`->`uid` mapping to a directory specified using the `--xidmap` flag, which can reused
-given that live loader completed successfully in the previous run.{{% /notice %}}
-{{% notice "note" %}} Live Loader only accept [RDF N-Quad/Triple
-data](https://www.w3.org/TR/n-quads/) or JSON in plain or gzipped format. Data
-in other formats must be converted.{{% /notice %}}
+## Importing data on Dgraph Cloud
+1. Obtain the dgraph binary or the latest Docker image by following the [installation]({{< relref "download.md" >}}) instructions.
+   This is required to run the Dgraph CLI command `dgraph live`.
+1. Obtain the `GRPC endpoint` of your Dgraph Cloud backend and a valid `Client API key`.
+
+   An administrator can get this information with the following steps:
+   1. Log into the Dgraph Cloud account and select the backend.
+   2. In the `Admin` section of the Dgraph Cloud console, go to `Settings` and copy the value of the `gRPC Endpoint` from the `General` tab.
+   3. Access the `API Keys` tab to generate a `Client API Key`.
+
+
+{{% notice "note" %}} The gRPC endpoint is different from the `GraphQL endpoint` that you can find in the `Overview` section. The gRPC endpoint looks like `frozen-mango.grpc.us-west-1.aws.cloud.dgraph.io:443` {{% /notice %}}
+
+
+3. Run the live loader as follows:
+{{% tabs %}} {{< tab "Docker" >}}
+```
+docker run -it --rm -v :/tmp dgraph/dgraph:latest \
+  dgraph live --slash_grpc_endpoint -f /tmp/ -s /tmp/ -t 
+```
+
+Load multiple data files by using:
+```
+docker run -it --rm -v :/tmp dgraph/dgraph:latest \
+  dgraph live --slash_grpc_endpoint -f /tmp -s /tmp/ -t 
+```
+
+When the path provided with the `-f, --files` option is a directory, all files
+ending in .rdf, .rdf.gz, .json, and .json.gz are loaded. Be sure that your schema file has a different extension (for example, .txt or .schema).
+
+{{< /tab >}}
+{{% tab "Local" %}}
+```
+dgraph live --slash_grpc_endpoint -f / -s / -t 
+```
+Load multiple data files by using:
+```
+dgraph live --slash_grpc_endpoint -f /tmp -s /tmp/ -t 
+```
+
+When the path provided with the `-f, --files` option is a directory, all files
+ending in .rdf, .rdf.gz, .json, and .json.gz are loaded. Be sure that your schema file has a different extension (for example, .txt or .schema).
+{{% /tab %}}{{% /tabs %}}
+
+## Batch upserts
+You can use Live Loader to update existing data, either to modify existing predicates or to add new predicates to existing nodes.
+
+
+To do so, use the `-U, --upsertPredicate` flag or the `-x, --xidmap` flag.
+
+### upsertPredicate flag
+Use the `-U, --upsertPredicate` flag to specify the predicate name in your data that will serve as a unique identifier.
+
+For example:
 ```sh
-dgraph live --help # To see the available flags.
+dgraph live --files  --schema  --upsertPredicate xid
+```
 
-# Read RDFs or JSON from the passed file, and send them to Dgraph on localhost:9080.
-dgraph live --files 
+The upsert predicate must be present in the Dgraph instance or in the schema file, and must be indexed.
+For each node, Live Loader uses the node name provided in the data file as the upsert predicate value.
+For example, if your data file contains
+```
+<_:my.org/customer/1>  "John" .
+```
 
-# Read multiple RDFs or JSON from the passed path, and send them to Dgraph on localhost:9080.
-dgraph live --files <./path-to-gzipped-RDF-or-JSON-files>
+The previous command creates or updates the node whose predicate `xid` equals `my.org/customer/1` and sets its predicate `firstName` to the value `John`.
 
-# Read multiple files strictly by name.
-dgraph live --files 
+### xidmap flag
 
-# Use compressed gRPC connections to and from Dgraph.
-dgraph live --use_compression --files 
+```sh
+dgraph live --files  --schema  --xidmap 
+```
+Live Loader uses the `-x, --xidmap` directory to look up the `uid` value for each node name used in the data file, or to store the mapping between the node names and the generated `uid` for every new node.
 
-# Read RDFs and a schema file and send to Dgraph running at given address.
-dgraph live \ - --files \ - --schema \ - --alpha \ - --zero + +## Import data on Dgraph self-hosted + +Run the live loader using the the `-a, --alpha` flag as follows + +{{% tabs %}} {{< tab "Docker" >}} +``` +docker run -it --rm -v :/tmp dgraph/dgraph:latest \ + dgraph live --alpha -f /tmp/ -s /tmp/ ``` -## Importing data with Live Loader +Load multiple data files by using +``` +docker run -it --rm -v :/tmp dgraph/dgraph:latest \ + dgraph live --alpha -f /tmp -s /tmp/ +``` -It is possible to import data into a Dgraph Cloud backend using [live loader](https://dgraph.io/docs/deploy/#live-loader). In order to import data, do the following steps: +`--alpha` default value is `localhost:9080`. You can specify a comma separated list of alphas addresses in the same cluster to distribute the load. -1. First import your schema into your Dgraph Cloud backend, using either the [Schema API](/admin/schema) or via [the Schema Page](https://cloud.dgraph.io/_/schema). -2. Log into Dgraph Cloud, and find your backend's `gRPC Endpoint` on the Settings page. This will look like `frozen-mango.grpc.us-west-1.aws.cloud.dgraph.io:443` -{{% notice "note" %}} -The gRPC endpoint URL must have the string `.grpc.` added after the domain prefix. Without this change, Live Loader will not be able to find the endpoint. -{{% /notice %}} +When the path provided with `-f, --files` option is a directory, then all files +ending in .rdf, .rdf.gz, .json, and .json.gz will be loaded. Be sure that your schema file has another extension (.txt or .schema for example). -3. Run the live loader as follows: +{{< /tab >}} +{{% tab "Local" %}} + +``` + dgraph live --alpha -f / -s / +``` - ``` - docker run -it --rm -v /path/to/g01.json.gz:/tmp/g01.json.gz dgraph/dgraph:v21.03-slash \ - dgraph live --slash_grpc_endpoint=:443 -f /tmp/g01.json.gz -t - ``` -{{% notice "note" %}} -Running this via Docker requires you to use an unreleased tag (either `master` or `v21.03-slash`). -{{% /notice %}} +`--alpha` default value is `localhost:9080`. You can specify a comma separated list of alphas addresses in the same cluster to distribute the load. +{{% /tab %}}{{% /tabs %}} -## Load from S3 + +### Load from S3 To live load from [Amazon S3 (Simple Storage Service)](https://aws.amazon.com/s3/), you must have either permissions to access the S3 bucket from the system performing live load (see [IAM setup](#iam-setup) below) or explicitly add the following AWS credentials set via environment variables: @@ -76,7 +140,7 @@ To live load from [Amazon S3 (Simple Storage Service)](https://aws.amazon.com/s3 `AWS_ACCESS_KEY_ID` or `AWS_ACCESS_KEY` | AWS access key with permissions to write to the destination bucket. `AWS_SECRET_ACCESS_KEY` or `AWS_SECRET_KEY` | AWS access key with permissions to write to the destination bucket. -### IAM setup +#### IAM setup In AWS, you can accomplish this by doing the following: @@ -105,7 +169,7 @@ The short form of the S3 URL requires S3 URL is prefixed with `s3:///` (noticed {{% /notice %}} -## Load from MinIO +### Load from MinIO To live load from MinIO, you must have the following MinIO credentials set via environment variables: @@ -124,6 +188,7 @@ dgraph live \ --schema minio://minio-server:port///schema.txt ``` + ## Enterprise Features ### Multi-tenancy (Enterprise Feature) @@ -192,22 +257,6 @@ dgraph live --files "" --schema " --schema --upsertPredicate -``` ## Other Live Loader options @@ -215,9 +264,7 @@ dgraph live --files --schema - UIDs in data files. 
This is useful to avoid overriding the data in a DB already in operation. -`-f, --files`: Location of *.rdf(.gz) or *.json(.gz) file(s) to load. It can -load multiple files in a given path. If the path is a directory, then all files -ending in .rdf, .rdf.gz, .json, and .json.gz will be loaded. + `--format`: Specify file format (`rdf` or `json`) instead of getting it from filenames. This is useful if you need to define a strict format manually. @@ -230,241 +277,7 @@ Do not confuse with `-C`. `-C, --use_compression` (default: `false`): Enable compression for connections to and from the Alpha server. -`-a, --alpha` (default: `localhost:9080`): Dgraph Alpha gRPC server address to connect for live loading. This can be a comma-separated list of Alphas addresses in the same cluster to distribute the load, e.g., `"alpha:grpc_port,alpha2:grpc_port,alpha3:grpc_port"`. - -`-x, --xidmap` (default: disabled. Need a path): Store `xid` to `uid` mapping to a directory. Dgraph will save all identifiers used in the load for later use in other data ingest operations. The mapping will be saved in the path you provide and you must indicate that same path in the next load. -{{% notice "tip" %}} -Using the `--xidmap` flag is recommended if you have full control over your identifiers (Blank-nodes). Because the identifier will be mapped to a specific `uid`. -{{% /notice %}} - -The `--ludicrous` superflag's `enabled` option (default: `false`): This option allows the user to notify Live Loader that the Alpha server is running in ludicrous mode. -Live Loader, by default, does smart batching of data to avoid transaction conflicts, which improves the performance in normal mode. -Since there's no conflict detection in Ludicrous mode, smart batching is disabled to speed up data ingestion. - -{{% notice "note" %}} -You should only use the `--ludicrous` superflag's `enabled` option if Dgraph is also running in [ludicrous mode]({{< relref "ludicrous-mode.md" >}}). -{{% /notice %}} - -`-U, --upsertPredicate` (default: disabled): Runs Live Loader in `upsertPredicate` mode. The provided value will be used to store blank nodes as a `xid`. `--vault` [superflag's]({{< relref "deploy/cli-command-reference" >}}) options specify the Vault server address, role id, secret id, and field that contains the encryption key required to decrypt the encrypted export. - -## `upsertPredicate` example - -You might find that discrete pieces of information regarding entities are arriving through independent data feeds. -The feeds might involve adding basic information (first and last name), income, and address in separate files. -You can use the live loader to correlate individual records from these files and combine attributes to create a consolidated Dgraph node. - -Start by adding the following schema: - -``` -
: [uid] @reverse . -: float . -: string @index(exact) . -: string @index(exact) . -: string @index(exact) . -: string @index(exact) . -: string @index(hash) . -``` - -### The Upsert predicate - -You can upload the files individually using the live loader (`dgraph live`) with the `-U` or `--upsertPredicate` option. -Each file has records with external keys for customers (e.g., `my.org/customer/1`) and addresses (e.g., `my.org/customer/1/address/1`). - -The schema has the required fields in addition to a field named `xid`. This field will be used to hold the external key value. Please note that there's a `hash` index for the `xid` field. You will be using this `xid` field as the "Upsert" predicate (`-U` option) and pass it as an argument to the `dgraph live` command. The live loader uses the predicate's content provided by the `-U` option (`xid` in this case) to identify and update the corresponding Dgraph node. In case the corresponding Dgraph node does not exist, the live loader will create a new node. - -**File** `customerNames.rdf` - Basic information like customer's first and last name: - -``` -<_:my.org/customer/1> "John" . -<_:my.org/customer/1> "Doe" . -<_:my.org/customer/2> "James" . -<_:my.org/customer/2> "Doe" . -``` - -You can load the customer information with the following command: - -```sh -dgraph live --files customerNames.rdf --upsertPredicate "xid" -``` - -Next, you can inspect the loaded data: - -```graphql -{ - q1(func: has(firstName)){ - uid - firstName - lastName - annualIncome - xid - address{ - uid - street - xid - } - } -} -``` - -The query will return the newly created Dgraph nodes as shown below. - -```json -"q1": [ - { - "uid": "0x14689d2", - "firstName": "John", - "lastName": "Doe", - "xid": "my.org/customer/1" - }, - { - "uid": "0x14689d3", - "firstName": "James", - "lastName": "Doe", - "xid": "my.org/customer/2" - } -] -``` - -You can see the new customer added with name information and the contents of the `xid` field. -The `xid` field holds a reference to the externally provided id. - -**File** `customer_income.rdf` - Income information about the customer: - -``` -<_:my.org/customer/1> "90000" . -<_:my.org/customer/2> "75000" . -``` - -You can load the income information by running: - -```sh -dgraph live --files customer_income.rdf --upsertPredicate "xid" -``` - -Now you can execute a query to check the income data: - -```graphql -{ - q1(func: has(firstName)){ - uid - firstName - lastName - annualIncome - xid - address{ - uid - street - city - xid - } - } -} -``` - -Note that the corresponding nodes have been correctly updated with the `annualIncome` attribute. - -```json -"q1": [ - { - "uid": "0x14689d2", - "firstName": "John", - "lastName": "Doe", - "annualIncome": 90000, - "xid": "my.org/customer/1" - }, - { - "uid": "0x14689d3", - "firstName": "James", - "lastName": "Doe", - "annualIncome": 75000, - "xid": "my.org/customer/2" - } -] -``` - -**File** `customer_address.rdf` - Address information: - -``` -<_:my.org/customer/1>
<_:my.org/customer/1/address/1> . -<_:my.org/customer/1/address/1> "One High Street" . -<_:my.org/customer/1/address/1> "London" . -<_:my.org/customer/2>
<_:my.org/customer/2/address/1> . -<_:my.org/customer/2/address/1> "Two Main Street" . -<_:my.org/customer/2/address/1> "New York" . -<_:my.org/customer/2>
<_:my.org/customer/2/address/2> . -<_:my.org/customer/2/address/2> "Ten Main Street" . -<_:my.org/customer/2/address/2> "Mumbai" . -``` - -You can extend the same approach to update `uid` predicates. -To load the addresses linked to customers, you can launch the live loader as below. - -```sh -dgraph live --files customer_address.rdf --upsertPredicate "xid" -``` - -You can check the output of the query: - -```graphql -{ - q1(func: has(firstName)){ - uid - firstName - lastName - annualIncome - xid - address{ - uid - street - xid - } - } -} -``` - -The addresses are correctly added as a `uid` predicate in the respective customer nodes. - -```json -"q1": [ - { - "uid": "0x14689d2", - "firstName": "John", - "lastName": "Doe", - "annualIncome": 90000, - "xid": "my.org/customer/1", - "address": [ - { - "uid": "0x1945bb6", - "street": "One High Street", - "city": "London", - "xid": "my.org/customer/1/address/1" - } - ] - }, - { - "uid": "0x14689d3", - "firstName": "James", - "lastName": "Doe", - "annualIncome": 75000, - "xid": "my.org/customer/2", - "address": [ - { - "uid": "0x1945bb4", - "street": "Two Main Street", - "city": "New York", - "xid": "my.org/customer/2/address/1" - }, - { - "uid": "0x1945bb5", - "street": "Ten Main Street", - "city": "Mumbai", - "xid": "my.org/customer/2/address/2" - } - ] - } -] -``` From 9e6d5d802c22f549b33023f9ad712360b19cf36b Mon Sep 17 00:00:00 2001 From: rderbier Date: Thu, 26 Jan 2023 07:44:03 -0800 Subject: [PATCH 5/5] change page order --- content/howto/importdata/bulk-loader.md | 2 +- content/howto/importdata/live-loader.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/content/howto/importdata/bulk-loader.md b/content/howto/importdata/bulk-loader.md index eba18af9..96f0c30d 100644 --- a/content/howto/importdata/bulk-loader.md +++ b/content/howto/importdata/bulk-loader.md @@ -1,7 +1,7 @@ +++ date = "2017-03-20T22:25:17+11:00" title = "Initial import (Bulk Loader)" -weight = 2 +weight = 3 [menu.main] parent = "importdata" +++ diff --git a/content/howto/importdata/live-loader.md b/content/howto/importdata/live-loader.md index 4ae14ea9..5b194629 100644 --- a/content/howto/importdata/live-loader.md +++ b/content/howto/importdata/live-loader.md @@ -1,7 +1,7 @@ +++ date = "2017-03-20T22:25:17+11:00" title = "Live import" -weight = 3 +weight = 2 [menu.main] parent = "importdata" +++