Skip to content

Commit

Permalink
Merge branch 'master' into untyped_storage
Browse files Browse the repository at this point in the history
  • Loading branch information
drivanov committed Mar 7, 2024
2 parents 77473a7 + 34ae70b commit c259d1b
Show file tree
Hide file tree
Showing 116 changed files with 5,635 additions and 1,159 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@
[submodule "third_party/cccl"]
path = third_party/cccl
url = https://github.com/NVIDIA/cccl.git
[submodule "third_party/liburing"]
path = third_party/liburing
url = https://github.com/axboe/liburing.git
20 changes: 17 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,6 @@ endif()
if(USE_CUDA)
message(STATUS "Build with CUDA support")
project(dgl C CXX)
# see https://github.com/NVIDIA/thrust/issues/1401
add_definitions(-DTHRUST_CUB_WRAPPED_NAMESPACE=dgl)
include(cmake/modules/CUDA.cmake)
message(STATUS "Use external CCCL library for a consistent API and performance.")
cuda_include_directories(BEFORE "${CMAKE_SOURCE_DIR}/third_party/cccl/thrust")
Expand Down Expand Up @@ -351,7 +349,23 @@ endif(EXTERNAL_PHMAP_PATH)
target_include_directories(dgl PRIVATE "tensoradapter/include")
target_include_directories(dgl PRIVATE "third_party/pcg/include")


if(CMAKE_SYSTEM_NAME MATCHES "Linux")
  # Build the vendored liburing submodule (third_party/liburing) with its own
  # configure/make build system and stage the result under the build tree.
  # Exposes two variables for later use in this file:
  #   LIBURING_INCLUDE - directory containing the installed liburing headers
  #   LIBURING         - path to the static library liburing.a
  include(ExternalProject)
  set(LIBURING_INSTALL_DIR ${CMAKE_BINARY_DIR}/third_party/liburing)
  # The install location is selected with --prefix at configure time, so the
  # install step must NOT also pass DESTDIR: DESTDIR is a staging root that is
  # prepended to the prefix, which would install the files under
  # ${LIBURING_INSTALL_DIR}${LIBURING_INSTALL_DIR}/... and leave the
  # BUILD_BYPRODUCTS / LIBURING paths below pointing at files that are never
  # created.
  ExternalProject_Add(
    liburing
    SOURCE_DIR ${CMAKE_SOURCE_DIR}/third_party/liburing
    CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=${LIBURING_INSTALL_DIR}
    BUILD_COMMAND bash -c "make -j 4"
    BUILD_IN_SOURCE ON
    INSTALL_COMMAND make install
    BUILD_BYPRODUCTS ${LIBURING_INSTALL_DIR}/lib/liburing.a
    BUILD_BYPRODUCTS ${LIBURING_INSTALL_DIR}/include
    DOWNLOAD_EXTRACT_TIMESTAMP true
  )
  set(LIBURING_INCLUDE ${LIBURING_INSTALL_DIR}/include)
  set(LIBURING ${LIBURING_INSTALL_DIR}/lib/liburing.a)
endif()

if(EXTERNAL_NANOFLANN_PATH)
include_directories(SYSTEM ${EXTERNAL_NANOFLANN_PATH})
Expand Down
18 changes: 9 additions & 9 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240123_1000"
image "dgllib/dgl-ci-cpu:v240227_1200"
args "-u root"
alwaysPull true
}
Expand All @@ -337,7 +337,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-gpu:cu116_v240123_1000"
image "dgllib/dgl-ci-gpu:cu118_v240227_1200"
args "-u root"
alwaysPull true
}
Expand Down Expand Up @@ -392,7 +392,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240123_1000"
image "dgllib/dgl-ci-cpu:v240227_1200"
args "-u root"
alwaysPull true
}
Expand All @@ -411,7 +411,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-gpu"
image "dgllib/dgl-ci-gpu:cu116_v240123_1000"
image "dgllib/dgl-ci-gpu:cu118_v240227_1200"
args "-u root --runtime nvidia"
alwaysPull true
}
Expand Down Expand Up @@ -466,7 +466,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-gpu"
image "dgllib/dgl-ci-gpu:cu116_v240123_1000"
image "dgllib/dgl-ci-gpu:cu118_v240227_1200"
args "-u root --runtime nvidia"
alwaysPull true
}
Expand All @@ -491,7 +491,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240123_1000"
image "dgllib/dgl-ci-cpu:v240227_1200"
args "-u root --shm-size=4gb"
alwaysPull true
}
Expand Down Expand Up @@ -544,7 +544,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-gpu"
image "dgllib/dgl-ci-gpu:cu116_v240123_1000"
image "dgllib/dgl-ci-gpu:cu118_v240227_1200"
args "-u root --runtime nvidia --shm-size=8gb"
alwaysPull true
}
Expand Down Expand Up @@ -573,7 +573,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240123_1000"
image "dgllib/dgl-ci-cpu:v240227_1200"
args "-u root --shm-size=4gb"
alwaysPull true
}
Expand Down Expand Up @@ -620,7 +620,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v240123_1000"
image "dgllib/dgl-ci-cpu:v240227_1200"
args "-u root"
alwaysPull true
}
Expand Down
2 changes: 1 addition & 1 deletion conda/dgl/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package:
name: dgl{{ environ.get('DGL_PACKAGE_SUFFIX', '') }}
version: 2.1{{ environ.get('DGL_VERSION_SUFFIX', '') }}
version: 2.2{{ environ.get('DGL_VERSION_SUFFIX', '') }}

source:
git_rev: {{ environ.get('DGL_RELEASE_BRANCH', 'master') }}
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile.ci_cpu
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# CI docker CPU env
# Adapted from github.com/dmlc/tvm/docker/Dockerfile.ci_cpu
FROM ubuntu:18.04
FROM ubuntu:20.04

ENV TZ=US
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
Expand Down
7 changes: 1 addition & 6 deletions docker/Dockerfile.ci_gpu
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
# CI docker GPU env
FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu18.04
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04

ENV TZ=US
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

# Update outdated public key from NVIDIA
RUN apt-key del 3bf863cc
RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
RUN apt-get update --fix-missing

COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
RUN bash /install/ubuntu_install_core.sh

Expand Down
2 changes: 1 addition & 1 deletion docker/install/conda_env/torch_cpu.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: pytorch-ci
dependencies:
- python=3.8
- python=3.10
- pip
- pip:
- --find-links https://download.pytorch.org/whl/torch_stable.html
Expand Down
2 changes: 1 addition & 1 deletion docker/install/conda_env/torch_cpu_pip.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ rdflib
requests[security]==2.28
scikit-learn
scipy
torch==1.13.0+cpu
torch==2.0.0+cpu
torchdata
torcheval
torchmetrics
Expand Down
2 changes: 1 addition & 1 deletion docker/install/conda_env/torch_gpu.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: pytorch-ci
dependencies:
- python=3.8
- python=3.10
- pip
- pip:
- --find-links https://download.pytorch.org/whl/torch_stable.html
Expand Down
2 changes: 1 addition & 1 deletion docker/install/conda_env/torch_gpu_pip.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ rdflib
requests[security]==2.28
scikit-learn
scipy
torch==1.13.0+cu116
torch==2.0.0+cu118
torchdata
torcheval
torchmetrics
Expand Down
1 change: 1 addition & 0 deletions docs/source/api/python/dgl.distributed.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,4 @@ Split and Load Partitions
load_partition_feats
load_partition_book
partition_graph
dgl_partition_to_graphbolt
7 changes: 0 additions & 7 deletions docs/source/api/python/nn-pytorch.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,7 @@ Utility Modules
~dgl.nn.pytorch.explain.PGExplainer
~dgl.nn.pytorch.explain.HeteroPGExplainer
~dgl.nn.pytorch.utils.LabelPropagation
~dgl.nn.pytorch.graph_transformer.DegreeEncoder
~dgl.nn.pytorch.utils.LaplacianPosEnc
~dgl.nn.pytorch.graph_transformer.BiasedMultiheadAttention
~dgl.nn.pytorch.graph_transformer.EGTLayer
~dgl.nn.pytorch.graph_transformer.GraphormerLayer
~dgl.nn.pytorch.graph_transformer.PathEncoder
~dgl.nn.pytorch.graph_transformer.SpatialEncoder
~dgl.nn.pytorch.graph_transformer.SpatialEncoder3d

Network Embedding Modules
----------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@
# -- Extension configuration -------------------------------------------------
autosummary_generate = True
autodoc_member_order = "alphabetical"
# Mock the following modules so that autodoc does not try to import them.
autodoc_mock_imports = ["dgl.nn.mxnet", "dgl.nn.tensorflow"]

intersphinx_mapping = {
"python": (
Expand Down
2 changes: 0 additions & 2 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ Welcome to Deep Graph Library Tutorials and Documentation
api/python/dgl.geometry
api/python/dgl.graphbolt
api/python/nn-pytorch
api/python/nn-tensorflow
api/python/nn-mxnet
api/python/nn.functional
api/python/dgl.ops
api/python/dgl.optim
Expand Down
7 changes: 7 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ The folder contains example implementations of selected research papers related

To quickly locate the examples of your interest, search for the tagged keywords or use the search tool on [dgl.ai](https://www.dgl.ai/).

## 2024

- <a name="argo"></a> Lin et al. ARGO: An Auto-Tuning Runtime System for Scalable GNN Training on Multi-Core Processor. [Paper link](https://arxiv.org/abs/2402.03671)
- Example code: [PyTorch](https://github.com/dmlc/dgl/tree/master/examples/pytorch/argo)

- Tags: semi-supervised node classification

## 2023

- <a name="labor"></a> Zheng Wang et al. From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited. [Paper link](https://arxiv.org/abs/2210.13339)
Expand Down
60 changes: 60 additions & 0 deletions examples/distributed/graphsage/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,63 @@ python3 ~/workspace/dgl/tools/launch.py \
--ip_config ip_config.txt \
"python3 node_classification.py --graph_name ogbn-products --ip_config ip_config.txt --num_epochs 30 --batch_size 1000 --num_gpus 4"
```

### Running with GraphBolt

To run with `GraphBolt`, the graph must first be partitioned into the `GraphBolt` data format. Please note that both the `DGL` and `GraphBolt` partitions are saved together.

```
python3 partition_graph.py --dataset ogbn-products --num_parts 2 --balance_train --balance_edges --use_graphbolt
```

#### Partition sizes compared to DGL

Compared to `DGL`, `GraphBolt` partitions are much smaller (reduced to **16%** and **19%** of the `DGL` partition size for `ogbn-products` and `ogbn-papers100M`, respectively).

`ogbn-products`

| Data Formats | File Name | Part 0 | Part 1 |
| ------------ | ---------------------------- | ------ | ------ |
| DGL | graph.dgl | 1.5GB | 1.6GB |
| GraphBolt | fused_csc_sampling_graph.pt | 255MB | 265MB |

`ogbn-papers100M`

| Data Formats | File Name | Part 0 | Part 1 |
| ------------ | ---------------------------- | ------ | ------ |
| DGL | graph.dgl | 23GB | 22GB |
| GraphBolt | fused_csc_sampling_graph.pt | 4.4GB | 4.1GB |

Then run the example with `--use_graphbolt`.

```
python3 ~/workspace/dgl/tools/launch.py \
--workspace ~/workspace/dgl/examples/pytorch/graphsage/dist/ \
--num_trainers 4 \
--num_samplers 0 \
--num_servers 2 \
--part_config data/ogbn-products.json \
--ip_config ip_config.txt \
"python3 node_classification.py --graph_name ogbn-products --ip_config ip_config.txt --num_epochs 10 --use_graphbolt"
```

#### Performance compared to `DGL`

Compared to `DGL`, `GraphBolt`'s sampler is faster (sampling time reduced to **80%** and **77%** of `DGL`'s for `ogbn-products` and `ogbn-papers100M`, respectively). `Min` and `Max` are statistics over all trainers on all nodes (machines).

As for RAM usage, shared memory usage (measured by the **shared** field of the `free` command) decreases thanks to the smaller graph partitions in `GraphBolt`, although the peak memory used by processes (measured by the **used** field of the `free` command) does not decrease.

`ogbn-products`

| Data Formats | Sample Time Per Epoch (CPU) | Test Accuracy (10 epochs) | shared | used (peak) |
| ------------ | --------------------------- | -------------------------------- | ----- | ---- |
| DGL | Min: 1.2884s, Max: 1.4159s | Min: 64.38%, Max: 70.42% | 2.4GB | 7.8GB|
| GraphBolt | Min: 1.0589s, Max: 1.1400s | Min: 61.68%, Max: 71.23% | 1.1GB | 7.8GB|


`ogbn-papers100M`

| Data Formats | Sample Time Per Epoch (CPU) | Test Accuracy (10 epochs) | shared | used (peak) |
| ------------ | --------------------------- | -------------------------------- | ----- | ---- |
| DGL | Min: 5.5570s, Max: 6.1900s | Min: 29.12%, Max: 34.33% | 84GB | 43GB |
| GraphBolt | Min: 4.5046s, Max: 4.7718s | Min: 29.11%, Max: 33.49% | 67GB | 43GB |
7 changes: 6 additions & 1 deletion examples/distributed/graphsage/node_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def main(args):
"""
host_name = socket.gethostname()
print(f"{host_name}: Initializing DistDGL.")
dgl.distributed.initialize(args.ip_config)
dgl.distributed.initialize(args.ip_config, use_graphbolt=args.use_graphbolt)
print(f"{host_name}: Initializing PyTorch process group.")
th.distributed.init_process_group(backend=args.backend)
print(f"{host_name}: Initializing DistGraph.")
Expand Down Expand Up @@ -457,6 +457,11 @@ def main(args):
help="Pad train nid to the same length across machine, to ensure num "
"of batches to be the same.",
)
parser.add_argument(
"--use_graphbolt",
action="store_true",
help="Use GraphBolt for distributed train.",
)
args = parser.parse_args()
print(f"Arguments: {args}")
main(args)
6 changes: 6 additions & 0 deletions examples/distributed/graphsage/partition_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ def load_ogb(name, root="dataset"):
default="data",
help="Output path of partitioned graph.",
)
argparser.add_argument(
"--use_graphbolt",
action="store_true",
help="Use GraphBolt for distributed train.",
)
args = argparser.parse_args()

start = time.time()
Expand Down Expand Up @@ -127,4 +132,5 @@ def load_ogb(name, root="dataset"):
balance_ntypes=balance_ntypes,
balance_edges=args.balance_edges,
num_trainers_per_machine=args.num_trainers_per_machine,
use_graphbolt=args.use_graphbolt,
)

0 comments on commit c259d1b

Please sign in to comment.