Skip to content

Commit

Permalink
Merge pull request #12 from arangoml/code-quality
Browse files Browse the repository at this point in the history
Code quality
  • Loading branch information
cw00dw0rd committed Nov 7, 2022
2 parents 72de5da + b2a3dd7 commit dff3846
Show file tree
Hide file tree
Showing 21 changed files with 1,395 additions and 644 deletions.
58 changes: 51 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,65 @@ name: build
on:
workflow_dispatch:
push:
branches: [ master ]
branches: [ main ]
pull_request:
branches: [ master ]
branches: [ main ]
env:
PACKAGE_DIR: fastgraphml
TESTS_DIR: tests
CONDA_ENV: fastgraphml
jobs:
build:
lint:
runs-on: self-hosted
defaults:
run:
shell: bash -l {0}
name: gpu
strategy:
matrix:
python: ["3.8"]
name: Lint - Python ${{ matrix.python }}
steps:
- uses: actions/checkout@v2
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate fastgraphml
- name: Run pytest in conda env
run: conda run -n fastgraphml pytest
conda activate ${{env.CONDA_ENV}}
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]
- name: Run black
run: conda run -n ${{env.CONDA_ENV}} black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run flake8
run: conda run -n ${{env.CONDA_ENV}} flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run isort
run: conda run -n ${{env.CONDA_ENV}} isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run mypy
run: conda run -n ${{env.CONDA_ENV}} mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run bandit
run: conda run -n ${{env.CONDA_ENV}} bandit --exclude "./tests/*" --recursive ./
test:
runs-on: self-hosted
defaults:
run:
shell: bash -l {0}
strategy:
matrix:
python: ["3.8"]
name: Test - Python ${{ matrix.python }}
steps:
- uses: actions/checkout@v2
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate ${{env.CONDA_ENV}}
- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]
- name: Run pytest
run: conda run -n ${{env.CONDA_ENV}} pytest --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes --cov-fail-under=75
155 changes: 155 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
name: release
on:
workflow_dispatch:
release:
types: [published]
env:
PACKAGE_DIR: fastgraphml
TESTS_DIR: tests
CONDA_ENV: fastgraphml
jobs:
lint:
runs-on: self-hosted
defaults:
run:
shell: bash -l {0}
strategy:
matrix:
python: ["3.8"]
name: Lint - Python ${{ matrix.python }}
steps:
- uses: actions/checkout@v2
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate ${{env.CONDA_ENV}}
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]
- name: Run black
run: conda run -n ${{env.CONDA_ENV}} black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run flake8
run: conda run -n ${{env.CONDA_ENV}} flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run isort
run: conda run -n ${{env.CONDA_ENV}} isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run mypy
run: conda run -n ${{env.CONDA_ENV}} mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run bandit
run: conda run -n ${{env.CONDA_ENV}} bandit --exclude "./tests/*" --recursive ./
test:
needs: lint
runs-on: self-hosted
defaults:
run:
shell: bash -l {0}
strategy:
matrix:
python: ["3.8"]
name: Test - Python ${{ matrix.python }}
steps:
- uses: actions/checkout@v2
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate ${{env.CONDA_ENV}}
- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]
- name: Run pytest
run: conda run -n ${{env.CONDA_ENV}} pytest --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes --cov-fail-under=75
release:
needs: test
runs-on: ubuntu-latest
name: Release package
steps:
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate ${{env.CONDA_ENV}}
- uses: actions/checkout@v2

- name: Fetch complete history for all tags and branches
run: git fetch --prune --unshallow

- name: Setup python
uses: actions/setup-python@v2
with:
python-version: "3.8"

- name: Install release packages
run: conda run -n ${{env.CONDA_ENV}} pip install setuptools wheel twine setuptools-scm[toml]

- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]

- name: Build distribution
run: conda run -n ${{env.CONDA_ENV}} python setup.py sdist bdist_wheel

- name: Publish to PyPI Test
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD_TEST }}
run: conda run -n ${{env.CONDA_ENV}} twine upload --repository testpypi dist/* #--skip-existing
- name: Publish to PyPI
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
run: conda run -n ${{env.CONDA_ENV}} twine upload --repository pypi dist/* #--skip-existing

changelog:
needs: release
runs-on: ubuntu-latest
name: Update Changelog
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0

- name: Create new branch
run: git checkout -b actions/changelog

- name: Set branch upstream
run: git push -u origin actions/changelog
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Setup python
uses: actions/setup-python@v2
with:
python-version: "3.8"

- name: Install release packages
run: conda run -n ${{env.CONDA_ENV}} pip install wheel gitchangelog pystache

- name: Set variables
run: echo "VERSION=$(curl ${GITHUB_API_URL}/repos/${GITHUB_REPOSITORY}/releases/latest | python -c "import sys; import json; print(json.load(sys.stdin)['tag_name'])")" >> $GITHUB_ENV

- name: Generate newest changelog
run: gitchangelog ${{env.VERSION}} > CHANGELOG.md

- name: Make commit for auto-generated changelog
uses: EndBug/add-and-commit@v7
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
add: "CHANGELOG.md"
branch: actions/changelog
message: "!gitchangelog"

- name: Create pull request for the auto generated changelog
run: |
echo "PR_URL=$(gh pr create \
--title "changelog: release ${{env.VERSION}}" \
--body "beep boop, i am a robot" \
--label documentation)" >> $GITHUB_ENV
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Alert developer of open PR
run: echo "Changelog $PR_URL is ready to be merged by developer."
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
__pycache__/
build/
fastgraphml.egg-info/
fastgraphml/.DS_Store
dist/

33 changes: 33 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: check-merge-conflict
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
- id: isort
- repo: https://github.com/psf/black
rev: 22.8.0
hooks:
- id: black
args:
- -l 88
- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v0.982"
hooks:
- id: mypy
additional_dependencies: [types-requests]
exclude: ^tests/
- repo: https://github.com/PyCQA/flake8
rev: 5.0.4
hooks:
- id: flake8
args:
- "--max-line-length=88"
- "--ignore=E203, W503, E251"
- repo: https://github.com/PyCQA/bandit
rev: 1.7.4
hooks:
- id: bandit
exclude: ^tests/
51 changes: 32 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
Given an input graph it generates Graph Embeddings using a Low-Code framework built on top of [PyG](https://pytorch-geometric.readthedocs.io/en/latest/). The package supports training on both GPU and CPU enabled machines. Training jobs on GPUs result in much faster execution and increased performance when it comes to handling large graphs as compared to CPUs. In addition, the framework provides tight integration with [ArangoDB](https://www.arangodb.com/), which is a scalable, fully managed graph database, document store and search engine in one place. Once Graph Embeddings are generated, they can be used for various downstream machine learning tasks like Node Classification, Link Prediction, Visualisation, Community Detection, Similarity Search, Recommendation, etc.

## Installation
#### Additional Dependencies
1. [pytorch](https://pytorch.org/)
2. [pyg](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html)
3. [FAISS](https://github.com/facebookresearch/faiss/blob/main/INSTALL.md)

Note: For FAISS-CPU one needs numba==0.53.0
#### Required Dependencies
1. PyTorch `1.12.*` is required.
* Install using previous version that matches your CUDA version: [pytorch](https://pytorch.org/get-started/previous-versions/)
* To find your installed CUDA version run `nvidia-smi` in your terminal.
2. [pyg](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html)
3. [FAISS](https://github.com/facebookresearch/faiss/blob/main/INSTALL.md)
* Note: For FAISS-CPU one needs `numba==0.53.0`

#### Latest Release
```
Expand All @@ -28,43 +29,55 @@ git clone https://github.com/arangoml/fastgraphml.git

```python
from fastgraphml.graph_embeddings import SAGE, GAT
from fastgraphml.graph_embeddings import downstream_tasks
from fastgraphml.graph_embeddings import downstream_tasks
from fastgraphml import Datasets
from arango import ArangoClient

# Initialize the ArangoDB client.
client = ArangoClient("http://127.0.0.1:8529")
db = client.db('_system', username='root')
db = client.db('_system', username='root', password='openSesame')

# Loading Amazon Computer Products dataset into ArangoDB
Datasets(db).load("AMAZON_COMPUTER_PRODUCTS")

# Optionally use arangodb graph
# arango_graph = db.graph('product_graph')

# arangodb graph name
arango_graph = db.graph('cora_graph')
# metadata information of arango_graph
metagraph = {
"vertexCollections": {
"Paper": {"x": "features", "y": "label"},
"Computer_Products": {"x": "features", "y": "label"},
},
"edgeCollections": {
"Cites": {},
"bought_together": {},
},
}

# generating graph embeddings with 3 lines of code
model = SAGE(db, arango_graph, metagraph, embedding_size=64) # define graph embedding model
model._train(model, epochs=10) # train
embeddings = model.get_embeddings(model=model) # get embeddings
model = SAGE(db,'product_graph', metagraph, embedding_size=64) # define graph embedding model
model._train(epochs=10) # train
embeddings = model.get_embeddings() # get embeddings
```

#### Example Heterogeneous Graphs

```python
from fastgraphml.graph_embeddings import METAPATH2VEC, DMGI
from fastgraphml.graph_embeddings import downstream_tasks
from fastgraphml import Datasets

from arango import ArangoClient

# Initialize the ArangoDB client.
client = ArangoClient("http://127.0.0.1:8529")
db = client.db('_system', username='root')

arango_graph = db.graph("IMDB")
# Loading IMDB Dataset into ArangoDB
Datasets(db).load("IMDB_X")

# Optionally use ArangoDB Graph
# arango_graph = db.graph("IMDB")

metagraph = {
"vertexCollections": {

Expand All @@ -80,7 +93,7 @@ metapaths = [('movie', 'to','actor'),
('actor', 'to', 'movie'), ] # MAM # co-actor relationship

# generating graph embeddings with 3 lines of code
model = METAPATH2VEC(db, arango_graph, metagraph, metapaths, key_node='movie', embedding_size=128,
model = METAPATH2VEC(db, "IMDB_X", metagraph, metapaths, key_node='movie', embedding_size=128,
walk_length=5, context_size=6, walks_per_node=5, num_negative_samples=5,
sparse=True) # define model
model._train(epochs=10, lr=0.03) # train
Expand All @@ -100,8 +113,8 @@ data = dataset[0]

# generating graph embeddings with 3 lines of code
model = SAGE(pyg_graph=data, embedding_size=64) # define graph embedding model
model._train(model, epochs=10) # train
embeddings = model.get_embeddings(model=model) # get embeddings
model._train(epochs=10) # train
embeddings = model.get_embeddings() # get embeddings
```
## Models Supported

Expand Down
4 changes: 2 additions & 2 deletions examples/sage_amazon_computer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@
"source": [
"# generating graph embeddings with 3 lines of code\n",
"model = SAGE(db, arango_graph, metagraph, embedding_size=256) # define graph embedding model\n",
"model._train(model, epochs=6, lr=0.0001) # train\n",
"embeddings = model.get_embeddings(model=model) # get embeddings"
"model._train(epochs=6, lr=0.0001) # train\n",
"embeddings = model.get_embeddings() # get embeddings"
]
},
{
Expand Down
8 changes: 6 additions & 2 deletions fastgraphml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from arango_datasets.datasets import Datasets

from fastgraphml.graph_embeddings.models.dmgi import DMGI
from fastgraphml.graph_embeddings.models.gat import GAT
from fastgraphml.graph_embeddings.models.graph_sage import SAGE
from fastgraphml.graph_embeddings.models.gat import GAT
from fastgraphml.graph_embeddings.models.metapath2vec import METAPATH2VEC
from fastgraphml.graph_embeddings.models.dmgi import DMGI

__all__ = ["DMGI", "GAT", "SAGE", "METAPATH2VEC", "Datasets"]
Loading

0 comments on commit dff3846

Please sign in to comment.