Skip to content

Commit

Permalink
Merge pull request #12 from arangoml/code-quality
Browse files Browse the repository at this point in the history
Code quality
  • Loading branch information
cw00dw0rd committed Nov 7, 2022
2 parents 72de5da + b2a3dd7 commit dff3846
Show file tree
Hide file tree
Showing 21 changed files with 1,395 additions and 644 deletions.
58 changes: 51 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,65 @@ name: build
on:
workflow_dispatch:
push:
branches: [ master ]
branches: [ main ]
pull_request:
branches: [ master ]
branches: [ main ]
env:
PACKAGE_DIR: fastgraphml
TESTS_DIR: tests
CONDA_ENV: fastgraphml
jobs:
build:
lint:
runs-on: self-hosted
defaults:
run:
shell: bash -l {0}
name: gpu
strategy:
matrix:
python: ["3.8"]
name: Lint - Python ${{ matrix.python }}
steps:
- uses: actions/checkout@v2
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate fastgraphml
- name: Run pytest in conda env
run: conda run -n fastgraphml pytest
conda activate ${{env.CONDA_ENV}}
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]
- name: Run black
run: conda run -n ${{env.CONDA_ENV}} black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run flake8
run: conda run -n ${{env.CONDA_ENV}} flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run isort
run: conda run -n ${{env.CONDA_ENV}} isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run mypy
run: conda run -n ${{env.CONDA_ENV}} mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run bandit
run: conda run -n ${{env.CONDA_ENV}} bandit --exclude "./tests/*" --recursive ./
test:
runs-on: self-hosted
defaults:
run:
shell: bash -l {0}
strategy:
matrix:
python: ["3.8"]
name: Test - Python ${{ matrix.python }}
steps:
- uses: actions/checkout@v2
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate ${{env.CONDA_ENV}}
- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]
- name: Run pytest
run: conda run -n ${{env.CONDA_ENV}} pytest --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes --cov-fail-under=75
155 changes: 155 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
name: release
on:
workflow_dispatch:
release:
types: [published]
env:
PACKAGE_DIR: fastgraphml
TESTS_DIR: tests
CONDA_ENV: fastgraphml
jobs:
lint:
runs-on: self-hosted
defaults:
run:
shell: bash -l {0}
strategy:
matrix:
python: ["3.8"]
name: Lint - Python ${{ matrix.python }}
steps:
- uses: actions/checkout@v2
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate ${{env.CONDA_ENV}}
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]
- name: Run black
run: conda run -n ${{env.CONDA_ENV}} black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run flake8
run: conda run -n ${{env.CONDA_ENV}} flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run isort
run: conda run -n ${{env.CONDA_ENV}} isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run mypy
run: conda run -n ${{env.CONDA_ENV}} mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
- name: Run bandit
run: conda run -n ${{env.CONDA_ENV}} bandit --exclude "./tests/*" --recursive ./
test:
needs: lint
runs-on: self-hosted
defaults:
run:
shell: bash -l {0}
strategy:
matrix:
python: ["3.8"]
name: Test - Python ${{ matrix.python }}
steps:
- uses: actions/checkout@v2
- name: Setup Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python }}
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate ${{env.CONDA_ENV}}
- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]
- name: Run pytest
run: conda run -n ${{env.CONDA_ENV}} pytest --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes --cov-fail-under=75
release:
needs: test
runs-on: ubuntu-latest
name: Release package
steps:
- name: Activating conda env
run: |
source ~/miniconda3/etc/profile.d/conda.sh
conda activate ${{env.CONDA_ENV}}
- uses: actions/checkout@v2

- name: Fetch complete history for all tags and branches
run: git fetch --prune --unshallow

- name: Setup python
uses: actions/setup-python@v2
with:
python-version: "3.8"

- name: Install release packages
run: conda run -n ${{env.CONDA_ENV}} pip install setuptools wheel twine setuptools-scm[toml]

- name: Install packages
run: conda run -n ${{env.CONDA_ENV}} pip install .[dev]

- name: Build distribution
run: conda run -n ${{env.CONDA_ENV}} python setup.py sdist bdist_wheel

- name: Publish to PyPI Test
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD_TEST }}
run: conda run -n ${{env.CONDA_ENV}} twine upload --repository testpypi dist/* #--skip-existing
- name: Publish to PyPI
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
run: conda run -n ${{env.CONDA_ENV}} twine upload --repository pypi dist/* #--skip-existing

changelog:
needs: release
runs-on: ubuntu-latest
name: Update Changelog
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0

- name: Create new branch
run: git checkout -b actions/changelog

- name: Set branch upstream
run: git push -u origin actions/changelog
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Setup python
uses: actions/setup-python@v2
with:
python-version: "3.8"

- name: Install release packages
run: conda run -n ${{env.CONDA_ENV}} pip install wheel gitchangelog pystache

- name: Set variables
run: echo "VERSION=$(curl ${GITHUB_API_URL}/repos/${GITHUB_REPOSITORY}/releases/latest | python -c "import sys; import json; print(json.load(sys.stdin)['tag_name'])")" >> $GITHUB_ENV

- name: Generate newest changelog
run: gitchangelog ${{env.VERSION}} > CHANGELOG.md

- name: Make commit for auto-generated changelog
uses: EndBug/add-and-commit@v7
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
add: "CHANGELOG.md"
branch: actions/changelog
message: "!gitchangelog"

- name: Create pull request for the auto generated changelog
run: |
echo "PR_URL=$(gh pr create \
--title "changelog: release ${{env.VERSION}}" \
--body "beep boop, i am a robot" \
--label documentation)" >> $GITHUB_ENV
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Alert developer of open PR
run: echo "Changelog $PR_URL is ready to be merged by developer."
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
__pycache__/
build/
fastgraphml.egg-info/
fastgraphml/.DS_Store
dist/

33 changes: 33 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: check-merge-conflict
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
- id: isort
- repo: https://github.com/psf/black
rev: 22.8.0
hooks:
- id: black
args:
- -l 88
- repo: https://github.com/pre-commit/mirrors-mypy
rev: "v0.982"
hooks:
- id: mypy
additional_dependencies: [types-requests]
exclude: ^tests/
- repo: https://github.com/PyCQA/flake8
rev: 5.0.4
hooks:
- id: flake8
args:
- "--max-line-length=88"
- "--ignore=E203, W503, E251"
- repo: https://github.com/PyCQA/bandit
rev: 1.7.4
hooks:
- id: bandit
exclude: ^tests/
51 changes: 32 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
Given an input graph it generates Graph Embeddings using a Low-Code framework built on top of [PyG](https://pytorch-geometric.readthedocs.io/en/latest/). The package supports training on both GPU and CPU enabled machines. Training jobs on GPUs result in much faster execution and increased performance when it comes to handling large graphs as compared to CPUs. In addition, the framework provides tight integration with [ArangoDB](https://www.arangodb.com/), which is a scalable, fully managed graph database, document store and search engine in one place. Once Graph Embeddings are generated, they can be used for various downstream machine learning tasks like Node Classification, Link Prediction, Visualisation, Community Detection, Similarity Search, Recommendation, etc.

## Installation
#### Additional Dependencies
1. [pytorch](https://pytorch.org/)
2. [pyg](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html)
3. [FAISS](https://github.com/facebookresearch/faiss/blob/main/INSTALL.md)

Note: For FAISS-CPU one needs numba==0.53.0
#### Required Dependencies
1. PyTorch `1.12.*` is required.
* Install using previous version that matches your CUDA version: [pytorch](https://pytorch.org/get-started/previous-versions/)
* To find your installed CUDA version run `nvidia-smi` in your terminal.
2. [pyg](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html)
3. [FAISS](https://github.com/facebookresearch/faiss/blob/main/INSTALL.md)
* Note: For FAISS-CPU one needs `numba==0.53.0`

#### Latest Release
```
Expand All @@ -28,43 +29,55 @@ git clone https://github.com/arangoml/fastgraphml.git

```python
from fastgraphml.graph_embeddings import SAGE, GAT
from fastgraphml.graph_embeddings import downstream_tasks
from fastgraphml.graph_embeddings import downstream_tasks
from fastgraphml import Datasets
from arango import ArangoClient

# Initialize the ArangoDB client.
client = ArangoClient("http://127.0.0.1:8529")
db = client.db('_system', username='root')
db = client.db('_system', username='root', password='openSesame')

# Loading Amazon Computer Products dataset into ArangoDB
Datasets(db).load("AMAZON_COMPUTER_PRODUCTS")

# Optionally use arangodb graph
# arango_graph = db.graph('product_graph')

# arangodb graph name
arango_graph = db.graph('cora_graph')
# metadata information of arango_graph
metagraph = {
"vertexCollections": {
"Paper": {"x": "features", "y": "label"},
"Computer_Products": {"x": "features", "y": "label"},
},
"edgeCollections": {
"Cites": {},
"bought_together": {},
},
}

# generating graph embeddings with 3 lines of code
model = SAGE(db, arango_graph, metagraph, embedding_size=64) # define graph embedding model
model._train(model, epochs=10) # train
embeddings = model.get_embeddings(model=model) # get embeddings
model = SAGE(db,'product_graph', metagraph, embedding_size=64) # define graph embedding model
model._train(epochs=10) # train
embeddings = model.get_embeddings() # get embeddings
```

#### Example Heterogeneous Graphs

```python
from fastgraphml.graph_embeddings import METAPATH2VEC, DMGI
from fastgraphml.graph_embeddings import downstream_tasks
from fastgraphml import Datasets

from arango import ArangoClient

# Initialize the ArangoDB client.
client = ArangoClient("http://127.0.0.1:8529")
db = client.db('_system', username='root')

arango_graph = db.graph("IMDB")
# Loading IMDB Dataset into ArangoDB
Datasets(db).load("IMDB_X")

# Optionally use ArangoDB Graph
# arango_graph = db.graph("IMDB")

metagraph = {
"vertexCollections": {

Expand All @@ -80,7 +93,7 @@ metapaths = [('movie', 'to','actor'),
('actor', 'to', 'movie'), ] # MAM # co-actor relationship

# generating graph embeddings with 3 lines of code
model = METAPATH2VEC(db, arango_graph, metagraph, metapaths, key_node='movie', embedding_size=128,
model = METAPATH2VEC(db, "IMDB_X", metagraph, metapaths, key_node='movie', embedding_size=128,
walk_length=5, context_size=6, walks_per_node=5, num_negative_samples=5,
sparse=True) # define model
model._train(epochs=10, lr=0.03) # train
Expand All @@ -100,8 +113,8 @@ data = dataset[0]

# generating graph embeddings with 3 lines of code
model = SAGE(pyg_graph=data, embedding_size=64) # define graph embedding model
model._train(model, epochs=10) # train
embeddings = model.get_embeddings(model=model) # get embeddings
model._train(epochs=10) # train
embeddings = model.get_embeddings() # get embeddings
```
## Models Supported

Expand Down
4 changes: 2 additions & 2 deletions examples/sage_amazon_computer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@
"source": [
"# generating graph embeddings with 3 lines of code\n",
"model = SAGE(db, arango_graph, metagraph, embedding_size=256) # define graph embedding model\n",
"model._train(model, epochs=6, lr=0.0001) # train\n",
"embeddings = model.get_embeddings(model=model) # get embeddings"
"model._train(epochs=6, lr=0.0001) # train\n",
"embeddings = model.get_embeddings() # get embeddings"
]
},
{
Expand Down
8 changes: 6 additions & 2 deletions fastgraphml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from arango_datasets.datasets import Datasets

from fastgraphml.graph_embeddings.models.dmgi import DMGI
from fastgraphml.graph_embeddings.models.gat import GAT
from fastgraphml.graph_embeddings.models.graph_sage import SAGE
from fastgraphml.graph_embeddings.models.gat import GAT
from fastgraphml.graph_embeddings.models.metapath2vec import METAPATH2VEC
from fastgraphml.graph_embeddings.models.dmgi import DMGI

__all__ = ["DMGI", "GAT", "SAGE", "METAPATH2VEC", "Datasets"]
Loading

0 comments on commit dff3846

Please sign in to comment.