# Live GDS Graph Migration
> powered by neo4j-arrow 🏹


In [1]:
%pip install pyarrow==6.0.1 networkx==2.5.1
%pip install --force-reinstall git+https://github.com/voutilad/gds-python.git

import neo4j_arrow as na
from gds_python import GDS

Note: you may need to restart the kernel to use updated packages.
Collecting git+https://github.com/voutilad/gds-python.git
  Cloning https://github.com/voutilad/gds-python.git to /tmp/pip-req-build-b_k75bye
  Running command git clone --filter=blob:none -q https://github.com/voutilad/gds-python.git /tmp/pip-req-build-b_k75bye
  Resolved https://github.com/voutilad/gds-python.git to commit 951bba4fbea66ae0fbf16def453b657ce7f2a3b5
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Collecting neo4j<5.0.0,>=4.2.1
  Using cached neo4j-4.4.1-py3-none-any.whl
Collecting networkx==2.5.1
  Using cached networkx-2.5.1-py3-none-any.whl (1.6 MB)
Collecting decorator<5,>=4.3
  Using cached decorator-4.4.2-py2.py3-none-any.whl (9.2 kB)
Collecting pytz
  Using cached pytz-2021.3-py2.py3-none-any.whl (503 kB)
Building wheels for collected packages: gds-python
  Building wheel for gd

## We'll be using 2 Neo4j instances

In [2]:
# Hostnames of the two Neo4j instances: where the graph lives now, and where
# it is being migrated to.
SOURCE, TARGET = 'voutila-arrow-test', 'voutila-arrow-sink'

# Name of the in-memory graph projection; the same name is used on both sides.
GRAPH = 'movies'

- **SOURCE** is colocated with our Jupyter kernel in Montreal ⛄.
- **TARGET** is in a different GCP region (somewhere in Iowa 🌽).

### Create our source Graph Projection

We'll make a simple graph projection of `Movie` and `Person` nodes and run FastRP on it, which adds a 256-dimension embedding to every node as the `fastRp` property. Easy peasy:


In [3]:
# Connect to the source instance over Bolt.
# NOTE(review): credentials are hardcoded for the demo; load them from the
# environment (or getpass) before sharing this notebook.
gds = GDS(f'bolt://{SOURCE}:7687', 'neo4j', 'password').connect()

# Drop any existing projection with this name so the cell can be re-run.
if gds.graph.exists(GRAPH)[0]['exists']:
    gds.graph.drop(GRAPH)

# Single knob for both the projection read and the FastRP run; the literal 58
# was previously repeated in both calls.
CONCURRENCY = 58

# Project Movie and Person nodes with the three relationship types, each
# loaded as UNDIRECTED.
gds.graph.create(GRAPH,
                 ['Movie', 'Person'],
                 {
                     'ACTED_IN': { 'orientation': 'UNDIRECTED' },
                     'DIRECTED': { 'orientation': 'UNDIRECTED' },
                     'PRODUCED': { 'orientation': 'UNDIRECTED' }
                 },
                 { 'readConcurrency': CONCURRENCY })

# Run FastRP in mutate mode, storing a 256-dimension embedding under the
# 'fastRp' property. This call is the last expression, so its result is the
# cell's displayed output.
gds.fastRP.mutate(GRAPH, {
    'embeddingDimension': 256,
    'mutateProperty': 'fastRp',
    'concurrency': CONCURRENCY
})


[{'nodePropertiesWritten': 171,
  'mutateMillis': 0,
  'nodeCount': 171,
  'createMillis': 0,
  'computeMillis': 5,
  'configuration': {'nodeSelfInfluence': 0,
   'relationshipWeightProperty': None,
   'propertyRatio': 0.0,
   'concurrency': 58,
   'normalizationStrength': 0.0,
   'iterationWeights': [0.0, 1.0, 1.0],
   'embeddingDimension': 256,
   'nodeLabels': ['*'],
   'sudo': False,
   'relationshipTypes': ['*'],
   'featureProperties': [],
   'mutateProperty': 'fastRp',
   'username': None}}]

## Make Sure our Target is Blank

In [4]:
# Connect to the target instance over Bolt.
gds_target = GDS(f'bolt://{TARGET}:7687', 'neo4j', 'password').connect()

# If a projection with this name is already present on the target, drop it so
# the migration starts from nothing.
target_status = gds_target.graph.exists(GRAPH)[0]
if target_status['exists']:
    gds_target.graph.drop(GRAPH)
print('clean and ready!')

clean and ready!


## Let's move a Graph!

### First we'll create our `neo4j-arrow` clients

In [5]:
# Both clients share the same login and port; only the host and the TLS
# options differ.
ARROW_PORT = 9999
arrow_auth = ('neo4j', 'password')

# NOTE(review): TLS is enabled (without certificate verification) for the
# source only; the target relies on the client's defaults -- confirm that is
# intended.
source = na.Neo4jArrow(*arrow_auth, (SOURCE, ARROW_PORT), tls=True, verify_tls=False)
target = na.Neo4jArrow(*arrow_auth, (TARGET, ARROW_PORT))

### Now let's move the nodes...

In [6]:
# Ask the source for the projection's nodes, including the fastRp property,
# and open the resulting stream.
nodes_request = source.gds_nodes(GRAPH, properties=['fastRp'])
nodes = source.stream(nodes_request)

# Obtain a write ticket on the target for the same graph name and push the
# stream into it; put() reports the row count and byte count.
ticket = target.gds_write_nodes(GRAPH)
rows, nbytes = target.put(ticket, nodes)

# Report the transfer size in MiB (2**20 bytes), rounded to two decimals.
mib = round(nbytes / 2**20, 2)
print(f'migrated {rows:,} nodes ({mib:,} MiB)')

wrote 171 rows, 0.17 MiB
migrated 171 nodes (0.17 MiB)


### And now the relationships!

In [7]:
# Ask the source for the projection's relationships and open the resulting
# stream.
rels_request = source.gds_relationships(GRAPH)
rels = source.stream(rels_request)

# Obtain a relationship write ticket on the target and push the stream into
# it; put() reports the row count and byte count.
ticket = target.gds_write_relationships(GRAPH)
rows, nbytes = target.put(ticket, rels)

# Report the transfer size in MiB (2**20 bytes), rounded to two decimals.
mib = round(nbytes / 2**20, 2)
print(f'migrated {rows:,} relationships ({mib:,} MiB)')

wrote 462 rows, 0.02 MiB
migrated 462 relationships (0.02 MiB)


## And now let's check our work!

In [8]:
# Fetch the target's listing for the migrated graph; the bare name on the last
# line makes it the cell's displayed output.
target_listing = gds_target.graph.list(GRAPH)
target_listing

[{'degreeDistribution': {'p99': 22,
   'min': 0,
   'max': 26,
   'mean': 5.4035087719298245,
   'p90': 14,
   'p50': 4,
   'p999': 26,
   'p95': 16,
   'p75': 8},
  'graphName': 'movies',
  'database': 'neo4j',
  'memoryUsage': '304 MiB',
  'sizeInBytes': 319152864,
  'nodeProjection': None,
  'relationshipProjection': None,
  'nodeQuery': None,
  'relationshipQuery': None,
  'nodeCount': 171,
  'relationshipCount': 462,
  'nodeFilter': None,
  'relationshipFilter': None,
  'density': 0.015892672858617132,
  'creationTime': neo4j.time.DateTime(2022, 1, 18, 23, 4, 26, 640153000, tzinfo=<StaticTzInfo 'Etc/UTC'>),
  'modificationTime': neo4j.time.DateTime(2022, 1, 18, 23, 4, 24, 448935000, tzinfo=<StaticTzInfo 'Etc/UTC'>),
  'schema': {'relationships': {'ACTED_IN': {}, 'PRODUCED': {}, 'DIRECTED': {}},
   'nodes': {'Movie': {'fastRp': 'List of Float (DefaultValue(null), TRANSIENT)'},
    'Person': {'fastRp': 'List of Float (DefaultValue(null), TRANSIENT)'}}}}]