# Live GDS Graph Migration
> powered by neo4j-arrow 🏹


In [1]:
%pip install pyarrow==5.0.0 networkx==2.5.1
%pip install --force-reinstall git+https://github.com/voutilad/gds-python.git

import neo4j_arrow as na
from gds_python import GDS

You should consider upgrading via the '/home/dave/jupyter/venv/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
Collecting git+https://github.com/voutilad/gds-python.git
  Cloning https://github.com/voutilad/gds-python.git to /tmp/pip-req-build-1alxm590
  Running command git clone -q https://github.com/voutilad/gds-python.git /tmp/pip-req-build-1alxm590
  Resolved https://github.com/voutilad/gds-python.git to commit 951bba4fbea66ae0fbf16def453b657ce7f2a3b5
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
[?25hCollecting networkx==2.5.1
  Using cached networkx-2.5.1-py3-none-any.whl (1.6 MB)
Collecting neo4j<5.0.0,>=4.2.1
  Using cached neo4j-4.3.7-py3-none-any.whl
Collecting decorator<5,>=4.3
  Using cached decorator-4.4.2-py2.py3-none-any.whl (9.2 kB)
Collecting pytz
  Using cached pytz-2021.3-py2.py3-none-a

## We'll be using 2 Neo4j instances

In [2]:
SOURCE = 'my-arrow-source-host'
TARGET = 'my-arrow-target-host'
GRAPH = 'identity'

- **SOURCE** is colocated with our Jupyter kernel in Montreal ⛄.
- **TARGET** is in a different GCP region (somewhere in Iowa 🌽).

### Create our source Graph Projection

We'll make a simple graph projection and run FastRP on it to get some additional data. Easy peasy:


In [3]:
gds = GDS(f'bolt://{SOURCE}:7687', 'neo4j', 'password').connect()

if gds.graph.exists(GRAPH)[0]['exists']:
    gds.graph.drop(GRAPH)

gds.graph.create(GRAPH,
                 ['Client', 'Email', 'SSN', 'Phone'],
                 {
                      'HAS_EMAIL': { 'orientation': 'UNDIRECTED' },
                      'HAS_SSN': { 'orientation': 'UNDIRECTED' },
                      'HAS_PHONE': { 'orientation': 'UNDIRECTED' }
                  },
                 { 'readConcurrency': 58 })

gds.fastRP.mutate(GRAPH, {
    'embeddingDimension': 256,
    'mutateProperty': 'fastRp',
    'concurrency': 58
})


[{'nodePropertiesWritten': 45169,
  'mutateMillis': 0,
  'nodeCount': 45169,
  'createMillis': 0,
  'computeMillis': 30,
  'configuration': {'normalizationStrength': 0.0,
   'iterationWeights': [0.0, 1.0, 1.0],
   'embeddingDimension': 256,
   'relationshipWeightProperty': None,
   'nodeLabels': ['*'],
   'sudo': False,
   'relationshipTypes': ['*'],
   'mutateProperty': 'fastRp',
   'username': None,
   'concurrency': 58}}]

## Make Sure our Target is Blank

In [4]:
gds_target = GDS(f'bolt://{TARGET}:7687', 'neo4j', 'password').connect()

if gds_target.graph.exists(GRAPH)[0]['exists']:
    gds_target.graph.drop(GRAPH)
print('clean and ready!')

clean and ready!


## Let's move a Graph!

### First we'll create our `neo4j-arrow` clients

In [5]:
source = na.Neo4jArrow('neo4j', 'password', (SOURCE, 9999), tls=True, verifyTls=False)
target = na.Neo4jArrow('neo4j', 'password', (TARGET, 9999))


### Now let's move the nodes...

In [6]:
nodes = source.stream(source.gds_nodes(GRAPH, properties=['fastRp']))
ticket = target.gds_write_nodes(GRAPH)
rows, nbytes = target.put_stream_batches(ticket, nodes)
print(f'migrated {rows:,} nodes ({(nbytes >> 20):,} MiB)')

wrote 52 batches, 47,190,689 bytes
migrated 52 nodes (45 MiB)


## And now the relationships!

In [8]:
rels = source.stream(source.gds_relationships(GRAPH))
ticket = target.gds_write_relationships(GRAPH)
rows, nbytes = target.put_stream_batches(ticket, rels)
print(f'migrated {rows:,} relationships ({(nbytes >> 20):,} MiB)')

wrote 52 batches, 2,736,232 bytes
migrated 52 relationships (2 MiB)


## And now let's check our work!

In [10]:
gds_target.graph.list(GRAPH)

[{'degreeDistribution': {'p99': 6,
   'min': 0,
   'max': 6,
   'mean': 2.9940888662578318,
   'p90': 6,
   'p50': 2,
   'p999': 6,
   'p95': 6,
   'p75': 4},
  'graphName': 'identity',
  'database': 'neo4j',
  'memoryUsage': '391 MiB',
  'sizeInBytes': 410454064,
  'nodeProjection': None,
  'relationshipProjection': None,
  'nodeQuery': None,
  'relationshipQuery': None,
  'nodeCount': 45169,
  'relationshipCount': 67620,
  'nodeFilter': None,
  'relationshipFilter': None,
  'density': 3.3143916780218646e-05,
  'creationTime': neo4j.time.DateTime(2021, 10, 26, 15, 4, 53.810041, tzinfo=<StaticTzInfo 'Etc/UTC'>),
  'modificationTime': neo4j.time.DateTime(2021, 10, 26, 15, 4, 49.487811, tzinfo=<StaticTzInfo 'Etc/UTC'>),
  'schema': {'relationships': {'HAS_SSN': {},
    'HAS_EMAIL': {},
    'HAS_PHONE': {}},
   'nodes': {'Email': {'fastRp': 'List of Float (DefaultValue(null), TRANSIENT)'},
    'Client': {'fastRp': 'List of Float (DefaultValue(null), TRANSIENT)'},
    'Phone': {'fastRp': '