# Python notebook 

## Graph Algorithms
Amazon co-purchase data analysis

https://neo4j-contrib.github.io/neo4j-graph-algorithms/#_algorithms_2


In [1]:
import matplotlib.pyplot as plt
import networkx as nx
import pickle
import pandas as pd
from neo4jrestclient.client import GraphDatabase
from neo4jrestclient.constants import RAW
from neo4jrestclient.client import Node 

In [2]:
q = """MATCH p=(n:Product) OPTIONAL MATCH (n)-[r:co_pur]->() RETURN n.id, n.status, n.sales_rank, count(r);"""

In [3]:
def neo4(q):
    from neo4jrestclient.client import GraphDatabase
    from neo4jrestclient.constants import RAW
    from neo4jrestclient.client import Node 
    db = GraphDatabase("http://localhost:7474/db/data")
    #retrieve a list of all nodes with the labels
    params = {}
    results = db.query(q, params=params, returns=RAW)
    records = []
    for prod in results:
        records.append(prod)     
    return records    

In [4]:
q = """MATCH p=(n:Product) OPTIONAL MATCH (n)-[r:co_pur]->() RETURN n.id, n.status, n.sales_rank, count(r);"""
records = neo4(q)
cols = ['n.id', 'n.status', 'n.sales_rank', 'count_sim']
df = pd.DataFrame.from_records(records, columns=cols)  

In [6]:
#make a stream of all triangle relationships
q = "CALL algo.triangle.stream('Product','co_pur') yield nodeA,nodeB,nodeC;"
records = neo4(q)
cols = ['n.a', 'n.b', 'n.c']
df = pd.DataFrame.from_records(records, columns=cols)
df.shape
df.head()

Unnamed: 0,n.a,n.b,n.c
0,274291,335390,335391
1,274291,335390,335391
2,274291,333213,335390
3,274291,333213,335391
4,274291,333213,333214


counts the number of triangles a node is member of and writes it back. Returns total triangle count and average clustering coefficient of the given graph.

In [7]:
q = """CALL algo.triangleCount('Product', 'co_pur',
{concurrency:4, write:true, writeProperty:'triangles',clusteringCoefficientProperty:'coefficient'})
YIELD loadMillis, computeMillis, writeMillis, nodeCount, triangleCount, averageClusteringCoefficient;"""
records = neo4(q)

In [8]:
cols = ['loadMillis', 'computeMillis', 'writeMillis', 'nodeCount', 'triangleCount', 'averageClusteringCoefficient']

In [9]:
df = pd.DataFrame.from_records(records, columns=cols)

In [10]:
df.head()

Unnamed: 0,loadMillis,computeMillis,writeMillis,nodeCount,triangleCount,averageClusteringCoefficient
0,5545,1641,1033,548552,14220930,0.498614


In [11]:
q = """CALL algo.triangleCount.stream('Products', 'co_pur', {concurrency:4})
YIELD nodeId, triangles;"""
records = neo4(q)
cols = ['n.id', 'n.triangles']
df = pd.DataFrame.from_records(records, columns=cols)
df.shape

(9697772, 2)

In [12]:
df.head()

Unnamed: 0,n.id,n.triangles
0,0,52
1,1,38
2,2,48
3,3,43
4,4,909


In [13]:
a = df['n.triangles'].describe()

In [14]:
a['max'],a['min'], df[df['n.triangles'] != 0].count()

(4928.0, 0.0, n.id           379814
 n.triangles    379814
 dtype: int64)

In [19]:
q = """CALL algo.pageRank.stream('Products', 'co_pur', {iterations:20, dampingFactor:0.85})
YIELD node, score
RETURN node,score order by score desc limit 5;"""
records = neo4(q)
cols = ['n.id', 'n.score']
df = pd.DataFrame.from_records(records, columns=cols)
df.shape

(5, 2)

In [20]:
df

Unnamed: 0,n.id,n.score
0,"{'metadata': {'id': 45, 'labels': ['Product']}...",582.601178
1,"{'metadata': {'id': 1038, 'labels': ['Product'...",500.177672
2,"{'metadata': {'id': 50, 'labels': ['Product']}...",496.380977
3,"{'metadata': {'id': 1039, 'labels': ['Product'...",494.316174
4,"{'metadata': {'id': 1041, 'labels': ['Product'...",427.97795


In [24]:
df.loc[0]

n.id       {'metadata': {'id': 45, 'labels': ['Product']}...
n.score                                              582.601
Name: 0, dtype: object

In [17]:
q = """CALL algo.pageRank('Product', 'co_pur', {iterations:20, dampingFactor:0.85,
write: true,writeProperty:"pagerank", concurrency:4})
YIELD nodes, iterations, loadMillis, computeMillis, writeMillis, dampingFactor, write, writeProperty;"""
records = neo4(q)
cols = ['nodes', 'iterations', 'loadMillis', 'computeMillis', 'writeMillis', 'dampingFactor', 'write', 'writeProperty']
df = pd.DataFrame.from_records(records, columns=cols)
df.shape

(1, 8)

In [18]:
df

Unnamed: 0,nodes,iterations,loadMillis,computeMillis,writeMillis,dampingFactor,write,writeProperty
0,548552,20,885,435,206,0.85,True,pagerank


q = """CALL algo.betweenness('Product','co_pur', {direction:'out',write:true, writeProperty:'centrality', concurrency:4})
YIELD nodes, minCentrality, maxCentrality, sumCentrality, loadMillis, computeMillis, writeMillis;"""

records = neo4(q)
cols = ['nodes', 'minCentrality', 'maxCentrality', 'sumCentrality', 'loadMillis', 'computeMillis', 'writeMillis']
df = pd.DataFrame.from_records(records, columns=cols)
df.shape


q = """CALL algo.closeness('Product', 'co_pur', {write:true, writeProperty:'close_centrality', concurrency:4})
YIELD nodes,loadMillis, computeMillis, writeMillis;"""
records = neo4(q)
cols = ['nodes', 'loadMillis', 'computeMillis', 'writeMillis']
df = pd.DataFrame.from_records(records, columns=cols)
df.shape