In [1]:
from neo4j import GraphDatabase
from neo4j.exceptions import ClientError
from sklearn.manifold import TSNE

import numpy as np
import altair as alt
import pandas as pd
import os

from module.neo4j.graph_db import GraphDB
driver = GraphDatabase.driver("bolt://localhost:11012", auth=("neo4j", "erclab"))

In [2]:
result = {"label": [], "count": []}
with driver.session() as session:
    for row in session.run("CALL db.labels()"):
        label = row["label"]
        query = f"MATCH (:`{label}`) RETURN count(*) as count"
        count = session.run(query).single()["count"]
        result["label"].append(label)
        result["count"].append(count)
nodes_df = pd.DataFrame(data=result)
nodes_df.sort_values("count")

result = {"relType": [], "count": []}
with driver.session() as session:
    for row in session.run("CALL db.relationshipTypes()"):
        relationship_type = row["relationshipType"]
        query = f"MATCH ()-[:`{relationship_type}`]->() RETURN count(*) as count"
        count = session.run(query).single()["count"]
        result["relType"].append(relationship_type)
        result["count"].append(count)
rels_df = pd.DataFrame(data=result)
rels_df.sort_values("count")

print(nodes_df)
print(rels_df)

        label  count
0        User    541
1  Restaurant     20
2      Review    555
3        City     14
4     Country      2
5        Attr     53
6      Aspect     10
7        Menu     57
8    Category     19
            relType  count
0        HAS_FRIEND     14
1        HAS_REVIEW    555
2      WRITE_REVIEW    555
3             VISIT    545
4              RATE    545
5        LOCATED_IN     34
6        HAS_ASPECT     42
7                IS    104
8   ASPECT_ATTR_FOR     55
9          HAS_MENU     81
10    MENU_ATTR_FOR     54
11            ORDER     26
12     HAS_CATEGORY     49


In [None]:
with driver.session() as session:
    result = session.run("""
    CALL gds.beta.node2vec.stream({
       nodeProjection: "Restaurant",
       relationshipProjection: {
         has_menu: {
           type: "HAS_MENU",
           orientation: "UNDIRECTED"
        }
       },
       embeddingDimension: 10,
       iterations: 10,
       walkLength: 10
    })
    YIELD nodeId, embedding
    RETURN gds.util.asNode(nodeId).name AS restaurant, embedding
    """)

    embeddings_df = pd.DataFrame([dict(record) for record in result])
embeddings_df.head(20)

In [None]:
with driver.session() as session:
    result = session.run("""
    CALL gds.beta.node2vec.write({
       nodeProjection: "Restaurant",
       relationshipProjection: {
         has_menu: {
           type: "HAS_MENU",
           orientation: "UNDIRECTED"
        }
       },
       embeddingDimension: 10,
       iterations: 10,
       walkLength: 10,
       writeProperty: $embeddingProperty
    })
    """, {"embeddingProperty": "embeddingNode2vec"})

    embeddings_df = pd.DataFrame([dict(record) for record in result])
embeddings_df


In [8]:
with driver.session() as session:
    result = session.run("""
    MATCH (rest:Restaurant)-[:HAS_MENU]->(menu:Menu)
    WHERE menu.name IN $menus
    RETURN rest.name AS restaurant, rest.embeddingNode2vec AS embedding, menu.name AS menu
    """, {"menus": ["Naan", "Chicken Biryani", "Goat Biryani", "Butter Chicken"]})
    X = pd.DataFrame([dict(record) for record in result])
X.head(20)

Unnamed: 0,restaurant,embedding,menu
0,Bombay Buffet Indian Cuisine,"[-0.40109696984291077, -0.39514684677124023, 0...",Chicken Biryani
1,Silver Spoon,"[0.6921097040176392, -0.2188844531774521, -0.8...",Chicken Biryani
2,Silver Spoon,"[-0.8466023802757263, -0.8893817067146301, 0.5...",Chicken Biryani
3,Silver Spoon,"[0.6921097040176392, -0.2188844531774521, -0.8...",Butter Chicken
4,Matka Indian Cuisine,"[-0.5542222261428833, 0.9125559329986572, 0.73...",Goat Biryani
5,Silver Spoon,"[0.6921097040176392, -0.2188844531774521, -0.8...",Goat Biryani
6,Matka Indian Cuisine,"[-0.5542222261428833, 0.9125559329986572, 0.73...",Naan
7,Tamarind - The Indian Kitchen,"[-0.1513296514749527, -0.06613709032535553, -0...",Naan
8,Watan Kabob,"[-0.07595448940992355, -0.43553295731544495, 0...",Naan
9,Soma Grill,"[-0.15364454686641693, 0.5874104499816895, -0....",Naan


In [9]:
with driver.session() as session:
    result = session.run("""
    MATCH (rest:Restaurant)-[:HAS_MENU]->(menu:Menu)
    RETURN rest.name AS restaurant, rest.embeddingNode2vec AS embedding, menu.name AS menu
    """)
    X = pd.DataFrame([dict(record) for record in result])
    X = X.drop(X[(X.menu == 'Supreme Dosa')].index)
X.head()

Unnamed: 0,restaurant,embedding,menu
0,Silver Spoon,"[-0.8466023802757263, -0.8893817067146301, 0.5...",Chicken Biryani
1,Silver Spoon,"[-0.8466023802757263, -0.8893817067146301, 0.5...",Chicken
2,Silver Spoon,"[-0.8466023802757263, -0.8893817067146301, 0.5...",Chicken Pieces
3,Silver Spoon,"[-0.8466023802757263, -0.8893817067146301, 0.5...",Sauce
4,Silver Spoon,"[0.3437434136867523, -0.7157214283943176, 0.02...",Nihari


In [10]:
X_embedded = TSNE(n_components=2, random_state=6).fit_transform(list(X.embedding))
restaurants = list(X.restaurant)
df = pd.DataFrame(data = {
    "restaurant": restaurants,
    "menu": X.menu,
    "x": [value[0] for value in list(X_embedded)],
    "y": [value[1] for value in list(X_embedded)]
})
df.head()

Unnamed: 0,restaurant,menu,x,y
0,Silver Spoon,Chicken Biryani,7.986657,-1.233452
1,Silver Spoon,Chicken,-1.011733,1.787852
2,Silver Spoon,Chicken Pieces,13.51158,6.005591
3,Silver Spoon,Sauce,4.592422,9.153705
4,Silver Spoon,Nihari,-50.718754,-27.227976


In [11]:
chart = alt.Chart(df).mark_circle(size=60).encode(
    x='x',
    y='y',
    color='menu',
    tooltip=['restaurant', 'menu']
).properties(width=700, height=400)
chart.save('node2vec-color.json')
chart

In [12]:
with driver.session() as session:
    result = session.run("""
    MATCH (rest:Restaurant)-[:LOCATED_IN]->(city:City)
    RETURN rest.name AS restaurant, rest.embeddingNode2vec AS embedding, city.name AS city
    """)
    X = pd.DataFrame([dict(record) for record in result])
X.head(20)

Unnamed: 0,restaurant,embedding,city
0,Silver Spoon,"[-0.8466023802757263, -0.8893817067146301, 0.5...",Pickering
1,Hakka Town,"[-0.6866604685783386, 0.9551309943199158, -0.0...",Brampton
2,Lena's Roti & Doubles,"[0.7013679146766663, 0.20366227626800537, -0.1...",Brampton
3,Silver Spoon,"[0.3437434136867523, -0.7157214283943176, 0.02...",Brampton
4,Tamarind - The Indian Kitchen,"[-0.1513296514749527, -0.06613709032535553, -0...",Toronto
5,Silver Spoon,"[0.6921097040176392, -0.2188844531774521, -0.8...",Toronto
6,Watan Kabob,"[-0.07595448940992355, -0.43553295731544495, 0...",Mississauga
7,Silver Spoon,"[-0.14401067793369293, -0.6738306283950806, -0...",Mississauga
8,Red Chillez,"[-0.12367075681686401, -0.17448410391807556, 0...",Charlotte
9,Spice South,"[0.8074242472648621, -0.6648302674293518, 0.94...",Charlotte


In [13]:
X_embedded = TSNE(n_components=2, random_state=6).fit_transform(list(X.embedding))
restaurants = list(X.restaurant)
df = pd.DataFrame(data = {
    "restaurant": restaurants,
    "city": X.city,
    "x": [value[0] for value in list(X_embedded)],
    "y": [value[1] for value in list(X_embedded)]
})
df.head(20)

Unnamed: 0,restaurant,city,x,y
0,Silver Spoon,Pickering,7.263665,19.986679
1,Hakka Town,Brampton,-33.944355,14.182649
2,Lena's Roti & Doubles,Brampton,-5.422137,-41.021759
3,Silver Spoon,Brampton,-61.135162,13.722173
4,Tamarind - The Indian Kitchen,Toronto,-24.920488,-53.653889
5,Silver Spoon,Toronto,-21.694117,-0.29121
6,Watan Kabob,Mississauga,-12.712235,16.604668
7,Silver Spoon,Mississauga,-46.295006,30.621815
8,Red Chillez,Charlotte,-2.767915,-2.682061
9,Spice South,Charlotte,-46.666927,-44.010616


In [14]:
chart = alt.Chart(df).mark_circle(size=60).encode(
    x='x',
    y='y',
    color='city',
    tooltip=['restaurant', 'city']
).properties(width=700, height=400)
chart.save('node2vec-color.json')
chart

In [33]:
with driver.session() as session:
    result = session.run("""
    CALL gds.beta.node2vec.write({
       nodeProjection: "User",
       relationshipProjection: {
         has_friend: {
           type: "HAS_FRIEND",
           orientation: "UNDIRECTED"
        }
       },
       embeddingDimension: 10,
       iterations: 10,
       walkLength: 10,
       writeProperty: $embeddingProperty
    })
    """, {"embeddingProperty": "embeddingNode2vec"})

    embeddings_df = pd.DataFrame([dict(record) for record in result])
embeddings_df

Unnamed: 0,nodeCount,nodePropertiesWritten,createMillis,computeMillis,writeMillis,configuration
0,541,541,7,28,303,"{'negativeSamplingExponent': 0.75, 'initialLea..."


In [36]:
with driver.session() as session:
    result = session.run("""
    MATCH (u1:User)-[:HAS_FRIEND]->(u2:User)
    RETURN u1.name AS user_1, u1.embeddingNode2vec AS embedding, u2.name AS user_2
    """)
    X = pd.DataFrame([dict(record) for record in result])
X.head(5)

Unnamed: 0,user_1,embedding,user_2
0,Utpala,"[-0.7774031162261963, 0.026978397741913795, 0....",Umakant
1,Umakant,"[-0.08211513608694077, 0.36392906308174133, 1....",Utpala
2,Alka,"[0.573505163192749, -0.9654885530471802, -0.60...",James
3,Suvi,"[-0.4652106761932373, -1.1482294797897339, 0.4...",Nino
4,Carol,"[0.06807035952806473, -0.5864803791046143, -1....",Nikki


In [41]:
X_embedded = TSNE(n_components=2, random_state=6).fit_transform(list(X.embedding))
restaurants = list(X.user_1)
df = pd.DataFrame(data = {
    "user_1": X.user_1,
    "user_2": X.user_2,
    "x": [value[0] for value in list(X_embedded)],
    "y": [value[1] for value in list(X_embedded)]
})
df.head(200)

Unnamed: 0,user_1,user_2,x,y
0,Utpala,Umakant,23.985943,-90.52182
1,Umakant,Utpala,196.981461,-133.632812
2,Alka,James,42.126427,311.980164
3,Suvi,Nino,-120.773132,-58.216709
4,Carol,Nikki,-236.107742,92.292038
5,Aaron,Katelyn,63.579052,-249.224533
6,Jun,Betty,209.350006,190.939804
7,Anna,Long,113.392563,23.721502
8,Melissa,Emily,-64.235985,71.692924
9,Danial,Agha,-114.598076,-222.779633


In [42]:
chart = alt.Chart(df).mark_circle(size=60).encode(
    x='x',
    y='y',
    color='user_2',
    tooltip=['user_1', 'user_2']
).properties(width=700, height=400)
chart.save('node2vec-color.json')
chart

In [5]:
with driver.session() as session:
    result = session.run("""
    CALL gds.graph.create(
    'recommendation_embeddings',
    {
        User: {
            label: 'User'
        },
        Restaurant: {
            label: 'Restaurant',
            properties: ['rating']
        },
        Menu: {
            label: 'Menu'
        },
        Aspect: {
            label: 'Aspect'
        },
        Attr: {
            label: 'Attr'
        },
        City: {
            label: 'City'
        },
        Country: {
            label: 'Country'
        }
    }, {
        LOCATED_IN: {
            type: 'LOCATED_IN',
            orientation: 'UNDIRECTED'
        },
        RATE: {
            type: 'RATE',
            orientation: 'UNDIRECTED',
            properties: ['star']
        },
        HAS_FRIEND: {
            type: 'HAS_FRIEND',
            orientation: 'UNDIRECTED'
        },
        HAS_MENU: {
            type: 'HAS_MENU',
            orientation: 'UNDIRECTED'
        },
        HAS_ASPECT: {
            type: 'HAS_ASPECT',
            orientation: 'UNDIRECTED'
        },
        MENU_ATTR_FOR: {
            type: 'MENU_ATTR_FOR',
            orientation: 'UNDIRECTED'
        },
        ASPECT_ATTR_FOR: {
            type: 'ASPECT_ATTR_FOR',
            orientation: 'UNDIRECTED'
        },
        VISIT: {
            type: 'VISIT',
            orientation: 'UNDIRECTED'
        },
        ORDER: {
            type: 'ORDER',
            orientation: 'UNDIRECTED'
        },
        IS: {
            type: 'IS',
            orientation: 'UNDIRECTED'
        }
    })
    """)

#     CALL gds.graph.drop('recommendation_embeddings')
#     embeddings_df = pd.DataFrame([dict(record) for record in result])
#     embeddings_df.head(20)

In [6]:
with driver.session() as session:
    result = session.run("""
    CALL gds.beta.graphSage.train(
    'recommendation_embeddings',
    {
        modelName: 'multiLabelModel',
        featureProperties: ['rating'],
        nodeLabels: ['User', 'Restaurant', 'Menu', 'Aspect', 'Attr', 'City', 'Country'],
        relationshipTypes: ['LOCATED_IN', 'RATE', 'HAS_FRIEND', 'HAS_MENU', 'HAS_ASPECT', 'MENU_ATTR_FOR', 
                            'ASPECT_ATTR_FOR', 'VISIT', 'ORDER', 'IS'],
        projectedFeatureDimension: 4
    })
    """)

In [7]:
with driver.session() as session:
    result = session.run("""
    CALL gds.beta.graphSage.stream(
    'Restaurant',
    {
        modelName: 'multiLabelModel'
    })
    """)

In [8]:
with driver.session() as session:
    result = session.run("""
    CALL gds.beta.graphSage.write(
    'recommendation_embeddings',
    {
        writeProperty: 'embedding',
        modelName: 'multiLabelModel'
    })
    """)
    embeddings_df = pd.DataFrame([dict(record) for record in result])
embeddings_df

Unnamed: 0,nodeCount,nodePropertiesWritten,createMillis,computeMillis,writeMillis,configuration
0,859,859,1,13,526,"{'modelName': 'multiLabelModel', 'writeConcurr..."


In [9]:
with driver.session() as session:
    result = session.run("""
    CALL gds.graph.create(
    'graphEmbedding',
    {
        User: {
            label: 'User',
            properties: ['review_count']
        },
        Restaurant: {
            label: 'Restaurant',
            properties: ['rating']
        }
    }, {
        RATE: {
            type: 'RATE',
            orientation: 'UNDIRECTED',
            properties: ['relWeight']
        },
        VISIT: {
            type: 'VISIT',
            orientation: 'UNDIRECTED',
            properties: ['relWeight']
        }
    })
    """)

In [10]:
with driver.session() as session:
    result = session.run("""
    CALL gds.beta.graphSage.train(
      'graphEmbedding',
      {
        modelName: 'embeddingModel',
        featureProperties: ['rating', 'review_count'],
        relationshipWeightProperty: 'relWeight',
        projectedFeatureDimension: 128
      }
    )
    """)
    embeddings_df = pd.DataFrame([dict(record) for record in result])
embeddings_df

Unnamed: 0,graphName,graphCreateConfig,modelInfo,configuration,trainMillis
0,graphEmbedding,{},"{'name': 'embeddingModel', 'metrics': {'didCon...","{'maxIterations': 10, 'negativeSampleWeight': ...",2172


In [11]:
with driver.session() as session:
    result = session.run("""
    CALL gds.beta.graphSage.stream(
      'graphEmbedding',
      {
        modelName: 'embeddingModel'
      }
    )
    """)
    embeddings_df = pd.DataFrame([dict(record) for record in result])
embeddings_df

Unnamed: 0,nodeId,embedding
0,0,"[2.2324890589197205e-29, 1.2250624314791083e-3..."
1,1,"[3.9617415697992134e-51, 5.0460940000901133e-5..."
2,2,"[2.6657631445444197e-40, 8.244236350338559e-45..."
3,3,"[6.622930259556193e-19, 1.2346799613558764e-20..."
4,4,"[3.9627608694232156e-51, 5.0473429538820686e-5..."
...,...,...
556,989,"[3.4306359268391706e-90, 2.8392399495609855e-9..."
557,1043,"[4.907155268869664e-90, 3.8696039451459935e-98..."
558,1058,"[6.890071545086272e-21, 2.543644391089251e-22,..."
559,1076,"[4.907155268869664e-90, 3.8696039451459935e-98..."
