# Let's walk through a bit of code exploring the Game of Thrones graph.

In [1]:
import numpy as np
import pandas as pd
from neo4j import GraphDatabase

# Connection to running Neo4j Database

For information on how to get the database up and running and connecting JupyterLab to it, you can use the Docker container described in  [this](https://dev.neo4j.com/docker_neo_jupyter) blog post.  Once the database container is up and running, you can load in the Game of Thrones dataset by typing `:play https://guides.neo4j.com/sandbox/graph-data-science/index.html` in the desktop browser.

In [36]:
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, parameters=None, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [6]:
conn = Neo4jConnection(uri='bolt://neo4j:7687', user='neo4j', pwd='1234')

In [7]:
conn.query('MATCH (n) RETURN COUNT(n)')

[<Record COUNT(n)=2640>]

# Moving into some data science...

We start by creating an in-memory graph, which allows data science calculations to be done on portions of the overall database.

In [8]:
query = """CALL gds.graph.create('People', 'Person', '*')"""

conn.query(query)

[<Record nodeProjection={'Person': {'properties': {}, 'label': 'Person'}} relationshipProjection={'__ALL__': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': '*', 'properties': {}}} graphName='People' nodeCount=2166 relationshipCount=8160 createMillis=13>]

# Calculations such as PageRank

In [9]:
query = """
    CALL gds.pageRank.stream('People')
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).name AS name, score as pagerank
    ORDER BY pagerank DESC
    LIMIT 20
"""

conn.query(query)

[<Record name='Tyrion Lannister' pagerank=14.433243666728849>,
 <Record name='Stannis Baratheon' pagerank=8.389969761197266>,
 <Record name='Tywin Lannister' pagerank=8.363584681922404>,
 <Record name='Varys' pagerank=7.135125708653154>,
 <Record name='Yandry' pagerank=5.547890947202233>,
 <Record name='Ysilla' pagerank=5.523587903582166>,
 <Record name='Theon Greyjoy' pagerank=4.7450533128958385>,
 <Record name='Walder Frey' pagerank=4.526322653622577>,
 <Record name='Sansa Stark' pagerank=4.490734064387214>,
 <Record name='Perra Royce' pagerank=3.9973645459220277>,
 <Record name='Robb Stark' pagerank=3.30253713930421>,
 <Record name='Steffon Baratheon' pagerank=3.2228749673743544>,
 <Record name='Samwell Tarly' pagerank=3.216146497248358>,
 <Record name='Tommen Baratheon' pagerank=3.062266702175972>,
 <Record name='Jon Snow' pagerank=3.05430071565156>,
 <Record name='Cassana Estermont' pagerank=3.0228800240751106>,
 <Record name='Wyman Manderly' pagerank=2.693367135746848>,
 <Record 

# Oops!  I don't recognize some of those characters!

In [11]:
query = """CALL gds.graph.create('PeopleInteractions', 'Person', 'INTERACTS')"""

conn.query(query)

[<Record nodeProjection={'Person': {'properties': {}, 'label': 'Person'}} relationshipProjection={'INTERACTS': {'orientation': 'NATURAL', 'aggregation': 'DEFAULT', 'type': 'INTERACTS', 'properties': {}}} graphName='PeopleInteractions' nodeCount=2166 relationshipCount=3907 createMillis=13>]

In [12]:
query = """
    CALL gds.pageRank.stream('PeopleInteractions')
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).name AS name, score as pagerank
    ORDER BY pagerank DESC
    LIMIT 20
"""

conn.query(query)

[<Record name='Tyrion Lannister' pagerank=11.99075112408506>,
 <Record name='Stannis Baratheon' pagerank=7.639891735779173>,
 <Record name='Tywin Lannister' pagerank=7.422262851899261>,
 <Record name='Varys' pagerank=6.5366806067733085>,
 <Record name='Theon Greyjoy' pagerank=4.604171226727502>,
 <Record name='Sansa Stark' pagerank=4.188611313313987>,
 <Record name='Walder Frey' pagerank=3.375046840109691>,
 <Record name='Robb Stark' pagerank=3.1105864392977223>,
 <Record name='Samwell Tarly' pagerank=3.0062773761317825>,
 <Record name='Tommen Baratheon' pagerank=2.958087542768285>,
 <Record name='Jon Snow' pagerank=2.9469302914539055>,
 <Record name='Wyman Manderly' pagerank=2.486454424816056>,
 <Record name='Victarion Greyjoy' pagerank=2.4621002941036965>,
 <Record name='Wulfe' pagerank=2.2427852925207183>,
 <Record name='Robert Baratheon' pagerank=2.242134340671639>,
 <Record name='Wex Pyke' pagerank=1.8133532572478532>,
 <Record name='Tysha' pagerank=1.8099126702539843>,
 <Record n

# That's better!

Now let's write those values to the nodes themselves so we can sort out lower importance characters in the upcoming queries.

In [14]:
query = """
    CALL gds.pageRank.write('PeopleInteractions', {
           maxIterations:20,
           dampingFactor: 0.85,
           writeProperty: 'pagerank'
        }
    )
"""

conn.query(query)

[<Record writeMillis=293 nodePropertiesWritten=2166 ranIterations=15 didConverge=True centralityDistribution={'p99': 1.4541616439819336, 'min': 0.14999961853027344, 'max': 11.990782737731934, 'mean': 0.2251694429173914, 'p90': 0.27559757232666016, 'p50': 0.14999961853027344, 'p999': 7.422270774841309, 'p95': 0.4626455307006836, 'p75': 0.15515899658203125} postProcessingMillis=30 createMillis=0 computeMillis=1068 configuration={'maxIterations': 20, 'writeConcurrency': 4, 'relationshipWeightProperty': None, 'cacheWeights': False, 'concurrency': 4, 'sourceNodes': [], 'writeProperty': 'pagerank', 'nodeLabels': ['*'], 'sudo': False, 'dampingFactor': 0.85, 'relationshipTypes': ['*'], 'tolerance': 1e-07}>]

# What about node similarity?

In [20]:
query = """
    CALL gds.nodeSimilarity.stream(
      'PeopleInteractions',
      {
        degreeCutoff: 20
      }
    )
    YIELD node1, node2, similarity
    RETURN gds.util.asNode(node1).name AS character1, gds.util.asNode(node2).name AS character2, similarity
    ORDER BY similarity DESC
LIMIT 10
"""

conn.query(query)

[<Record character1='Cersei Lannister' character2='Joffrey Baratheon' similarity=0.3409090909090909>,
 <Record character1='Joffrey Baratheon' character2='Gregor Clegane' similarity=0.3409090909090909>,
 <Record character1='Gregor Clegane' character2='Joffrey Baratheon' similarity=0.3409090909090909>,
 <Record character1='Joffrey Baratheon' character2='Cersei Lannister' similarity=0.3409090909090909>,
 <Record character1='Joffrey Baratheon' character2='Jaime Lannister' similarity=0.3333333333333333>,
 <Record character1='Jaime Lannister' character2='Joffrey Baratheon' similarity=0.3333333333333333>,
 <Record character1='Amory Lorch' character2='Gregor Clegane' similarity=0.3225806451612903>,
 <Record character1='Gregor Clegane' character2='Amory Lorch' similarity=0.3225806451612903>,
 <Record character1='Jaime Lannister' character2='Cersei Lannister' similarity=0.2920353982300885>,
 <Record character1='Cersei Lannister' character2='Jaime Lannister' similarity=0.2920353982300885>]

# Didn't you say weighted graphs are cool?

In [None]:
query = """
    CALL gds.graph.create(
      'got-weighted-interactions',
      'Person',
      {
        INTERACTS: {
          type: 'INTERACTS',
          orientation: 'UNDIRECTED',
          aggregation: 'NONE',
          properties: {
            weight: {
              property: 'weight',
              aggregation: 'NONE',
              defaultValue: 0.0
            }
          }
        }
      }
    )
"""

conn.query(query)

# Community detection with Louvain

In [24]:
query = """
    CALL gds.louvain.stream('got-weighted-interactions', { relationshipWeightProperty: 'weight' })
    YIELD nodeId, communityId, intermediateCommunityIds
    RETURN gds.util.asNode(nodeId).name AS name, communityId
    ORDER BY communityId, name ASC
    LIMIT 10
"""

conn.query(query)

[<Record name='Andros Brax' communityId=2>,
 <Record name='Medger Cerwyn' communityId=5>,
 <Record name='Harrion Karstark' communityId=6>,
 <Record name='Aeron Greyjoy' communityId=13>,
 <Record name='Aggar' communityId=13>,
 <Record name='Alysane Mormont' communityId=13>,
 <Record name='Asha Greyjoy' communityId=13>,
 <Record name='Baelor Blacktyde' communityId=13>,
 <Record name='Balon Greyjoy' communityId=13>,
 <Record name='Barbrey Dustin' communityId=13>]

In [26]:
query = """
    CALL gds.louvain.write('got-weighted-interactions', { writeProperty: 'community' })
    YIELD communityCount, modularity, modularities
"""

conn.query(query)

[<Record communityCount=1382 modularity=0.5680039220030543 modularities=[0.4857842129222886, 0.5660974254959941, 0.5680039220030543]>]

# What communities are our major characters in?  (And do they make sense?)

In [27]:
query = """
    MATCH (p:Person)
    WHERE p.name CONTAINS 'Stark'
    RETURN p.name, p.community, p.pagerank
    ORDER BY p.pagerank DESC, p.community
    LIMIT 10
"""

conn.query(query)

[<Record p.name='Sansa Stark' p.community=203 p.pagerank=4.188611313313987>,
 <Record p.name='Robb Stark' p.community=530 p.pagerank=3.1105864392977223>,
 <Record p.name='Rickon Stark' p.community=530 p.pagerank=0.6512236041828712>,
 <Record p.name='Eddard Stark' p.community=530 p.pagerank=0.5706054216015581>,
 <Record p.name='Catelyn Stark' p.community=530 p.pagerank=0.3706267732950437>,
 <Record p.name='Arya Stark' p.community=334 p.pagerank=0.2653351478278637>,
 <Record p.name='Bran Stark' p.community=530 p.pagerank=0.24500166895049916>,
 <Record p.name='Lyanna Stark' p.community=530 p.pagerank=0.17152057029271678>,
 <Record p.name='Rickard Stark' p.community=530 p.pagerank=0.16875453488060546>,
 <Record p.name='Torrhen Stark' p.community=530 p.pagerank=0.1667506389432791>]

# Cool.  But what about machine learning?

In [29]:
query = """
    CALL gds.fastRP.write(
        'got-weighted-interactions',
        {embeddingDimension: 4, writeProperty: 'frp_emb'}
    )
"""

conn.query(query)

[<Record nodeCount=2166 nodePropertiesWritten=2166 createMillis=0 computeMillis=3 writeMillis=55 configuration={'writeConcurrency': 4, 'normalizationStrength': 0.0, 'writeProperty': 'frp_emb', 'iterationWeights': [0.0, 1.0, 1.0], 'embeddingDimension': 4, 'relationshipWeightProperty': None, 'nodeLabels': ['*'], 'sudo': False, 'relationshipTypes': ['*'], 'concurrency': 4}>]

In [30]:
query = """
    MATCH (p:Person)
    WHERE p.name CONTAINS 'Lannister'
    RETURN p.name, p.frp_emb
    ORDER BY p.pagerank DESC
    LIMIT 10
"""

conn.query(query)

[<Record p.name='Tyrion Lannister' p.frp_emb=[-0.8826401829719543, -0.3535643219947815, 1.2257000207901, 1.254091501235962]>,
 <Record p.name='Tywin Lannister' p.frp_emb=[-0.7263013124465942, -0.3305906057357788, 1.39788818359375, 1.1425588130950928]>,
 <Record p.name='Jaime Lannister' p.frp_emb=[-0.7126740217208862, -0.09225631505250931, 1.5216126441955566, 1.0616106986999512]>,
 <Record p.name='Cersei Lannister' p.frp_emb=[-0.9167600870132446, 0.5580523610115051, 1.0806353092193604, 1.2870502471923828]>,
 <Record p.name='Willem Lannister' p.frp_emb=[-0.22210347652435303, -1.0647058486938477, 0.464213103055954, 0.9750747680664062]>,
 <Record p.name='Lancel Lannister' p.frp_emb=[-1.2596091032028198, -0.06971631199121475, 1.1430408954620361, 0.9607170224189758]>,
 <Record p.name='Kevan Lannister' p.frp_emb=[-0.6192851066589355, -0.5045726895332336, 1.2345192432403564, 1.265997290611267]>,
 <Record p.name='Stafford Lannister' p.frp_emb=[0.11904264241456985, -0.6426668167114258, 0.9328647

# We can use these embeddings for normal ML-like tasks

In [32]:
query = """
    MATCH (p1:Person {name: 'Tywin Lannister'})
    MATCH (p2:Person {name: 'Jaime Lannister'})
    RETURN gds.alpha.similarity.cosine(p1.frp_emb, p2.frp_emb) AS similarity
"""

conn.query(query)

[<Record similarity=0.9899932335273712>]

In [35]:
query = """
    MATCH (p1:Person {name: 'Tywin Lannister'})
    MATCH (p2:Person {name: 'Harma'})
    RETURN gds.alpha.similarity.cosine(p1.frp_emb, p2.frp_emb) AS similarity
"""

conn.query(query)

[<Record similarity=0.00886325264686295>]