# Creating and Using a Graph DB in a Python Application

In [None]:
!pip install neo4

## Prepare the Environment

In [1]:
from neo4j import GraphDatabase, RoutingControl, basic_auth

ModuleNotFoundError: No module named 'neo4j'

In [None]:
import pandas as pd
import numpy as np

## Setup the Connection to the Database
Choose a database server, a database, and the credentials for accessing it, and use them to configure the driver.\
The driver connects to the database and sends it the Cypher statements.

In [None]:
# DB URI and authentication
# Each value can be overridden through an environment variable, so the
# notebook does not have to be edited to target a different server.
import os

URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
AUTH = (
    os.environ.get("NEO4J_USER", "neo4j"),
    # NOTE: the fallback keeps the previously hard-coded password so existing
    # setups keep working; prefer setting NEO4J_PASSWORD instead.
    os.environ.get("NEO4J_PASSWORD", "mikkel405"),
)

In [None]:
# Build the driver from the URI and credentials configured above; the query
# helpers in this notebook open their sessions through this object.
driver = GraphDatabase.driver(URI, auth=AUTH)

In [None]:
# Check the connection up front, before any query is sent; this is expected
# to raise if the server cannot be reached with the given settings.
driver.verify_connectivity()

### Query Templates

Execute a Cypher statement by creating a session and using the methods 
- Session.execute_read() and 
- Session.execute_write()

Do not hardcode or concatenate parameters: 
- use placeholders and 
- specify the parameters as keyword arguments

In [None]:
# template write request
def write_query(query, params=None):
    """Run a Cypher statement in a managed write transaction.

    Args:
        query: Cypher text; values are referenced through ``$placeholders``.
        params: Mapping of placeholder names to values. Defaults to no
            parameters.

    Returns:
        A list with one ``dict`` (column name -> value) per returned record.
    """
    # A ``None`` default avoids sharing one mutable dict between calls.
    parameters = {} if params is None else params
    with driver.session(database="neo4j") as session:
        # The result is materialised inside the transaction function because
        # it cannot be read any more once the transaction has finished.
        return session.execute_write(lambda tx: tx.run(query, parameters).data())

In [None]:
# template read request
def read_query(query, params=None):
    """Run a Cypher statement in a managed read transaction.

    Args:
        query: Cypher text; values are referenced through ``$placeholders``.
        params: Mapping of placeholder names to values. Defaults to no
            parameters.

    Returns:
        A list holding the value of the first returned column of every
        record (``None`` for a record without columns).
    """
    # A ``None`` default avoids sharing one mutable dict between calls.
    parameters = {} if params is None else params
    with driver.session(database="neo4j") as session:
        records = session.execute_read(lambda tx: tx.run(query, parameters).data())
    # ``.data()`` yields plain dicts; ``dict.values()`` is a view that cannot
    # be indexed, so the first value is taken through an iterator instead.
    return [next(iter(record.values()), None) for record in records]

## Harry Potter Demo Implementation

### File Reading Utilities

In [None]:
# Directory and file name of the CSV to load, combined into one path
data_dir = '../Data/HarryPotter/'
file_name = 'HP-characters.csv'
path = f"{data_dir}{file_name}"

In [None]:
# with pandas
data = pd.read_csv(path)
data

In [None]:
data.isnull().sum()

In [None]:
data.info()

In [None]:
# Swap NaN for None so that missing cells are handed to the Cypher queries
# as None instead of a float NaN (the rows are used as query parameters below).
datax = data.replace({np.nan: None})

In [None]:
datax.sample(5)

In [None]:
datax['house'].tolist()

### Ingest the Data into the Database

#### Characters

In [None]:
# One Character node per CSV row, identified by name and url.
# The optional attributes are copied from the query parameters; the CASE
# guard inspects the incoming parameter, so a literal 'None' string is stored
# as null (the property stays unset) instead of as text.
create_db = """
MERGE (c:Character{name:$title, url:$url})
    SET c.aliases = CASE trim(toString($aliases)) WHEN 'None' THEN null ELSE $aliases END
    SET c.blood = CASE trim(toString($blood)) WHEN 'None' THEN null ELSE $blood END
    SET c.nationality = CASE trim(toString($nationality)) WHEN 'None' THEN null ELSE $nationality END
    SET c.gender = CASE trim(toString($gender)) WHEN 'None' THEN null ELSE $gender END
    SET c.species = CASE trim(toString($species)) WHEN 'None' THEN null ELSE $species END
  
"""


In [None]:
# Send one write request per row, passing the row's columns as parameters
for _, record in datax.iterrows():
    response = write_query(
        create_db,
        params={
            column: record[column]
            for column in ('title', 'url', 'aliases', 'blood',
                           'nationality', 'species', 'gender')
        },
    )

In [None]:
# Link a character to every group listed in the comma-separated $loyalty
# value. The character is looked up by `name`, the property under which
# Character nodes are stored by the other queries in this notebook.
add_groups = '''
    MERGE (c:Character {name:$title})
    WITH c
    UNWIND split($loyalty, ',') AS l
        MERGE (g:Group {name:l}) 
        MERGE (c)-[:LOYAL_TO]->(g)
    RETURN c
'''

In [None]:
# Send one write request per row with the character title and its loyalties
for _, record in datax.iterrows():
    response = write_query(
        add_groups,
        params={column: record[column] for column in ('title', 'loyalty')},
    )

In [None]:
# Link a character to every relative listed in the comma-separated $family
# value. Both ends are looked up by `name`, the property under which
# Character nodes are stored by the other queries in this notebook.
add_family = '''
    MERGE (c:Character {name:$title})
    WITH c
    UNWIND split($family, ',') AS f
    MERGE (f1:Character {name:f}) 
    MERGE (c)-[t:IN_FAMILY_WITH]->(f1) 
    RETURN c
'''

In [None]:
# Send one write request per row with the character title and its family
for _, record in datax.iterrows():
    response = write_query(
        add_family,
        params={column: record[column] for column in ('title', 'family')},
    )

In [None]:
# Attach a character to its house. FOREACH over a one-element or empty list
# acts as a conditional, so nothing is created when $house is null.
# The character is looked up by `name`, the property under which Character
# nodes are stored by the other queries in this notebook.
add_house = '''
    MERGE (c:Character {name:$title})
    WITH c
    FOREACH (h in CASE WHEN $house IS NOT NULL THEN [1] ELSE [] END | 
    MERGE (h1:House {name:$house}) 
    MERGE (c)-[:BELONGS_TO]->(h1)  ) 
    RETURN c
'''

In [None]:
# Send one write request per row with the character title and its house
for _, record in datax.iterrows():
    response = write_query(
        add_house,
        params={column: record[column] for column in ('title', 'house')},
    )

#### Relations

In [None]:
# Point `path` at the relations file inside the same data directory
file_name = 'HP-relations-seen.csv'
path = f"{data_dir}{file_name}"

In [None]:
# with pandas
data = pd.read_csv(path)
data

In [None]:
# Swap NaN for None so that missing cells are handed to the Cypher query
# as None instead of a float NaN (the rows are used as query parameters below).
datay = data.replace({np.nan: None})

In [None]:
datay.sample(5)

In [None]:
# Ensure both characters exist (looked up by name), connect them with a single
# INTERACTS relationship matched in either direction, and store $value as the
# relationship's weight.
create_rel = """
    MERGE (s:Character{name:$source})
    MERGE (t:Character{name:$target})
    MERGE (s)-[r:INTERACTS]-(t)
    SET r.weight = $value
"""    

In [None]:
# Send one write request per row with the two characters and their value
for _, record in datay.iterrows():
    response = write_query(
        create_rel,
        params={column: record[column] for column in ('source', 'target', 'value')},
    )

## Query the Database

To enable permissions for running the algorithms on the local machine, add this line to your Neo4j configuration file:
__dbms.security.procedures.unrestricted=jwt.security.*, apoc.*, gds.*__

In [None]:
def read_gds(query, params=None):
    """Run a query in an auto-commit session and return it as a DataFrame.

    Args:
        query: Cypher text to execute.
        params: Optional mapping of query parameters.

    Returns:
        A ``pandas.DataFrame`` with one row per record and one column per
        result key.
    """
    with driver.session() as session:
        cursor = session.run(query, params)
        column_names = cursor.keys()
        # Materialise the rows while the session is still open.
        rows = [record.values() for record in cursor]
    return pd.DataFrame(rows, columns=column_names)

### Community Detection

In [None]:
# create a projection
# gds.graph.project reports metadata about the projected graph (its name and
# element counts); it does not produce per-node scores, so only those
# metadata columns are yielded and returned.
query = """
    CALL gds.graph.project('dor', 'Character', {INTERACTS:{orientation:'UNDIRECTED'}})
    YIELD graphName, nodeCount, relationshipCount
    RETURN graphName, nodeCount, relationshipCount
"""

In [None]:
response = read_gds(query)
response

In [None]:
# Stream the Louvain community id of every node in the projection 'dom' and
# list the first 15 rows ordered by community id.
# NOTE(review): in this notebook 'dom' is only projected further down (Page
# Rank section), while the projection created in this section is named 'dor'
# - confirm which graph is intended, or run that projection cell first.
query = """
    CALL gds.louvain.stream('dom')
    YIELD nodeId, communityId, intermediateCommunityIds
    RETURN gds.util.asNode(nodeId).name AS name, communityId
    ORDER BY communityId ASC LIMIT 15
"""

In [None]:
# Run the query and show the result as a table.
# NOTE(review): assuming `query` still holds the gds.louvain.stream call, the
# community ids are only streamed back; stream mode does not store them as
# node properties.
response = read_gds(query)
response

### Degree Centrality

In [None]:
# Degree centrality on the projection 'dor': the five characters with the
# most connections. The ordering uses the `connections` alias defined in the
# RETURN clause.
query = """
    CALL gds.degree.stream('dor')
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).name AS title, score AS connections
    ORDER BY connections DESCENDING, title LIMIT 5
"""

In [None]:
response = read_gds(query)
response

### Page Rank

In [None]:
# create projection
# Projects the Character nodes and the INTERACTS relationships, including the
# relationship property 'weight', into an in-memory graph named 'dom'.
query = '''
CALL gds.graph.project(
  'dom',
  'Character',
  'INTERACTS',
  {
    relationshipProperties: 'weight'
  }
)
'''

In [None]:
response = read_gds(query)
response

In [None]:
# PageRank on the projection 'dom', highest score first.
# Character nodes are stored with a `name` property by the ingest queries, so
# the label is read from `name`; the column keeps its `title` alias.
query = """
    CALL gds.pageRank.stream('dom')
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).name AS title, score
    ORDER BY score DESC
"""

In [None]:
response = read_gds(query)
response

### Shortest Path

In [None]:
# create projection
# Projects the Character nodes and the INTERACTS relationships, including the
# relationship property 'weight', into an in-memory graph named 'pat'.
query = '''
CALL gds.graph.project(
    'pat',
    'Character',
    'INTERACTS',
    {
        relationshipProperties: 'weight'
    }
)
'''

In [None]:
response = read_gds(query)
response

In [None]:
# Weighted shortest path (Dijkstra) between two characters on projection 'pat'.
# Character nodes are stored with a `name` property by the ingest queries, so
# both the lookup and the returned labels use `name`.
# NOTE(review): Dijkstra minimises the summed 'weight'; confirm that the
# stored weight is meant to act as a cost.
query = """
    MATCH (source:Character {name: 'Harry Potter'}), (target:Character {name: 'Albus Dumbledore'})
    CALL gds.shortestPath.dijkstra.stream('pat', 
    {
        sourceNode: source,
        targetNode: target,
        relationshipWeightProperty: 'weight'
    })
    YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
    RETURN
        index,
        gds.util.asNode(sourceNode).name AS sourceNodeName,
        gds.util.asNode(targetNode).name AS targetNodeName,
        totalCost,
        [nodeId IN nodeIds | gds.util.asNode(nodeId).name] AS nodeNames,
        costs,
        nodes(path) as path
    ORDER BY index
"""

In [None]:
response = read_gds(query)
response