# SDM - Neo4j Testing

In [2]:
import os
from dotenv import load_dotenv
from neo4j import GraphDatabase
#from ..src.neo4j_conn import Neo4jConnection

# Change cwd to the project root, i.e. the parent of the directory the kernel started in.
# The starting directory is remembered in a module-level name so that re-running this
# cell always lands in the same place instead of climbing one more level on every run.
_NOTEBOOK_DIR = globals().get('_NOTEBOOK_DIR', os.getcwd())
os.chdir(os.path.join(_NOTEBOOK_DIR, os.pardir))

# Load secrets from .env (path is relative to the cwd set just above)
load_dotenv(dotenv_path='./env/.env')

# Instantiate neo4j credentials; a missing variable raises KeyError here rather than
# surfacing later as a confusing connection failure.
URI = os.environ['NEO4J_URI']
AUTH = (os.environ['NEO4J_USERNAME'], os.environ['NEO4J_PASSWORD'])
DB_NAME = os.environ['DB_NAME']

In [3]:
# Pull the Cypher statement collections from the project's scripts package.
# cypher_init_nodes is imported here but not used in this cell.
from scripts.cypher_queries import cypher_init_nodes, cypher_init_relations

# Bare expression: displays the relationship-loading statements as the cell output
# (the captured output shows a list of LOAD CSV ... MERGE query strings).
cypher_init_relations

[' LOAD CSV WITH HEADERS FROM "file:///relation_WritenBy.csv" AS row\n        MERGE (paper:Paper {ID: row.start})\n        MERGE (person:Person {ID: row.end})\n        MERGE (paper)-[:WritenBy]->(person);',
 'LOAD CSV WITH HEADERS FROM "file:///relation_CoauthoredBy.csv" AS row\n        MERGE (paper:Paper {ID: row.start})\n        MERGE (person:Person {ID: row.end})\n        MERGE (paper)-[:CoauthoredBy]->(person);',
 'LOAD CSV WITH HEADERS FROM "file:///relation_CitedBy.csv" AS row\n        MERGE (paper1:Paper {ID: row.start})\n        MERGE (paper2:Paper {ID: row.end})\n        MERGE (paper1)-[:CitedBy]->(paper2);',
 'LOAD CSV WITH HEADERS FROM "file:///relation_PublishedOn_edition.csv" AS row\n        MERGE (paper:Paper {ID: row.start})\n        MERGE (edition:Edition {ID: row.end})\n        MERGE (paper)-[:PublishedOn]->(edition);',
 'LOAD CSV WITH HEADERS FROM "file:///relation_edition_conference.csv" AS row\n        MERGE (edition:Edition {ID: row.start})\n        MERGE (conferen

### Neo4j Connectivity Verification

In [11]:
# Connectivity smoke test: open a driver, ask it to verify the connection,
# and print which of the two outcomes occurred. The driver is closed on exit.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    try:
        driver.verify_connectivity()
    except Exception as err:
        # Report the failure together with the underlying error instead of raising.
        print('Neo4j Driver Unavailable!', err)
    else:
        print('Neo4j Driver Connectivity Verified!')

Neo4j Driver Connectivity Verified!


### Custom write transaction

In [None]:
# Placeholder for the custom write transaction: opens a driver and a session but
# runs no query yet. The body is a no-op so the cell parses and Run All succeeds.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    with driver.session(database="neo4j") as session:
        # TODO: implement the write transaction against `session` here.
        # NOTE(review): DB_NAME is read from the environment in the first cell, but this
        # session hardcodes "neo4j" - confirm which database is intended.
        pass

### Executing Cypher Queries

In [None]:
# Sample read-transaction pattern from the neo4j docs, kept for reference only.
# The whole example sits inside a triple-quoted string, so none of it executes;
# because the string is the cell's only expression, running the cell just echoes it.
# NOTE(review): the example uses a `driver` variable. In the cells visible here a driver
# only exists inside `with GraphDatabase.driver(...)` blocks - open one before un-quoting this.
'''def match_person_nodes(tx, age):
    result = tx.run(
        "MATCH (p:Person {age: $age}) RETURN p.name AS name",
        age=age)
    records = list(result)
    summary = result.consume()
    return records, summary

with driver.session(database="neo4j") as session:
    records, summary = session.execute_read(match_person_nodes, age=42)

# Summary information
print("The query `{query}` returned {records_count} records in {time} ms.".format(
    query=summary.query, records_count=len(records),
    time=summary.result_available_after,
))

# Loop through results and do something with them
for person in records:
    print(person)'''

# Synthetic Data Generation

In [57]:
import pandas as pd
import random

def _read_synthetic(csv_path):
    """Read one semicolon-delimited synthetic CSV file into a DataFrame."""
    return pd.read_csv(csv_path, delimiter=';')


def _edge_table(source_col, target_col):
    """Return an empty two-column DataFrame that will hold one relationship type."""
    return pd.DataFrame(columns=[source_col, target_col])


# Source tables (authors, papers, conferences, journals).
# NOTE(review): the first cell chdirs to '../' (described there as the project root), so
# these '../data/...' paths resolve one level above it - confirm that is where the data lives.
authors_df = _read_synthetic('../data/synthetic/authors.csv')
papers_df = _read_synthetic('../data/synthetic/papers.csv')
conferences_df = _read_synthetic('../data/synthetic/conferences.csv')
journals_df = _read_synthetic('../data/synthetic/journals.csv')

# Relations (edges): every table starts empty with its source -> target columns declared.
written_by = _edge_table('paper_id', 'person_id')  # paper -> person
coAuthored_by = _edge_table('paper_id', 'person_id')  # paper -> person
reviewed_by = _edge_table('paper_id', 'person_id')  # paper -> person
cited_by = _edge_table('paper1_id', 'paper2_id')  # paper -> paper
published_on = _edge_table('paper_id', 'edition/volume_id')  # paper -> edition/volume
is_keyword = _edge_table('paper_id', 'keyword_id')  # paper -> keyword
topic = _edge_table('keyword_id', 'topic_id')  # keyword -> topic
part_of = _edge_table('edition/volume_id', 'conference/journal_id')  # edition/volume -> conference/journal


### Creating Node Relationships

In [151]:
# written_by: give every paper exactly one author, drawn at random with replacement
# from the authors table (so the same author may be assigned to several papers).
WRITTEN_BY_SEED = 42  # fixed seed so the synthetic edges are identical on every run

written_by_dict = {
    'paper_id': papers_df['id'].tolist(),
    # Sample the id values themselves. The earlier approach sliced characters out of the
    # printed form of a one-row Series, which yields wrong ids as soon as the row index
    # has three or more digits or the ids change width. Ids now keep the column's native
    # dtype instead of being converted to strings.
    'author_id': (
        authors_df['id']
        .sample(n=len(papers_df), replace=True, random_state=WRITTEN_BY_SEED)
        .tolist()
    ),
}

written_by = pd.DataFrame(written_by_dict)
written_by

Unnamed: 0,paper_id,author_id
0,1,9742912
1,2,9742925
2,3,9742911
3,4,9742921
4,5,9742928
...,...,...
64,65,9742918
65,66,9742913
66,67,9742921
67,68,9742910


In [None]:
# coAuthored_by
