In [1]:
import os
import time

import pandas as pd
from neo4j import GraphDatabase

In [2]:
class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver.

    The driver is created eagerly in ``__init__``. A failure to create it is
    reported on stdout and leaves the connection without a driver, in which
    case ``query`` raises ``AssertionError``.

    The object can also be used as a context manager, which closes the driver
    on exit::

        with Neo4jConnection(uri, user, pwd) as conn:
            conn.query("RETURN 1")
    """

    def __init__(self, uri, user, pwd):
        """Store the connection settings and try to create the driver.

        Args:
            uri: Address handed to ``GraphDatabase.driver``.
            user: User name, first element of the ``auth`` tuple.
            pwd: Password, second element of the ``auth`` tuple.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None

        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            # Best effort: keep the object usable for close(); query() will
            # refuse to run because the driver stays None.
            print("Failed to create the driver:", e)

    def __enter__(self):
        """Return the connection itself so it can be bound in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on leaving the ``with`` block; never suppresses exceptions."""
        self.close()
        return False

    def close(self):
        """Close the underlying driver if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run ``query`` in a fresh session and return its records as a list.

        Args:
            query: Statement passed to ``session.run``.
            parameters: Optional parameters passed to ``session.run``.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.

        Returns:
            A list with the records produced by the query, or ``None`` when
            opening the session or running the query raised (the error is
            printed instead of propagated).

        Raises:
            AssertionError: If the driver was never initialized.
        """
        # Raised explicitly (instead of an `assert` statement) so the guard
        # also holds when Python runs with assertions disabled (-O).
        if self.__driver is None:
            raise AssertionError("Driver not initialized!")

        session_kwargs = {} if db is None else {"database": db}
        response = None

        try:
            # The session is a context manager, so it is closed on both the
            # success and the failure path.
            with self.__driver.session(**session_kwargs) as session:
                response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        return response

In [3]:
# Connection settings. Environment variables take precedence so real
# credentials can be supplied without editing (or committing) the notebook.
# FIXME: the literal fallbacks below are kept only so the notebook still runs
# unchanged; remove them (especially the password) before sharing this file.
uri = os.environ.get('NEO4J_URI', 'bolt://184.73.147.145:7687')
user = os.environ.get('NEO4J_USER', 'neo4j')
pwd = os.environ.get('NEO4J_PASSWORD', 'angles-alternatives-information')

# Shared connection used by the cells below.
conn = Neo4jConnection(uri=uri, user=user, pwd=pwd)

In [4]:
# Sample users, stored column-wise: one `id` and one `name` per user.
user_data = dict(
    id=list(range(5)),
    name=['Alice', 'Brian', 'Carla', 'David', 'Ed'],
)

# Build the frame from the columns; the trailing expression is the cell output.
user_df = pd.DataFrame.from_dict(user_data)
user_df.head()

Unnamed: 0,id,name
0,0,Alice
1,1,Brian
2,2,Carla
3,3,David
4,4,Ed


In [5]:
# Sample tweets, stored column-wise: ids run from 100 to 1000 in steps of 100.
tweet_data = dict(
    id=list(range(100, 1001, 100)),
    text=['Hi', 'Hello', 'Yo', 'Sup', 'Bye', 'Later', 'Ciao', 'Peace', 'See ya', 'Out'],
)

# Build the frame from the columns; head(10) shows all ten rows as the cell output.
tweet_df = pd.DataFrame.from_dict(tweet_data)
tweet_df.head(10)

Unnamed: 0,id,text
0,100,Hi
1,200,Hello
2,300,Yo
3,400,Sup
4,500,Bye
5,600,Later
6,700,Ciao
7,800,Peace
8,900,See ya
9,1000,Out


In [6]:
# Follow edges between users, stored column-wise: row i reads
# "user source[i] follows user target[i]".
follows_data = dict(
    source=[0, 0, 1, 2, 3, 4],
    target=[1, 2, 2, 3, 2, 2],
)

# Build the frame from the columns; the trailing expression is the cell output.
follows_df = pd.DataFrame.from_dict(follows_data)
follows_df.head(10)

Unnamed: 0,source,target
0,0,1
1,0,2
2,1,2
3,2,3
4,3,2
5,4,2


In [7]:
# Authorship edges: row i reads "user source[i] wrote tweet target[i]".
# Kept under its own name (`writes_data`) instead of reusing `tweet_data`,
# so the tweet id/text dictionary defined earlier is not silently replaced
# by a structure with different columns.
writes_data = {
    'source': [0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
    'target': [100, 500, 200, 600, 300, 700, 400, 800, 900, 1000]
}

# NOTE: `tweets_df` (edges) is easy to confuse with `tweet_df` (tweet nodes);
# the name is kept because a later cell passes it to `add_writes_rel`.
tweets_df = pd.DataFrame(writes_data)
tweets_df.head(10)

Unnamed: 0,source,target
0,0,100
1,0,500
2,1,200
3,1,600
4,2,300
5,2,700
6,3,400
7,3,800
8,4,900
9,4,1000


In [8]:
# Create uniqueness constraints on the `id` property of User and Tweet nodes.
# `IF NOT EXISTS` makes both statements safe to re-run.
# The value of the last call is what the cell displays.
# NOTE(review): `ON ... ASSERT` looks like the older constraint syntax; newer
# Neo4j releases appear to expect `FOR ... REQUIRE` -- confirm against the
# target server version. A failure here would only be printed by
# `Neo4jConnection.query`, not raised.
conn.query('CREATE CONSTRAINT users IF NOT EXISTS ON (u:User) ASSERT u.id IS UNIQUE')
conn.query('CREATE CONSTRAINT tweets IF NOT EXISTS ON (t:Tweet) ASSERT t.id IS UNIQUE')

[]

In [9]:
def insert_data(query, rows, batch_size = 10000, connection = None):
    """Send `rows` to Neo4j in batches and summarise the outcome.

    Each batch is converted to a list of records (one dict per row) and
    passed to `query` as the ``rows`` parameter. The first record returned
    for every batch must expose a ``total`` field, which is accumulated.

    Parameters
    ----------
    query : str
        Statement executed once per batch; it receives the batch as ``$rows``.
    rows : pandas.DataFrame
        Data to send. It is sliced by position into chunks of `batch_size`.
    batch_size : int, default 10000
        Maximum number of rows per call. Must be at least 1.
    connection : optional
        Object with a ``query(query, parameters=...)`` method. Defaults to
        the module-level ``conn``.

    Returns
    -------
    dict
        ``{"total": ..., "batches": ..., "time": ...}`` where ``time`` is the
        elapsed wall-clock time in seconds. For empty `rows` a summary with
        zero totals is returned (earlier versions returned ``None``).

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1 (it would otherwise never advance).
    RuntimeError
        If a batch produced no result, e.g. because the query failed and the
        connection returned ``None``.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    # Resolved at call time so the global `conn` is only needed when no
    # explicit connection is supplied.
    db = conn if connection is None else connection

    start = time.time()
    total = 0
    batch = 0

    for offset in range(0, len(rows), batch_size):
        chunk = rows.iloc[offset:offset + batch_size].to_dict('records')
        res = db.query(query, parameters = {'rows': chunk})

        # The connection reports failures by returning None; surface that as
        # a clear error instead of an opaque "NoneType is not subscriptable".
        if not res:
            raise RuntimeError(
                "Batch %d returned no result; the query may have failed." % batch
            )

        total += res[0]['total']
        batch += 1

    return {"total": total,
            "batches": batch,
            "time": time.time() - start}

In [10]:
def add_users(rows):
    """Upsert one `User` node per row of `rows`.

    `rows` is expected to provide `id` and `name` for every row (the query
    reads ``row.id`` and ``row.name``). Nodes are merged on `id` only and the
    name is set afterwards, so re-sending an existing id with a different
    name updates that node instead of trying to create a second node with
    the same id.

    Returns whatever `insert_data` returns for the batched run.
    """
    query = """UNWIND $rows AS row
               MERGE (u:User {id: row.id})
               SET u.name = row.name
               RETURN COUNT(*) AS total
            """

    return insert_data(query, rows)


add_users(user_df)

{'total': 5, 'batches': 1, 'time': 0.5177779197692871}

In [11]:
def add_tweets(rows):
    """Upsert one `Tweet` node per row of `rows`.

    `rows` is expected to provide `id` and `text` for every row (the query
    reads ``row.id`` and ``row.text``). Nodes are merged on `id` only and the
    text is set afterwards, so re-sending an existing id with a different
    text updates that node instead of trying to create a second node with
    the same id.

    Returns whatever `insert_data` returns for the batched run.
    """
    query = """UNWIND $rows AS row
               MERGE (t:Tweet {id: row.id})
               SET t.text = row.text
               RETURN COUNT(*) AS total
            """

    return insert_data(query, rows)


add_tweets(tweet_df)

{'total': 10, 'batches': 1, 'time': 0.326185941696167}

In [12]:
def add_follows_rel(rows):
    """Merge a FOLLOWS relationship for every row of `rows`.

    For each row the query looks up the `User` whose id is ``row.source`` and
    the `User` whose id is ``row.target``, merges a ``FOLLOWS`` relationship
    from the former to the latter, and reports the relationship count as
    ``total``. The batched run is delegated to `insert_data`, whose result is
    returned unchanged.
    """
    follows_cypher = """UNWIND $rows AS row
               MATCH (source:User {id: row.source})
               MATCH (target:User {id: row.target})
               MERGE (source)-[r:FOLLOWS]->(target)
               RETURN COUNT(r) AS total
            """
    summary = insert_data(follows_cypher, rows)
    return summary


add_follows_rel(follows_df)

{'total': 6, 'batches': 1, 'time': 0.7009670734405518}

In [13]:
def add_writes_rel(rows):
    """Merge a WRITES relationship for every row of `rows`.

    For each row the query looks up the `User` whose id is ``row.source`` and
    the `Tweet` whose id is ``row.target``, merges a ``WRITES`` relationship
    from the user to the tweet, and reports the relationship count as
    ``total``. The batched run is delegated to `insert_data`, whose result is
    returned unchanged.
    """
    writes_cypher = """UNWIND $rows AS row
               MATCH (source:User {id: row.source})
               MATCH (target:Tweet {id: row.target})
               MERGE (source)-[r:WRITES]->(target)
               RETURN COUNT(r) AS total
            """
    summary = insert_data(writes_cypher, rows)
    return summary


add_writes_rel(tweets_df)

{'total': 10, 'batches': 1, 'time': 0.5380897521972656}