# Dependencies

In [20]:
import os
import sys

from py2neo import Graph, Path

In [22]:
# Location of the conversation corpus, relative to the current working directory
data_dir = os.path.join('.', 'data')
data_file_path = os.path.join(data_dir, 'seq-to-seq-embedded-start-end.txt')

# Graph client

* We will set up the connection
* Demonstrate that it's working with sample queries
* Define the functionality

In [4]:
# Connect to the local Neo4j server over HTTP as the "neo4j" user.
# The password is read from the NEO4J_PASSWORD environment variable so that it
# never has to be saved inside the notebook. When the variable is not set, the
# "<password>" placeholder is used and must be replaced by hand, as before.
neo4j_password = os.environ.get('NEO4J_PASSWORD', '<password>')
graph = Graph("http://neo4j:%s@localhost:7474" % neo4j_password)

Create a Word node

In [11]:
# Upsert the word "elo": the node is created with count = 1 when it is missing,
# otherwise its existing count is incremented by one.
create_word_query = '''
    MERGE (w:Word {text: {text}})
    ON CREATE SET w.count = 1
    ON MATCH SET w.count = w.count + 1
    '''
graph.run(create_word_query, text='elo')

<py2neo.database.Cursor at 0x10f847048>

Create a NEXT relation between two word nodes

In [17]:
# Link two existing Word nodes with a NEXT relation. The relation is created
# with count = 1 when it is missing, otherwise its count is incremented.
# The statement starts with MATCH, so it only acts on word nodes that are found.
next_relation_query = '''
    MATCH (source:Word {text: {source_text}}),
          (destination:Word {text: {destination_text}})
    MERGE (source)-[rel:NEXT]->(destination)
    ON CREATE SET rel.count = 1
    ON MATCH SET rel.count = rel.count + 1'''
graph.run(next_relation_query, source_text='salih', destination_text='cawani')

<py2neo.database.Cursor at 0x10f83e3c8>

The two functions below create nodes and relationships separately. We won't use them for now: issuing one combined query means fewer calls to the Neo4j API and lets Neo4j optimize the Cypher query better.

In [23]:
def merge_node(node_text):
    """Upsert a single ``Word`` node and bump its occurrence counter.

    A node whose ``text`` equals ``node_text`` is created with ``count = 1``
    when it is missing; otherwise its ``count`` is incremented by one.

    Uses the module-level ``graph`` connection and returns ``None``.
    """
    upsert_word_query = '''
        MERGE (w:Word {text: {text}})
        ON CREATE SET w.count = 1
        ON MATCH SET w.count = w.count + 1
        '''
    parameters = {'text': node_text}
    graph.run(upsert_word_query, **parameters)

In [24]:
def merge_relation(source_text, destination_text):
    """Upsert a ``NEXT`` relation between two ``Word`` nodes.

    The nodes are looked up by their ``text`` property with ``MATCH``; this
    function does not create them. The relation from the source node to the
    destination node is created with ``count = 1`` when it is missing;
    otherwise its ``count`` is incremented by one.

    Uses the module-level ``graph`` connection and returns ``None``.
    """
    upsert_relation_query = '''
        MATCH (source:Word {text: {source_text}}),
              (destination:Word {text: {destination_text}})
        MERGE (source)-[rel:NEXT]->(destination)
        ON CREATE SET rel.count = 1
        ON MATCH SET rel.count = rel.count + 1
        '''
    parameters = {
        'source_text': source_text,
        'destination_text': destination_text,
    }
    graph.run(upsert_relation_query, **parameters)

A single function that creates the two nodes if they don't already exist, then connects them with a relation.

In [33]:
def merge_triple(source, destination):
    """Upsert two ``Word`` nodes and the ``NEXT`` relation between them.

    Everything happens in one Cypher statement sent through the module-level
    ``graph`` connection:

    * the node with ``text == source`` is created with ``count = 1`` or has
      its ``count`` incremented;
    * the node with ``text == destination`` is handled the same way;
    * the ``NEXT`` relation from the source node to the destination node is
      created with ``count = 1`` or has its ``count`` incremented.

    Returns ``None``.

    NOTE(review): the ``{name}`` parameter placeholders are the legacy Cypher
    form; newer Neo4j releases may expect ``$name`` instead. Confirm against
    the server version in use.
    """
    upsert_triple_query = '''
        MERGE (source:Word {text: {source_text}})
        ON CREATE SET source.count = 1
        ON MATCH SET source.count = source.count + 1
        MERGE (destination:Word {text: {destination_text}})
        ON CREATE SET destination.count = 1
        ON MATCH SET destination.count = destination.count + 1
        MERGE (source)-[rel:NEXT]->(destination)
        ON CREATE SET rel.count = 1
        ON MATCH SET rel.count = rel.count + 1
        '''
    parameters = {'source_text': source, 'destination_text': destination}
    graph.run(upsert_triple_query, **parameters)

The next cell consumes the data file line by line to keep memory usage low, and issues one query for each pair of adjacent words.

In [43]:
# Stream the corpus one line at a time so the whole file is never held in memory.
# Read-only mode is sufficient: the file is only read, never written.
with open(data_file_path, 'r') as f:
    cnt = 0  # stays 0 when the file is empty
    for cnt, line in enumerate(f, start=1):
        # The carriage return keeps the progress counter on one output line;
        # cnt is the 1-based number of the line being processed.
        sys.stdout.write('\r%d' % cnt)
        words = line.split()
        # Each pair of adjacent words becomes one source -> destination triple.
        for source_word, destination_word in zip(words, words[1:]):
            merge_triple(source_word, destination_word)

32436