In [3]:
from flask import Flask, request, jsonify
import redis
import csv
import msgpack
import pandas as pd
import numpy as np
import time
import argparse
import difflib
import json
import time
import pickle
from collections import defaultdict
#import tensorflow
#from keras_preprocessing.text import tokenizer_from_json
#from keras.models import load_model
#from keras_preprocessing.sequence import pad_sequences
from neo4j import GraphDatabase
from py2neo import Graph, Node, Relationship, NodeMatch, NodeMatcher
import time

In [4]:
uri = "bolt://localhost:7687"
username = ""
password = ""
graph = Graph("bolt://localhost:7687", auth=(username, password))

In [5]:
def update_word_description(word, new_description, index_value=None):
    query = """
    MATCH path=(root:TrieNode {letter:'root'})
    """
    for letter in word:
        query += f"-[:HAS]->(:TrieNode {{letter:'{letter}'}})"
    if index_value is not None:
        query += f"""
        WITH nodes(path)[-1] AS word_node
        SET word_node.description = $new_description
        """
    else:
        query += """
        WHERE last(nodes(path)).is_word = true
        SET last(nodes(path)).description = $new_description
        """
    graph.run(query, new_description=new_description) 
def get_word_description(word):
    query_1 = """
    MATCH path=(root:TrieNode {letter:'root'})
    """
    for letter in word:
        query_1 += f"-[:HAS]->(:TrieNode {{letter:'{letter}'}})"
    query_desc = query_1 + """
    WHERE last(nodes(path)).is_word = true
    RETURN last(nodes(path)).description as description, last(nodes(path)).index_value as index_value
    """
    results = graph.run(query_desc).data()
    nodes = []
    for result in results:
        node = {}
        node['description'] = result['description']
        node['index_value'] = result['index_value']
        nodes.append(node)
    #print(f'Found {len(nodes)} instances of {word} with the following index values: {[node["index_value"] for node in nodes]}')
    if nodes:
        return nodes
    else:
        return None
def update_word_description_index(word, new_description, index_value=None, new_index_value=None):
    query = """
    MATCH path=(root:TrieNode {letter:'root'})
    """
    for letter in word:
        query += f"-[:HAS]->(:TrieNode {{letter:'{letter}'}})"
    if index_value is not None and new_index_value is not None:
        query += f"""
        WITH nodes(path)[-1] AS word_node
        WHERE word_node.index_value = $index_value
        SET word_node.description = $new_description, word_node.index_value = $new_index_value
        """
        graph.run(query, index_value=index_value, new_description=new_description, new_index_value=new_index_value)
    elif index_value is not None:
        query += f"""
        WITH nodes(path)[-1] AS word_node
        WHERE word_node.index_value = $index_value
        SET word_node.description = $new_description
        """
        graph.run(query, index_value=index_value, new_description=new_description)
    else:
        query += """
        WHERE last(nodes(path)).is_word = true
        SET last(nodes(path)).description = $new_description
        """
        graph.run(query, new_description=new_description)
from py2neo import Graph, Node, Relationship
def update_description_lineage(word, new_description, index_value=None):
    matcher = NodeMatcher(graph)

    if index_value:
        # find nodes with specific index value
        nodes = list(matcher.match(word, index_value=index_value))
    else:
        # find nodes with specific word value
        nodes = list(matcher.match(word))

    for node in nodes:
        # update node description
        node['description'] = new_description
        graph.push(node)

        # Create a new relationship to track data lineage
        updated_node = matcher.match(word, index_value=node['index_value']).first()
        data_lineage_relationship = Relationship(updated_node, "DERIVED_FROM", node, "DataSource")
        graph.create(data_lineage_relationship)

In [33]:
update_description_lineage('Dextrus', 'desc', 'Blog' )

In [6]:
def insert_in_neo4j(word, description, index_value = None):
    # Insert the word and its description into Neo4j
    # Start at the root node
    root_node = Node("TrieNode", letter="root")

    # Traverse Neo4j to find the node corresponding to the last character in the word
    current_node = root_node
    neo_node = None
    for i, char in enumerate(word):
        if neo_node is None:
            # If this is the first iteration, start at the root
            query = """
            MATCH (node:TrieNode {letter:$char})
            WHERE id(node) = $parentId
            RETURN node
            """
            result = graph.evaluate(query, char=char, parentId=root_node.identity)
        else:
            # Otherwise, traverse the graph from the current node
            query = """
            MATCH (parent:TrieNode)-[:HAS]->(child:TrieNode {letter:$char, index_value:$index_value})
            WHERE id(parent) = $parentId
            RETURN child
            """
            result = graph.evaluate(query, char=char, index_value=index_value, parentId=neo_node.identity)

        if result is None:
            # If the node does not exist, create it and the relationship with its parent in Neo4j
            child_neo_node = Node("TrieNode", letter=char, is_word=(i == len(word) - 1),index_value=index_value)
            if neo_node is None:
                # If this is the first iteration, the root node is the parent
                relationship = Relationship(root_node, "HAS", child_neo_node)
            else:
                relationship = Relationship(neo_node, "HAS", child_neo_node)
            graph.create(child_neo_node)
            graph.create(relationship)
            neo_node = child_neo_node
        else:
            # If the node already exists, simply move to it in Neo4j
            neo_node = result

    # Set the description property of the node corresponding to the word in Neo4j
    if neo_node['is_word']:
        query = """
        MATCH (node:TrieNode)
        WHERE id(node) = $nodeId
        SET node.description = $description
        SET node.word = $word
        """
        graph.run(query, nodeId=neo_node.identity, word = word, description=description)

##### Let's define four different domains: 
- Blog
- Videos 
- Products 
- Services 

In [8]:
indexes = ['Blog', 'Videos', 'Products', 'Services']

def DISPLAY(word):
    nodes = get_word_description(word)
    if nodes == None:
        number_nodes = 0
    else:
        number_nodes = len(nodes)
    if number_nodes > 0:
        print(f'The word {word} has been found {number_nodes} times in the following domains:')
        for i in range(number_nodes):
            instance = nodes[i]
            print('In: ' + instance['index_value'] + ' --> ' + instance['description'])
    else:
        print('The result has not been found anywhere. Would you like to define it? Press ENTER if no, or insert your description and where you would like to place it.')
        description = input()
        if description == '':
            print('Sorry for the trouble')
        else: 
            count = 1
            for i in range(len(indexes)):
                print(str(count) + '. ' + indexes[i])
                count += 1
            index_choice = input()
            index = indexes[int(index_choice) - 1]
            insert_in_neo4j(word, description, index)

##### Let's place a few nodes in the tree, one for each domain.
Some of the nodes are going to be overlapping in name. 

In _Blog_:
- Dextrus --> RightData"s second product.
- Lineage --> A newfound property of data.
- The Leadership --> Meet the leadership team.
- Updates -->  Learn the team's updates.

In _Videos_:
- Dextrus --> Dextrus demonstration.
- Rdt -->  Rdt demonstration.
- The Leadership --> Meet the leadership team.
- Atlanta --> Take a look at the HQ. 

In _Products_:
- Dextrus --> RightData's second product.
- Rdt --> RightData's first product. 
- NILE -->  SaaSification of RightData products.
- fuzzy search --> Similarity between strings.

In _Services_:
- NILE -->  SaaSification of RightData products.
- Consultation - Rdt -->  Consultation for the Rdt product.
- Consultation - Dextrus -->  Consultation for the Dextrus product. 
- Installation --> Installation of both services. 


In [9]:
start_blog = time.time()
insert_in_neo4j('Dextrus', 'RightData"s second product', 'Blog')
insert_in_neo4j('Lineage', 'A newfound property of data', 'Blog')
insert_in_neo4j('The Leadership', 'Meet the leadership team', 'Blog')
insert_in_neo4j('Updates', 'earn the team"s updates', 'Blog')
end_blog = time.time()

blog_time = end_blog - start_blog

In [11]:
start_video = time.time()
insert_in_neo4j('Dextrus', 'Dextrus demonstration', 'Videos')
insert_in_neo4j('Rdt', 'Rdt demonstration', 'Videos')
insert_in_neo4j('The Leadership', 'Meet the leadership team', 'Videos')
insert_in_neo4j('Atlanta', 'Take a look at the HQ', 'Videos')
end_video = time.time()
video_time = end_video - start_video

In [24]:
start_products = time.time()
insert_in_neo4j('Dextrus', 'RightData"s second product', 'Products')
insert_in_neo4j('Rdt', 'RightData"s first product', 'Products')
insert_in_neo4j('NILE', 'SaaSification of RightData products', 'Products')
insert_in_neo4j('fuzzy search', 'Similarity between two strings', 'Products')
end_products = time.time()
products_time = end_products - start_products

In [18]:
start_services = time.time()
insert_in_neo4j('NILE', 'SaaSification of RightData products', 'Services')
insert_in_neo4j('Consultation-Rdt', 'Consultation for the Rdt product', 'Services')
insert_in_neo4j('Consultation-Dextrus', 'Consultation for the Dextrus product', 'Services')
insert_in_neo4j('Installation', 'Installation of both services', 'Services')
end_services = time.time()

services_time = end_services - start_services

In [26]:
print('Time it took to complete the following insertions:')
print(f'For the "Blog" domain: {blog_time}')
print(f'For the "Videos" domain: {video_time}')
print(f'For the "Products" domain: {products_time}')
print(f'For the "Services" domain: {services_time}')

Time it took to complete the following insertions:
For the "Blog" domain: 0.5012834072113037
For the "Videos" domain: 0.5275223255157471
For the "Products" domain: 0.3205840587615967
For the "Services" domain: 1.411466360092163


In [34]:
DISPLAY('Dextrus')

The word Dextrus has been found 3 times in the following domains:
In: Blog --> desc
In: Videos --> Dextrus demonstration
In: Products --> RightData"s second product


In [9]:
DISPLAY('NILE')

The word NILE has been found 2 times in the following domains:
In: Services --> SaaSification of RightData products
In: Products --> SaaSification of RightData products


In [11]:
DISPLAY('The Leadership')

The word The Leadership has been found 2 times in the following domains:
In: Blog --> Meet the leadership team
In: Videos --> Meet the leadership team


In [12]:
# Define a function to retrieve the transformations over time for a node
def get_transformations_over_time(node_name):
    query = f"MATCH (n:Node {{name: '{node_name}'}})-[:DERIVED_FROM*]->(m:Node) RETURN m.name, m.description, m.timestamp ORDER BY m.timestamp"
    results = graph.run(query)
    transformations = []
    for result in results:
        transformation = {"name": result["m.name"], "description": result["m.description"], "timestamp": result["m.timestamp"]}
        transformations.append(transformation)
    return transformations
# Example usage
transformations = get_transformations_over_time("Dextrus")
for transformation in transformations:
    
    print(transformation)