In [3]:
from neo4j import GraphDatabase
import os
import pandas as pd
print("Import successful")

Import successful


In [4]:
# Connection settings are read from environment variables; a missing variable
# raises KeyError right here rather than later at connect time.
URI = os.environ["NEO4J_URI"]
USER = os.environ["NEO4J_USER_NAME"]
PASSWORD = os.environ["NEO4J_PASSWD"]

# (user, password) pair built from the two values read above.
AUTH = (USER, PASSWORD)

In [5]:
#Neo4J connect and Query Boilerplate

class Neo4jConnection:
    """Small convenience wrapper around a Neo4j driver for running Cypher.

    Failures are reported with ``print`` instead of being raised, so a failed
    call never aborts the caller: a failed ``query`` returns ``None``.
    """

    def __init__(self, uri, user, pwd):
        """Create a driver for ``uri`` authenticated with ``(user, pwd)``.

        If the driver cannot be created the error is printed and the driver
        stays ``None``; ``query`` then fails its "Driver not initialized!"
        assertion.
        """
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)

    def close(self):
        """Close the underlying driver, if one was created."""
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, parameters=None, db=None):
        """Run one Cypher statement and return its records as a list.

        Args:
            query: Cypher text to execute.
            parameters: Optional query parameters, handed to ``session.run``.
            db: Optional database name; when ``None`` the session is opened
                without a ``database`` argument.

        Returns:
            A list of the records produced by the statement, or ``None`` if
            opening the session or running the statement raised (the error is
            printed as "Query failed: ...").

        Raises:
            AssertionError: if the driver was never created.
        """
        assert self.__driver is not None, "Driver not initialized!"
        response = None
        try:
            # Only pass `database` when the caller asked for a specific one.
            session_kwargs = {} if db is None else {"database": db}
            with self.__driver.session(**session_kwargs) as session:
                # Materialise inside the `with`: the result cannot be read
                # once the session has been closed.
                response = list(session.run(query, parameters))
        except Exception as e:
            print("Query failed:", e)
        return response

    def multi_query(self, multi_line_query, parameters=None, db=None):
        """Run each non-blank line of ``multi_line_query`` as its own statement.

        Every line and its result are printed. ``parameters`` and ``db`` are
        forwarded unchanged to ``query`` for every line. Blank lines (e.g. the
        leading newline of a triple-quoted string) are skipped instead of
        being sent to the server as empty statements.
        """
        for line in multi_line_query.splitlines():
            if not line.strip():
                continue
            print(line)
            result = self.query(line, parameters=parameters, db=db)
            print(result)

In [6]:
# Open the default connection. Against an empty database the count query
# below returns `[<Record count(n)=0>]`.
conn = Neo4jConnection(uri=URI, user=USER, pwd=PASSWORD)

# A count of zero means the database is empty and should be seeded with the
# initial values.
res = conn.query('MATCH (n) RETURN count(n)')
print(res)

[<Record count(n)=17474>]


In [21]:
# Optional step: remove every node (and its relationships) from the database,
# then count the remaining nodes and print the result.
conn.query("""
MATCH (n) DETACH DELETE n;
""")

res = conn.query('MATCH (n) RETURN count(n)')
print(res)


[<Record count(n)=0>]


In [None]:
# Queries
# Cypher statements kept as plain strings. Every node loader below follows the
# same pattern: read a CSV with headers, MERGE a node on its ID property, and
# set the remaining properties only when the node is newly created.

# Load pipeline info. The CSV contains more pipeline fields than are loaded here;
# they will be loaded in the final version by adding them to this query.
load_pipeline = """LOAD CSV WITH HEADERS FROM 'file:///small-papers.csv' AS row
MERGE (pipeline:Pipeline {pipelineID: row.id})
ON CREATE SET pipeline.Title = row.title, pipeline.urlAbs = row.url_abs, pipeline.urlPdf = row.url_pdf, pipeline.gitRepo = row.git_repos;"""

# Load the task database
load_tasks = """LOAD CSV WITH HEADERS FROM 'file:///pwc-tasks.csv' AS row
MERGE (task:Task {taskID: row.id}) 
ON CREATE SET task.taskName = row.name, task.taskDesc = row.description, task.category = row.category, task.modality = row.modality;"""

# Load Datasets
load_datasets = """LOAD CSV WITH HEADERS FROM 'file:///pwc-datasets.csv' AS row
MERGE (dataset:Dataset {datasetID: row.id})
ON CREATE SET dataset.datasetName = row.name, dataset.datasetFullName = row.full_name, dataset.datasetDesc = row.description, dataset.url = row.url;"""

# Load Models/Methods
load_methods = """LOAD CSV WITH HEADERS FROM 'file:///pwc-methods.csv' AS row
MERGE (method:Method {methodID: row.id})
ON CREATE SET method.methodName = row.name, method.methodFullName = row.full_name, method.modelDesc = row.description;"""

# Load Evaluations
load_evals = """LOAD CSV WITH HEADERS FROM 'file:///pwc-evaluations.csv' AS row
MERGE (eval:Evaluation {evalID: row.id})
ON CREATE SET eval.evalName = row.name, eval.evalDesc = row.description;"""

# Load Results
load_results = """LOAD CSV WITH HEADERS FROM 'file:///pwc-results.csv' AS row
MERGE (result:Result {resultID: row.id})
ON CREATE SET result.resultMetrics = row.metrics, result.bestRank = row.best_rank, result.methodology = row.methodology, 
result.bestMetric = row.best_metric;"""


# Indexes/constraints need to exist before the relationships are created.
# Earlier, fuller version of the index/constraint set, kept for reference:
# constraint = """
# CREATE INDEX paper_id FOR (pipeline:Pipeline) ON (pipeline.pipelineID);
# CREATE INDEX task_id FOR (task:Task) ON (task.taskID);
# CREATE INDEX dataset_id FOR (dataset:Dataset) ON (dataset.datasetID);
# CREATE INDEX method_id FOR (method:Method) ON (method.methodID);
# CREATE INDEX eval_id FOR (eval:Evaluation) ON (eval.evalID);
# CREATE INDEX result_id FOR (result:Result) ON (result.resultID);
# CREATE CONSTRAINT pipeline_id FOR (pipeline:Pipeline) REQUIRE pipeline.pipelineID IS UNIQUE;
# CALL db.awaitIndexes();
# """

# Active subset: one statement per line. The string holds two statements, so it
# is presumably meant for conn.multi_query (which runs line by line) rather
# than conn.query - TODO confirm.
constraint = """
CREATE INDEX dataset_id FOR (dataset:Dataset) ON (dataset.datasetID);
CREATE CONSTRAINT eval_id FOR (eval:Evaluation) REQUIRE eval.evalID IS UNIQUE;
"""

# Lists the indexes currently defined in the database.
call = """CALL db.indexes();"""

# Loading Relationships
# Evaluation-Dataset relationship. NOTE(review): the variable name says
# "pipeline", but the query links Evaluation nodes to Dataset nodes - consider
# renaming once all users of this name are known.
rel_pipeline_dataset = """LOAD CSV WITH HEADERS FROM 'file:///relations/eval_dataset.csv' AS row
MATCH (eval:Evaluation {evalID: row.eval_id})
MATCH (dataset:Dataset {datasetID: row.dataset_id})
MERGE (eval)-[pd:evaluatedON]->(dataset);"""
# Unused leftover clause; it refers to `op`, which is not defined in the query above.
#ON CREATE SET op.unitPrice = toFloat(row.UnitPrice), op.quantity = toFloat(row.Quantity);

In [None]:
# Run the node loaders that are currently enabled; the commented-out loaders
# are switched off for now.
# res_pipeline=conn.query(load_pipeline)
# res_task=conn.query(load_tasks)
res_datasets = conn.query(load_datasets)
# res_methods=conn.query(load_methods)
res_evals = conn.query(load_evals)
# res_results=conn.query(load_results)

# conn.query() prints the error and returns None when a statement fails, so
# success is only reported when every enabled load returned a result.
if res_datasets is None or res_evals is None:
    print("Node loading failed - see the 'Query failed' message above")
else:
    print("Nodes added")

In [12]:
# Spot-check the loaded data: fetch a handful of Task nodes and print them.
# The query is run here so the cell does not print whatever `res` happened to
# hold from a previously executed cell.
res = conn.query('MATCH (p:Task) return p LIMIT 5;')

# Alternative check - evaluations attached to one dataset:
# relationship_query = conn.query("""MATCH path = (dataset:Dataset {datasetID: "mnist"})<-[pd:evaluatedON]-(eval)
# RETURN path
# LIMIT 25;""")
print(res)

[<Record p=<Node element_id='4' labels=frozenset({'Task'}) properties={'modality': '[]', 'category': '[]', 'taskID': 'task'}>>, <Record p=<Node element_id='5' labels=frozenset({'Task'}) properties={'taskName': '2048', 'modality': '[]', 'category': '[]', 'taskID': '2048'}>>, <Record p=<Node element_id='6' labels=frozenset({'Task'}) properties={'taskName': '2D Classification', 'modality': "['2d']", 'category': "['classification']", 'taskID': '2d-classification'}>>, <Record p=<Node element_id='7' labels=frozenset({'Task'}) properties={'taskDesc': 'What is Human Pose Estimation?\r\nHuman pose estimation is the process of estimating the configuration of the body (pose) from a single, typically monocular, image. Background. Human pose estimation is one of the key problems in computer vision that has been studied for well over 15 years.  The reason for its importance is the\r\nabundance of applications that can benefit from such a technology. For example,\r\nhuman pose estimation allows for h

In [None]:
# Loading RDF with neosemantics (n10s). Src - https://neo4j.com/labs/neosemantics/tutorial/
# Each statement below spans several lines, so run it with conn.query();
# conn.multi_query() splits on line breaks and would break the statement apart.

# MUST - create a uniqueness constraint on the `uri` property of Resource nodes
# before importing. Written in the FOR ... REQUIRE form, the same syntax the
# Evaluation constraint in this notebook uses.
# NOTE: this rebinds the `constraint` name that an earlier cell also assigns.
constraint = """CREATE CONSTRAINT n10s_unique_uri FOR (r:Resource)
REQUIRE r.uri IS UNIQUE"""

# To drop the constraint - DROP CONSTRAINT n10s_unique_uri

# Add graph config
graph_config = """CALL n10s.graphconfig.init({
  handleVocabUris: 'MAP'
})"""

# Fetch the ML-Schema Turtle file through the n10s preview procedure.
load_rdf = """CALL n10s.rdf.preview.fetch(
  'https://raw.githubusercontent.com/ML-Schema/core/master/MLSchema.ttl',
  'Turtle'
)"""