## Loading data into neo4j


In [5]:
from neo4j import GraphDatabase

class Neo4jConnect:
    def __init__(self, user, password):
        self.driver = GraphDatabase.driver("bolt://localhost:7687", auth=(user, password))
        self.user = user
    def close(self):
        self.driver.close()
        
    def query(self, query):
        session = self.driver.session(database="cran")
        result = session.run(query)
        return result
    
    def clean_database(self):
        """
        Delete all nodes and edge in Neo4j test container
        """
        q = "MATCH (n) DETACH DELETE (n)"
        try:
            session = self.driver.session()
            session.run(q)
        except Exception as e:
            print(e)
    
    def test_neo4j_connection(self):
        """Test neo4j connection
        """
        session = self.driver.session()
        result = session.run("Match () Return 1 Limit 1")
        return result
    def __str__(self):
        return f'Connection successful for user: {self.user}.\nConnected to Neo4J database uri: {self.uri}'
    
connection = Neo4jConnect('admin', 'cran2graph')

In [None]:
from graphutils.utils import Neo4jConnect

connection = Neo4jConnect('admin', 'cran2graph')

# Create a constraint on 'Package' nodes based on the 'package' property
constraint_package_query = "CREATE CONSTRAINT IF NOT EXISTS FOR (p:Package) REQUIRE p.package IS UNIQUE"

# Create a constraint on 'Person' nodes based on the 'person' property
constraint_person_query = "CREATE CONSTRAINT IF NOT EXISTS FOR (a:Person) REQUIRE a.person IS UNIQUE"

# Create an index on the 'person' property of 'Person' nodes
person_index_query = "CREATE INDEX IF NOT EXISTS FOR :Person(person)"

In [5]:
from graphutils.utils import get_neo4j_connection
db_session = get_neo4j_connection()

--- Logging error ---
Traceback (most recent call last):
  File "c:\Users\Dee\root\Projects\dev\cran2graph\scripts\graphutils\utils.py", line 48, in get_neo4j_connection
    driver.verify_connectivity()
  File "c:\Users\Dee\root\Projects\dev\cran2graph\venv\Lib\site-packages\neo4j\_sync\driver.py", line 985, in verify_connectivity
    self._get_server_info(session_config)
  File "c:\Users\Dee\root\Projects\dev\cran2graph\venv\Lib\site-packages\neo4j\_sync\driver.py", line 1200, in _get_server_info
    return session._get_server_info()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Dee\root\Projects\dev\cran2graph\venv\Lib\site-packages\neo4j\_sync\work\session.py", line 175, in _get_server_info
    self._connect(READ_ACCESS, liveness_check_timeout=0)
  File "c:\Users\Dee\root\Projects\dev\cran2graph\venv\Lib\site-packages\neo4j\_sync\work\session.py", line 133, in _connect
    super()._connect(
  File "c:\Users\Dee\root\Projects\dev\cran2graph\venv\Lib\site-packages\neo4j\_sync

UnboundLocalError: cannot access local variable 'session' where it is not associated with a value

In [2]:
import neo4j
import logging

def establish_connection():
    driver = neo4j.GraphDatabase.driver(
        "bolt://localhost:7687", auth=("admin", "cran2graph")
    )
    try:
        driver.verify_connectivity()
        session = driver.session(database="cran")
        return session
    except Exception as connection_error:
        logging.error("Failed to establish session to neo4j", connection_error)
        session.close()
        driver.close()  
        
db_session = establish_connection()
db_session.close()

In [47]:
from graphutils.utils import Neo4jConnect
connection = Neo4jConnect('admin', 'cran2graph')

# Create a constraint on 'Package' nodes based on the 'package' property
constraint_package_query = "CREATE CONSTRAINT IF NOT EXISTS FOR (p:Package) REQUIRE p.package IS UNIQUE"

# Create a constraint on 'Person' nodes based on the 'person' property
constraint_person_query = "CREATE CONSTRAINT IF NOT EXISTS FOR (a:Person) REQUIRE a.person IS UNIQUE"

# Create an index on the 'person' property of 'Person' nodes
person_index_query = "CREATE INDEX ON :Person(person)"

constraints = [constraint_person_query, constraint_package_query]

# for constraint in constraints:
#     print(f"Creating the constraints with: {constraint}")
#     connection.query(constraint)
# connection.close()


db_session = establish_connection()

for constraint in constraints:
    print(f"Creating the constraints with: {constraint}")
    db_session.run(constraint)
db_session.close()
    

Creating the constraints with: CREATE CONSTRAINT IF NOT EXISTS FOR (a:Person) REQUIRE a.person IS UNIQUE
Creating the constraints with: CREATE CONSTRAINT IF NOT EXISTS FOR (p:Package) REQUIRE p.package IS UNIQUE


In [None]:
## Loading package nodes from csv file
load_package_query = """
// Load 'Package' nodes from the CSV file
LOAD CSV WITH HEADERS FROM 'file:///cran_process_data.csv' AS row
CALL {
    WITH row
    MERGE (p:Package {
        package: row.package,
        version: row.version,
        license: row.license,
        md5sum: row.md5sum,
        description: row.description,
        published: row.published_date
    })
    ON CREATE SET
        p.package = row.package,
        p.version = row.version,
        p.license = row.license,
        p.md5sum = row.md5sum,
        p.description = row.description,
        p.published = row.published_date
    ON MATCH SET
        p.package = row.package,
        p.version = row.version,
        p.license = row.license,
        p.md5sum = row.md5sum,
        p.description = row.description,
        p.published = row.published_date
} IN TRANSACTIONS OF 10000 ROWS;
"""
print(f"loading query....")
db_session.run(load_package_query)
db_session.close()
print(f"end operation....")

In [53]:
## THis fix the issue
## Loading package nodes from csv file
load_package_query = """
// Load 'Package' nodes from the CSV file
LOAD CSV WITH HEADERS FROM 'file:///cran_process_data.csv' AS row
CALL {
    WITH row
    MERGE (p:Package {package: row.package})
    ON CREATE SET
        p.version = row.version,
        p.license = row.license,
        p.md5sum = row.md5sum,
        p.description = row.description,
        p.published = row.published_date
    ON MATCH SET
        p.version = row.version,
        p.license = row.license,
        p.md5sum = row.md5sum,
        p.description = row.description,
        p.published = row.published_date
} IN TRANSACTIONS OF 10000 ROWS;
"""
print(f"loading query....")
db_session.run(load_package_query)
db_session.close()
print(f"end operation....")

loading query....
end operation....


In [33]:
## Loading package nodes from csv file
load_package_query = """
// Load 'Package' nodes from the CSV file
LOAD CSV WITH HEADERS FROM 'file:///cran_process_data.csv' AS row
CALL {
    WITH row
    MERGE (p:Package {
        package: row.package,
        version: row.version,
        license: row.license,
        md5sum: row.md5sum,
        description: row.description,
        published: row.published_date
    })
    ON CREATE SET
        package = row.package,
        version = row.version,
        license = row.license,
        md5sum = row.md5sum,
        description = row.description
        published = row.published_date
    ON MATCH SET
        package = row.package,
        version = row.version,
        license = row.license,
        md5sum = row.md5sum,
        description = row.description
        published = row.published_date
} IN TRANSACTIONS OF 10000 ROWS;
"""
print(f"loading query....")
connection.query(load_package_query)
connection.close()
print(f"end operation....")

loading query....
end operation....


In [6]:
## working comment
## Loading person nodes from csv file
load_person_query = """
// Load 'person' nodes from the CSV file
LOAD CSV WITH HEADERS FROM 'file:///cran_process_data.csv' AS row
CALL {
    WITH row
    MERGE (a:Person {
        person: COALESCE(row.author, "N/A")
    })
} IN TRANSACTIONS OF 10000 ROWS;
"""
print(f"loading query....")
connection.query(load_person_query)
connection.close()
print(f"end operation....")

loading query....
end operation....


In [56]:
db_session.close()

In [3]:
## THis fix the issue
## Loading package nodes from csv file
## Loading person nodes from csv file
load_person_query = """
// Load 'person' nodes from the CSV file
LOAD CSV WITH HEADERS FROM 'file:///cran_process_data.csv' AS row
CALL {
    WITH row
    MERGE (a:Person {
        person: COALESCE(row.author, "N/A")
    })
} IN TRANSACTIONS OF 10000 ROWS;
"""
print(f"loading query....")
db_session.run(load_person_query)
db_session.close()
print(f"end operation....")

loading query....


SessionError: Session closed

In [7]:
## Loading contributed_to relationships
rel_contributed_to_query = """
// Load 'person' nodes from the CSV file
LOAD CSV WITH HEADERS FROM 'file:///cran_process_data.csv' AS row
MATCH (p:Package { package: row.package })
MATCH (a:Person { person: COALESCE(row.author, "N/A")})

MERGE (a)-[:CONTRIBUTED_TO]->(p);

"""
print(f"loading query....")
connection.query(rel_contributed_to_query)
connection.close()
print(f"end operation....")

loading query....


  session = self.driver.session(database="cran")


end operation....


In [8]:
## Loading contributed_to relationships
rel_maintains_query = """
// Load 'person' nodes from the CSV file
LOAD CSV WITH HEADERS FROM 'file:///cran_process_data.csv' AS row
MATCH (p:Package { package: row.package })
MATCH (a:Person { person: COALESCE(row.maintainer_name,"N/A")})

MERGE (a)-[:MAINTAINS]->(p);

"""
print(f"loading query....")
connection.query(rel_maintains_query)
connection.close()
print(f"end operation....")

loading query....


  session = self.driver.session(database="cran")


end operation....


In [41]:
from graphutils.utils import Neo4jConnect

connection = Neo4jConnect('admin', 'cran2graph')
connection.test_neo4j_connection()
connection.close()

In [22]:
from graphutils.utils import Neo4jConnect

connection = Neo4jConnect('admin', 'cran2graph')
connection.clean_database()
connection.close()

In [31]:
drop_constrainst = """

// Disable the unique constraint on 'Package' nodes
DROP INDEX ON :Package(name);

"""

connection.query(drop_constrainst)
connection.close()

CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Indexes cannot be dropped by schema, please drop by name instead: DROP INDEX index_name. The index name can be found using SHOW INDEXES. (line 4, column 1 (offset: 54))
"DROP INDEX ON :Package(name);"
 ^}

In [32]:
for constraint in connection.query("CALL db.constraints"):
        connection.query("DROP " + constraint[0])

ClientError: {code: Neo.ClientError.Procedure.ProcedureNotFound} {message: There is no procedure with the name `db.constraints` registered for this database instance. Please ensure you've spelled the procedure name correctly and that the procedure is properly deployed.}