In [1]:
import ast
import getpass
import os

from neo4j import GraphDatabase
import pandas as pd

In [2]:
# CSV locations, grouped per PubMed dataset variant (one folder each).
path_pubmed_33_nodes = "pubmed_33/nodes.csv"
path_pubmed_33_edges = "pubmed_33/edges.csv"

path_pubmed_66_nodes = "pubmed_66/nodes.csv"
path_pubmed_66_edges = "pubmed_66/edges.csv"

path_pubmed_full_nodes = "pubmed_full/nodes.csv"
path_pubmed_full_edges = "pubmed_full/edges.csv"

# Load the three node tables, then the three edge lists, into DataFrames.
pubmed_33_nodes, pubmed_66_nodes, pubmed_full_nodes = [
    pd.read_csv(csv_path)
    for csv_path in (path_pubmed_33_nodes, path_pubmed_66_nodes, path_pubmed_full_nodes)
]
pubmed_33_edges, pubmed_66_edges, pubmed_full_edges = [
    pd.read_csv(csv_path)
    for csv_path in (path_pubmed_33_edges, path_pubmed_66_edges, path_pubmed_full_edges)
]

In [3]:
# "First Create an empty database, within a project"

# Connection settings. Each one can be overridden through an environment
# variable; the defaults are the values of a local Neo4j Desktop setup.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
# "neo4j" is most probably the user name unless it was changed while installing Neo4j Desktop.
username = os.environ.get("NEO4J_USERNAME", "neo4j")
database_name = os.environ.get("NEO4J_DATABASE", "d3.pubmed")  # Database name

# The password must not live in the notebook: take it from the environment,
# or ask for it interactively (the input is not echoed) when it is not set.
password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")

# One shared driver for the whole notebook; sessions are opened from it on demand.
driver = GraphDatabase.driver(uri, auth=(username, password))

In [4]:
def check_connection():
    """Print a confirmation message if the configured database answers a query.

    Opens a session on ``database_name`` through the notebook-level ``driver``,
    runs a constant ``RETURN`` statement and prints the ``message`` field of
    every record that comes back. Any exception is reported with ``print``
    instead of being raised; the function always returns ``None``.
    """
    probe_query = "RETURN 'Connection to database successful' AS message"
    try:
        with driver.session(database=database_name) as session:
            # The statement touches no stored data; receiving a record back
            # shows that the server and the database are reachable.
            for record in session.run(probe_query):
                print(record["message"])
    except Exception as error:
        print("Error connecting to the database:", error)


# Run the probe right away so a bad configuration shows up before any loading.
check_connection()

Connection to database successful


In [5]:
# Function to create nodes in the specified database with a dynamic label
def create_nodes(data, driver, label, database=None):
    """Create one Neo4j node per row of a node-table CSV file.

    Each row becomes a node ``(:<label> {id, features, label})``. The statement
    uses ``CREATE``, so loading the same file twice produces duplicate nodes.

    Parameters
    ----------
    data : str or path-like
        Location of the CSV file. It must provide the columns ``id``,
        ``features`` and ``label``; any other column is ignored.
    driver : neo4j.Driver
        Open driver used to reach the server. It is NOT closed here, so the
        same driver can serve the following loads; closing it is the caller's job.
    label : str
        Node label to assign. It is interpolated directly into the Cypher
        text, so it must be a trusted identifier.
    database : str, optional
        Database to write to. Defaults to the notebook-level ``database_name``.

    Notes
    -----
    Errors are reported with ``print`` and not re-raised. Rows are written one
    statement at a time, so rows sent before a failure stay in the database.
    """
    try:
        target_db = database_name if database is None else database
        node_data = pd.read_csv(data)

        # ``tolist()`` hands back built-in Python scalars rather than numpy
        # ones, which not every driver version accepts as query parameters.
        rows = zip(
            node_data["id"].tolist(),
            node_data["features"].tolist(),
            node_data["label"].tolist(),
        )

        # The statement text is identical for every row, so build it once.
        query = f"CREATE (n:{label} {{id: $id, features: $features, label: $label}})"

        with driver.session(database=target_db) as session:
            for node_id, raw_features, node_class in rows:
                # The feature vector is stored as the text of a Python list.
                # SECURITY: this used to be ``eval``, which executes whatever
                # expression the CSV contains; ``ast.literal_eval`` only
                # accepts plain literals and raises on anything else.
                if isinstance(raw_features, str):
                    features = ast.literal_eval(raw_features)
                else:
                    features = raw_features

                # Pass the parameters as one dict instead of ``**kwargs`` so a
                # column name can never collide with ``session.run`` arguments.
                session.run(
                    query,
                    {"id": node_id, "features": features, "label": node_class},
                )

    except Exception as e:
        print("Error during node creation:", e)

In [6]:
def create_edges(data, driver, node_label, edge_label, database=None):
    """Create one relationship per undirected edge listed in a CSV file.

    For every row the two endpoint ids are ordered (smaller id first) and a
    relationship ``(smaller)-[:<edge_label> {value: 1}]->(larger)`` is written
    with ``MERGE``. Because the direction is canonical and ``MERGE`` reuses an
    existing match, ``a-b`` and ``b-a`` rows end up as a single relationship.
    Rows whose endpoints are not found by the ``MATCH`` clauses create nothing.

    Parameters
    ----------
    data : str or path-like
        Location of the CSV file. It must provide the columns ``source_id``
        and ``target_id``.
    driver : neo4j.Driver
        Open driver used to reach the server. It is NOT closed here, so the
        same driver can serve the following loads; closing it is the caller's job.
    node_label : str
        Label of the nodes to connect. Interpolated directly into the Cypher
        text, so it must be a trusted identifier.
    edge_label : str
        Relationship type to create. Interpolated like ``node_label``.
    database : str, optional
        Database to write to. Defaults to the notebook-level ``database_name``.

    Notes
    -----
    Errors are reported with ``print`` and not re-raised. Edges are written one
    statement at a time, so edges sent before a failure stay in the database.
    """
    try:
        target_db = database_name if database is None else database
        edge_data = pd.read_csv(data)

        # ``tolist()`` hands back built-in Python scalars rather than numpy
        # ones, which not every driver version accepts as query parameters.
        endpoints = zip(
            edge_data["source_id"].tolist(),
            edge_data["target_id"].tolist(),
        )

        with driver.session(database=target_db) as session:
            # Same statement for every edge; only the two ids vary, so the
            # text is built once instead of once per row.
            query = f"""
                MATCH (source:{node_label} {{id: $source_id}})
                MATCH (target:{node_label} {{id: $target_id}})
                MERGE (source)-[:{edge_label} {{value: 1}}]->(target)
                """

            for first_id, second_id in endpoints:
                # Canonical direction: the smaller id is always the start node.
                source_id = min(first_id, second_id)
                target_id = max(first_id, second_id)
                session.run(query, {"source_id": source_id, "target_id": target_id})

    except Exception as e:
        print("Error during edge creation:", e)

In [7]:
# One entry per dataset variant: (node label, nodes CSV, edges CSV).
# The relationship type is derived from the label as "cites_<label>".
pubmed_datasets = [
    ("pubmed_33", path_pubmed_33_nodes, path_pubmed_33_edges),
    ("pubmed_66", path_pubmed_66_nodes, path_pubmed_66_edges),
    ("pubmed_full", path_pubmed_full_nodes, path_pubmed_full_edges),
]

try:
    for dataset_label, nodes_csv, edges_csv in pubmed_datasets:
        # Nodes go in first: the edge statements look their endpoints up by id.
        create_nodes(nodes_csv, driver, dataset_label)
        create_edges(edges_csv, driver, dataset_label, f"cites_{dataset_label}")
finally:
    # This cell is the last user of the connection, so release it here,
    # whether or not every load succeeded.
    driver.close()