In [1]:
import ast
import csv
import os
from collections import defaultdict

from neo4j import GraphDatabase

In [2]:
# Connection settings for the Neo4j instance.
# Each value can be overridden through an environment variable so that real
# credentials do not have to be stored in the notebook; the literals are
# fallbacks that keep the previous behaviour when no variable is set.
uri = os.environ.get("NEO4J_URI", "bolt://10.2.42.255:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
# NOTE(review): this fallback is a secret committed with the notebook - rotate
# it and drop the default once NEO4J_PASSWORD is provided by the environment.
password = os.environ.get("NEO4J_PASSWORD", "pass12345")
driver = GraphDatabase.driver(uri, auth=(username, password))

## Importing entities into Neo4j

In [3]:
# Maximum number of CSV rows to ingest; set to None to process the whole file.
MAX_ROWS = 5

# Accumulators filled by the loop below:
#   entity_set        - every distinct (name, type) pair seen in the processed rows
#   relationship_dict - (name1, type1, name2, type2) -> number of rows in which
#                       that pair of entities appears together
entity_set = set()
relationship_dict = defaultdict(int)

# Read the CSV and count, per row (one article per row), which entities co-occur.
with open('output_final.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    row_count = 0
    for row in reader:
        if MAX_ROWS is not None and row_count >= MAX_ROWS:
            break
        # The 'Entities' column is parsed as a Python literal holding
        # (name, type) pairs; literal_eval does not execute arbitrary code.
        entities_list = ast.literal_eval(row['Entities'])
        # Deduplicate within the article so a pair is counted once per row.
        entity_set_in_article = {
            (entity_name, entity_type) for entity_name, entity_type in entities_list
        }
        entity_set.update(entity_set_in_article)
        # Sort the (name, type) tuples so every pair is emitted in one canonical
        # order. Comparing names alone left pairs with equal names but different
        # types in set-iteration order, which could split one pair across two keys.
        # Assumes names and types are mutually comparable (e.g. all strings).
        entities = sorted(entity_set_in_article)
        for i in range(len(entities)):
            entity1_name, entity1_type = entities[i]
            for j in range(i + 1, len(entities)):
                entity2_name, entity2_type = entities[j]
                key = (entity1_name, entity1_type, entity2_name, entity2_type)
                relationship_dict[key] += 1
        row_count += 1

# Function to batch execute queries
def batch_executor(session, queries):
    """Run all ``queries`` inside a single explicit transaction.

    Args:
        session: Object exposing ``begin_transaction()`` whose result is a
            context manager with ``run`` and ``commit`` methods (in this
            notebook, a Neo4j driver session).
        queries: Iterable of ``(query_text, params)`` pairs, where ``params``
            is a dict of query parameters.

    Raises:
        Whatever ``run`` or ``commit`` raises. In that case the transaction is
        not committed; leaving the ``with`` block closes it instead of leaving
        it open on the session (the Neo4j driver rolls back a transaction
        that was not committed).
    """
    with session.begin_transaction() as tx:
        for query, params in queries:
            # Pass the parameters as one mapping rather than **kwargs so a
            # parameter named e.g. "query" cannot clash with run()'s own arguments.
            tx.run(query, params)
        # Commit explicitly once every statement has been submitted, rather
        # than relying on what the context manager does on a clean exit.
        tx.commit()

# Write the collected entities and co-occurrence counts to Neo4j.
# The try/finally ensures the driver is closed even when a query fails;
# previously an exception skipped driver.close() and leaked the driver.
try:
    with driver.session() as session:
        # One MERGE per distinct (name, type) pair. Nodes are matched on name
        # only, and type is written only when the node is first created.
        node_queries = [
            (
                "MERGE (e:Entity {name: $name}) "
                "ON CREATE SET e.type = $type",
                {'name': entity_name, 'type': entity_type},
            )
            for entity_name, entity_type in entity_set
        ]
        batch_executor(session, node_queries)

        # One MERGE per entity pair. $weight is added to any weight already
        # stored on the relationship, so re-running this cell against the same
        # database increases the stored weights again.
        # NOTE(review): nodes are looked up by name alone, so a pair whose two
        # names are equal (same name, different types) resolves to a single
        # node and yields a self-relationship - confirm whether that is wanted.
        rel_queries = [
            (
                "MATCH (e1:Entity {name: $name1}), (e2:Entity {name: $name2}) "
                "MERGE (e1)-[r:CO_OCCURS_WITH]->(e2) "
                "SET r.weight = coalesce(r.weight, 0) + $weight",
                {'name1': entity1_name, 'name2': entity2_name, 'weight': weight},
            )
            for (entity1_name, _, entity2_name, _), weight in relationship_dict.items()
        ]
        batch_executor(session, rel_queries)
finally:
    driver.close()
