# Batch‐import into Neo4j via py2neo

In [3]:
from py2neo import Graph
import torch
from torch_geometric.utils import to_networkx
from pathlib import Path
import os

# Load Neo4j credentials from config
import yaml
# The config file is resolved relative to the notebook's working directory.
config_path = os.path.join(os.pardir, "config", "config.yaml")

# safe_load parses the YAML document into plain Python containers.
with open(config_path) as config_file:
    cfg = yaml.safe_load(config_file)

# Connection settings are read from the top-level "neo4j" section.
neo_cfg = cfg["neo4j"]
neo_auth = (neo_cfg["user"], neo_cfg["password"])

# Open the connection, then run a trivial query as a smoke test so that a bad
# URI or bad credentials fail here rather than in a later cell.
graph = Graph(neo_cfg["uri"], auth=neo_auth)
smoke_test = graph.run("RETURN 1 AS test").data()
print("Neo4j connection test:", smoke_test)

Neo4j connection test: [{'test': 1}]


# Load cleaned PyG data and build NX graph

In [4]:
# The repository root is taken to be the parent of the current working
# directory, so this cell must be run from a direct subfolder of the repo.
repo_root = Path.cwd().resolve().parent
cleaned_data_path = repo_root.joinpath("data", "processed", "cleaned_data.pt")

# NOTE(review): weights_only=False makes torch.load unpickle arbitrary Python
# objects, which can execute code. Only load files produced by this project.
data = torch.load(cleaned_data_path, weights_only=False)

# to_undirected=True asks for an undirected networkx graph instead of a DiGraph.
G = to_networkx(data, to_undirected=True)

# Assemble node and edge payload

In [5]:
def _node_record(node_id):
    """Build the property dict sent to Neo4j for one node of ``G``.

    The node id is used directly as the index into the per-node tensors on
    ``data`` (``y_bot``, ``y_stance`` and the three split masks). Every value is
    converted to a plain ``int``/``bool`` before it goes into the dict.
    """
    return {
        "user_id": int(node_id),
        "is_bot": int(data.y_bot[node_id].item()),
        "stance": int(data.y_stance[node_id].item()),
        "train_mask": bool(data.train_mask[node_id].item()),
        "val_mask": bool(data.val_mask[node_id].item()),
        "test_mask": bool(data.test_mask[node_id].item()),
    }


# One record per node, in the iteration order of G.nodes().
nodes_payload = list(map(_node_record, G.nodes()))

# One record per edge; source/target follow the order in which G.edges()
# yields each pair.
edges_payload = [
    dict(source=int(src), target=int(dst))
    for src, dst in G.edges()
]

# Reset the database and create nodes in batch

In [6]:
# Reset the database and (re)create every User node.
#
# All work is chunked so that no single transaction has to hold the whole
# graph in the server heap. A one-shot `MATCH (n) DETACH DELETE n` or a single
# UNWIND over every node can exhaust the heap on a large graph.
DELETE_BATCH_SIZE = 10_000  # nodes removed per transaction
NODE_BATCH_SIZE = 5_000     # nodes merged per transaction

# Wipe existing data in bounded transactions. count(*) is 0 once nothing is
# left to delete, which ends the loop.
while True:
    deleted = graph.evaluate(
        """
        MATCH (n)
        WITH n LIMIT $limit
        DETACH DELETE n
        RETURN count(*) AS deleted
        """,
        limit=DELETE_BATCH_SIZE,
    )
    if not deleted:
        break

# A uniqueness constraint on user_id gives Neo4j an index to use for the MERGE
# below and for later lookups by user_id. IF NOT EXISTS makes this safe to
# re-run. The FOR ... REQUIRE form needs Neo4j 4.4 or newer.
graph.run("""
CREATE CONSTRAINT user_id_unique IF NOT EXISTS
FOR (u:User) REQUIRE u.user_id IS UNIQUE
""")

# Create nodes in batch: one transaction per chunk of NODE_BATCH_SIZE rows.
node_query = """
UNWIND $rows AS row
MERGE (u:User {user_id: row.user_id})
  SET u.is_bot     = row.is_bot,
      u.stance     = row.stance,
      u.train_mask = row.train_mask,
      u.val_mask   = row.val_mask,
      u.test_mask  = row.test_mask
"""
for start in range(0, len(nodes_payload), NODE_BATCH_SIZE):
    graph.run(node_query, rows=nodes_payload[start : start + NODE_BATCH_SIZE])

# Create relationships in batch


In [7]:
# Import the relationships in fixed-size chunks, one transaction per chunk.
BATCH_SIZE = 2000  # rows per transaction; lower it if the server heap is small

# Each row below looks up two users by user_id. Without an index on
# :User(user_id) the server has nothing to seek on, which makes every batch
# expensive and is the likely cause of the heap exhaustion seen on this cell
# (NOTE(review): confirm with EXPLAIN/PROFILE on your server). A uniqueness
# constraint provides that index; IF NOT EXISTS makes this a no-op when it is
# already present. The FOR ... REQUIRE form needs Neo4j 4.4 or newer.
graph.run("""
CREATE CONSTRAINT user_id_unique IF NOT EXISTS
FOR (u:User) REQUIRE u.user_id IS UNIQUE
""")

total = len(edges_payload)
for i in range(0, total, BATCH_SIZE):
    batch = edges_payload[i : i + BATCH_SIZE]
    # MERGE keeps the import idempotent: re-running a batch does not create
    # duplicate FOLLOWS relationships between the same ordered pair of users.
    graph.run(
        """
        UNWIND $rows AS row
        MATCH (u:User {user_id: row.source})
        MATCH (v:User {user_id: row.target})
        MERGE (u)-[:FOLLOWS]->(v)
        """,
        rows=batch
    )
    print(f"Imported relationships {i+1:,}–{min(i+BATCH_SIZE, total):,}")


TransientError: [General.OutOfMemoryError] There is not enough memory to perform the current task. Please try increasing 'server.memory.heap.max_size' in the neo4j configuration (normally in 'conf/neo4j.conf' or, if you are using Neo4j Desktop, found through the user interface) or if you are running an embedded installation increase the heap by using '-Xmx' command line flag, and then restart the database.