# Batch‐import into Neo4j via py2neo

In [1]:
import os
import torch
import pandas as pd
import numpy as np
import networkx as nx
from pathlib import Path
from torch_geometric.utils import to_networkx

# ─── Generate CSVs for Neo4j Import ─────────────────────────────────────────────

# Locate the cleaned PyG dataset relative to the repository root, which is
# taken to be the parent of the current working directory.
cwd          = Path().resolve()
repo_root    = cwd.parent
cleaned_path = repo_root.joinpath("data", "processed", "cleaned_data.pt")

# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python
# objects, so this file must come from a trusted source.
data = torch.load(cleaned_path, weights_only=False)

# Convert the loaded object into an undirected NetworkX graph
G = to_networkx(data, to_undirected=True)

# Detect Louvain communities (resolution tuned from earlier).
# Louvain is a randomized algorithm: without a fixed seed each run can return
# a different partition, and therefore different cluster labels. Pinning the
# seed keeps Restart & Run All reproducible; the community count may differ
# from earlier unseeded runs.
LOUVAIN_SEED = 42
communities = nx.community.louvain_communities(
    G, resolution=3.0, seed=LOUVAIN_SEED
)
print(f"Detected {len(communities)} Louvain communities")

# Create a cluster_labels array of length N; -1 marks a node that was not
# placed in any detected community.
N = data.num_nodes
cluster_labels = np.full(N, -1, dtype=int)
for cid, comm in enumerate(communities):
    # One indexed write per community instead of one write per node
    cluster_labels[list(comm)] = cid

# Make sure the Neo4j import folder exists at the repository root
import_dir = repo_root.joinpath("neo4j_import")
import_dir.mkdir(exist_ok=True)

# nodes.csv: one row per node index, with the label and mask tensors of `data`.
# Each CSV column name is paired with the attribute of `data` it is read from.
node_attrs = {
    "is_bot":     "y_bot",
    "stance":     "y_stance",
    "train_mask": "train_mask",
    "val_mask":   "val_mask",
    "test_mask":  "test_mask",
}
node_columns = {"user_id": np.arange(N)}
for column, attr in node_attrs.items():
    node_columns[column] = getattr(data, attr).numpy()

nodes_df = pd.DataFrame(node_columns)
nodes_path = import_dir / "nodes.csv"
nodes_df.to_csv(nodes_path, index=False)

# edges.csv: one (source, target) row per edge of G
edge_pairs = list(G.edges())
edges_df = pd.DataFrame(edge_pairs, columns=["source", "target"])
edges_path = import_dir / "edges.csv"
edges_df.to_csv(edges_path, index=False)

# clusters.csv: pairs every user_id with its entry in cluster_labels
clusters_df = pd.DataFrame({"user_id": np.arange(N)}).assign(
    cluster=cluster_labels
)
clusters_path = import_dir / "clusters.csv"
clusters_df.to_csv(clusters_path, index=False)

# Report where the CSV files were written
print("CSVs generated in:", import_dir)



Detected 97 Louvain communities
CSVs generated in: /Users/dennisberger/Library/Mobile Documents/com~apple~CloudDocs/Uni/FS_2025/Social Media Analytics/project/social-botnet-analytics/neo4j_import
