In [None]:
from neo4j import GraphDatabase
from itertools import islice
import os



# Connection settings. Each value can be overridden through an environment
# variable so real credentials never need to be committed with the notebook;
# the fallbacks match a default local test instance.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
USER = os.environ.get("NEO4J_USER", "neo4j")
PASSWORD = os.environ.get("NEO4J_PASSWORD", "neo4jtest")

# Directory holding the <ego_id>.edges / .circles / .feat / .featnames / .egofeat files.
DATA_DIR = "./facebook"
# Default number of rows sent to the database per batched query.
BATCH_SIZE = 500

# Single shared driver for the whole notebook; sessions are opened from it on demand.
driver = GraphDatabase.driver(neo4j_uri, auth=(USER, PASSWORD))


(Optional) Reset the database: drop all constraints and indexes, then delete every node and relationship.

In [None]:
# Wipe the database so the import below starts from a clean slate.
# 1. Asserting an empty schema through APOC drops existing indexes and
#    constraints (requires the APOC plugin on the server).
# 2. DETACH DELETE removes every node together with its relationships.
reset_statements = (
    "CALL apoc.schema.assert({}, {});",
    "MATCH (n) DETACH DELETE n",
)
with driver.session() as session:
    for statement in reset_statements:
        session.run(statement)

In [None]:
# Useful function to run cypher queries
def run_cypher(tx_or_query, params=None, many=False):
    """Run one Cypher statement in a fresh session and return its records.

    Args:
        tx_or_query: The Cypher query text handed to ``session.run``.
        params: Optional mapping of query parameters; a falsy value is
            replaced by an empty dict.
        many: Accepted for call-site compatibility; currently unused.

    Returns:
        A list with every record of the result, materialised before the
        session is closed.
    """
    parameters = params if params else {}
    with driver.session() as session:
        result = session.run(tx_or_query, parameters)
        # Exhaust the result while the session is still open.
        return list(result)

def chunked(iterable, size=500):
    """Lazily split ``iterable`` into consecutive lists of at most ``size`` items.

    Args:
        iterable: Any iterable; it is consumed exactly once, front to back.
        size: Maximum length of each yielded list (default 500).

    Yields:
        Non-empty lists of items in their original order; only the final list
        may be shorter than ``size``.
    """
    source = iter(iterable)
    # Two-argument iter(): call the lambda repeatedly until it returns the
    # sentinel [] — which happens as soon as the source is exhausted.
    yield from iter(lambda: list(islice(source, size)), [])


In [None]:
# Guarantee that no two User nodes share the same id.
# IF NOT EXISTS keeps this cell safe to re-run.
q = (
    "CREATE CONSTRAINT IF NOT EXISTS "
    "FOR (u:User) REQUIRE u.id IS UNIQUE"
)
run_cypher(q)

# Loading Dataset

In [None]:

def _build_feature_map(featnames, values):
    """Pair feature names with values by position, truncating to the shorter list."""
    return dict(zip(featnames, values))


def load_ego_network(ego_id, path, batch_size=500):
    """Load one ego network (edges, circles, features) from ``path`` into Neo4j.

    Files are looked up as ``<ego_id>.edges``, ``.circles``, ``.feat``,
    ``.featnames`` and ``.egofeat`` inside ``path``. The edges file is
    required; every other file is optional and skipped when absent.

    All user ids are stored as strings, exactly as they appear in the files,
    so that edge, circle and feature rows refer to the same ``User`` nodes.

    Args:
        ego_id: Identifier of the ego network; converted to ``str``.
        path: Directory containing the network files.
        batch_size: Number of rows sent per batched query (default 500).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        FileNotFoundError: If the ``.edges`` file does not exist.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Node ids coming from the files are strings; keep the ego id comparable.
    ego_id = str(ego_id)
    print(f"🚀 Loading ego network {ego_id}")

    edges_path = os.path.join(path, f"{ego_id}.edges")
    circles_path = os.path.join(path, f"{ego_id}.circles")
    feat_path = os.path.join(path, f"{ego_id}.feat")
    featnames_path = os.path.join(path, f"{ego_id}.featnames")
    egofeat_path = os.path.join(path, f"{ego_id}.egofeat")

    # --- Create uniqueness constraints (no-ops when they already exist) ---
    run_cypher("CREATE CONSTRAINT user_id IF NOT EXISTS FOR (u:User) REQUIRE u.id IS UNIQUE;")
    run_cypher("CREATE CONSTRAINT circle_id IF NOT EXISTS FOR (c:Circle) REQUIRE c.id IS UNIQUE;")

    # --- Import edges ---
    with open(edges_path, encoding="utf-8") as f:
        # Keep only rows that carry both endpoints; blank or short lines are skipped.
        edges = [pair for pair in (line.split() for line in f) if len(pair) >= 2]

    print(f"   Found {len(edges)} edges.")
    for start in range(0, len(edges), batch_size):
        run_cypher("""
        UNWIND $rows AS row
        MERGE (a:User {id: row[0]})
        MERGE (b:User {id: row[1]})
        MERGE (a)-[:FRIEND_WITH]-(b)
        """, {"rows": edges[start:start + batch_size]})

    # --- Mark ego node ---
    run_cypher("MERGE (e:User {id:$ego}) SET e.ego=true", {"ego": ego_id})

    # --- Import circles ---
    if os.path.exists(circles_path):
        with open(circles_path, encoding="utf-8") as f:
            for line in f:
                parts = line.split()
                # A circle needs a name plus at least one member.
                if len(parts) < 2:
                    continue
                circle_name, members = parts[0], parts[1:]
                # Prefix with the ego id so circle ids stay unique across networks.
                cid = f"{ego_id}_{circle_name}"
                run_cypher("""
                MERGE (c:Circle {id:$cid}) SET c.ego=$ego
                WITH c
                UNWIND $members AS uid
                  MERGE (u:User {id:uid})
                  MERGE (u)-[:MEMBER_OF]->(c)
                """, {"cid": cid, "ego": ego_id, "members": members})
        print("   Circles imported.")

    # --- Read feature names (line position = feature column) ---
    featnames = []
    if os.path.exists(featnames_path):
        with open(featnames_path, encoding="utf-8") as f:
            featnames = [line.strip() for line in f]

    has_feature_files = os.path.exists(feat_path) or os.path.exists(egofeat_path)
    if not featnames and has_feature_files:
        # Without names the feature columns cannot be mapped to properties.
        print("   No feature names available; skipping feature import.")

    # --- Import features ---
    if featnames and os.path.exists(feat_path):
        rows = []
        with open(feat_path, encoding="utf-8") as f:
            for line in f:
                tokens = line.split()
                if not tokens:
                    continue
                rows.append({
                    # The id stays a string so it matches the nodes created from the edges.
                    "id": tokens[0],
                    "features": _build_feature_map(featnames, [int(x) for x in tokens[1:]]),
                })
        # One batched query per chunk instead of one session per user.
        for start in range(0, len(rows), batch_size):
            run_cypher("""
            UNWIND $rows AS row
            MATCH (u:User {id: row.id})
            SET u += row.features
            """, {"rows": rows[start:start + batch_size]})

        print(f"   Features loaded for {len(rows)} users.")

    # --- Ego features ---
    if featnames and os.path.exists(egofeat_path):
        with open(egofeat_path, encoding="utf-8") as f:
            ego_features = [int(x) for x in f.read().split()]
        ego_dict = _build_feature_map(featnames, ego_features)
        run_cypher("MATCH (e:User {id:$id}) SET e += $features", {"id": ego_id, "features": ego_dict})

    print(f"✅ Done loading ego network {ego_id}.\n")

# Import the ego network whose files are named "0.*" inside DATA_DIR.
load_ego_network(ego_id="0", path=DATA_DIR)