In [1]:
import os
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the per-node attribute table; the first CSV column becomes the index,
# which is later used as the node id when attributes are attached to the graph.
node_data = pd.read_csv(
    '../data/node_data.csv',
    index_col=0,
)

In [3]:
# Read the edge list: space-separated node pairs, one per line, no header row.
edges_file = "../data/facebook/facebook_combined.txt"
edges = pd.read_csv(edges_file, sep=' ', header=None, names=['node1', 'node2'])

# Build the undirected graph from the edge pairs.
G = nx.Graph()
G.add_edges_from(edges.values)

# Attach every node_data row to its node as an attribute dict keyed by column name.
node_attributes = {
    node_id: row.to_dict()
    for node_id, row in node_data.iterrows()
}
nx.set_node_attributes(G, node_attributes)

In [4]:
directory = "../data/facebook/facebook"

# node id -> set of ego ids whose ".circles" file lists that node
node_to_ego = {}

# Load the circle memberships of every ego user.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# resulting dict (and everything written from it) reproducible across runs.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".circles"):
        continue

    # The file name is "<ego_id>.circles".
    ego_id = int(filename.split('.')[0])

    circles_file = os.path.join(directory, filename)
    with open(circles_file, 'r') as file:
        circles = [line.strip().split('\t') for line in file]

    for circle in circles:
        # The first tab-separated field is skipped (presumably the circle
        # label - confirm against the dataset README); the rest are node ids.
        for node in circle[1:]:
            node_to_ego.setdefault(int(node), set()).add(ego_id)

# Freeze each membership set into a sorted list so the order of ego ids per
# node is deterministic instead of depending on set iteration order.
node_to_ego = {node: sorted(egos) for node, egos in node_to_ego.items()}

In [5]:
# Report the size of the constructed graph.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print(f"Broj čvorova u grafu: {node_count}")
print(f"Broj ivica u grafu: {edge_count}")

Broj čvorova u grafu: 4039
Broj ivica u grafu: 88234


In [6]:
# Number of attributes stored on node 0, used as a sample node.
attribute_count = len(G.nodes[0])
print(f"Broj atributa: {attribute_count}")

Broj atributa: 2255


In [7]:
import csv

output_csv_file = "node_to_ego.csv"

# Write one (node_id, ego_id) row per membership, preceded by a header row.
with open(output_csv_file, mode='w', newline='') as csv_out:
    pair_writer = csv.writer(csv_out)
    pair_writer.writerow(['node_id', 'ego_id'])
    pair_writer.writerows(
        [node_id, ego_id]
        for node_id, egos in node_to_ego.items()
        for ego_id in egos
    )

## Ciljna promenljiva (target variable)

In [8]:
# Map every ego id to a fixed position in the target vector (ascending id order).
unique_ego_ids = sorted({ego_id for egos in node_to_ego.values() for ego_id in egos})
ego_id_to_index = {ego_id: i for i, ego_id in enumerate(unique_ego_ids)}

# Store a multi-hot "target" list on every node: position k is 1 when the node
# appears in a circle of the ego mapped to index k, otherwise 0. Nodes that are
# absent from node_to_ego get an all-zero vector.
for node in G.nodes:
    member_columns = {
        ego_id_to_index[ego_id] for ego_id in node_to_ego.get(node, ())
    }
    G.nodes[node]['target'] = [
        1 if column in member_columns else 0
        for column in range(len(unique_ego_ids))
    ]

In [9]:
output_csv_file = "ego_to_index.csv"

# Persist the ego id -> target-vector index mapping, one row per ego,
# preceded by a header row.
with open(output_csv_file, mode='w', newline='') as csv_out:
    mapping_writer = csv.writer(csv_out)
    mapping_writer.writerow(['ego_id', 'index'])
    mapping_writer.writerows(ego_id_to_index.items())

In [10]:
import pickle

# Serialize the graph, including all node attributes, to disk.
# Pickle files must only be loaded back from a trusted source.
with open("graph.pkl", "wb") as graph_file:
    pickle.dump(G, graph_file)

In [11]:
# Display the full attribute dict stored on node 17; as the cell's last
# expression, it is rendered as the cell output.
G.nodes[17]

{'0 birthday;anonymized feature 0': 1,
 '0 birthday;anonymized feature 1003': 0,
 '0 birthday;anonymized feature 1172': 0,
 '0 birthday;anonymized feature 2': 0,
 '0 birthday;anonymized feature 206': 0,
 '0 birthday;anonymized feature 208': 0,
 '0 birthday;anonymized feature 209': 0,
 '0 birthday;anonymized feature 376': 0,
 '0 birthday;anonymized feature 6': 0,
 '0 birthday;anonymized feature 729': 0,
 '1 birthday;anonymized feature 0': 0,
 '1 birthday;anonymized feature 1': 0,
 '1 birthday;anonymized feature 1004': 0,
 '1 birthday;anonymized feature 2': 0,
 '1 birthday;anonymized feature 207': 0,
 '1 birthday;anonymized feature 208': 0,
 '1 birthday;anonymized feature 3': 0,
 '1 birthday;anonymized feature 730': 0,
 '1 education;concentration;id;anonymized feature 14': 0,
 '10 birthday;anonymized feature 1006': 0,
 '10 birthday;anonymized feature 211': 0,
 '10 birthday;anonymized feature 7': 0,
 '10 education;classes;id;anonymized feature 10': 0,
 '10 education;concentration;id;anony