In [2]:
%load_ext autoreload

In [3]:
import networkx as nx
import pandas as pd
import json

from collections import defaultdict
from typing import Dict, Set, List


In [46]:
def load_graph(src: str) -> nx.Graph:
    G = nx.read_gexf(src)
    mapping = {node: int(node) for node in G.nodes()}
    return nx.relabel_nodes(G, mapping)

def load_features(src: str) -> pd.DataFrame:
    features_to_remove = ["EN", "OTHER"]
    features = pd.read_csv(src)
    features = features[~features["language"].isin(features_to_remove)]
    features = features[["language", "numeric_id"]]
    features.reset_index(inplace=True, drop=True)
    return features

def load_real_features(src: str, graph: nx.Graph) -> tuple[pd.DataFrame, Dict[str, Set[int]]]:
    features = load_features(src)
    real_communities: Dict[str, Set[int]] = defaultdict(set)
    for node in graph.nodes:
        language = features[features["numeric_id"] == node].iloc[0]["language"]
        real_communities[language].add(node)
    return (features, real_communities)

def unitary_degree(graph: nx.Graph, edge):
    return 1

def max_degree(graph: nx.Graph, edge):
    n1, n2 = edge
    d1, d2 = graph.degree(n1), graph.degree(n2)
    return max(d1, d2)

def max_pow_degree(graph: nx.Graph, edge):
    n1, n2 = edge
    d1, d2 = graph.degree(n1), graph.degree(n2)
    return max(d1, d2)**2

def sum_of_degree(graph: nx.Graph, edge):
    n1, n2 = edge
    d1, d2 = graph.degree(n1), graph.degree(n2)
    return d1 + d2

def sum_pow_of_degree(graph: nx.Graph, edge):
    n1, n2 = edge
    d1, d2 = graph.degree(n1), graph.degree(n2)
    return (d1 + d2)**2

def define_weights(graph: nx.Graph, func):
    weights = {}
    for edge in graph.edges():
        weights[edge] = func(graph, edge)
    return weights

def get_communities(graph: nx.Graph) -> List[Set[int]]:
    calculated_communities: List[Set[int]] = list(
    nx.algorithms.community.asyn_lpa_communities(graph, weight="weights")
        )
    return calculated_communities

def calculate_score(real_communities: Dict[str, Set[int]], calculated_communities: Dict[str, Set[int]]):
    total_score = 0
    for language, nodes in real_communities.items():
        best = (0, -1)
        for i, community in enumerate(calculated_communities):
            intersection_len = len(community.intersection(nodes))
            if intersection_len > best[0]:
                best = (intersection_len, i)
        total_score += best[0]
    return total_score


In [39]:
src_graph = "./graphs/twitch/graph.gexf"
src_features = "./datasets/twitch/large_twitch_features.csv"
G = load_graph(src_graph)
real_features, real_communities = load_real_features(src_features, G)

In [50]:
essais = {
    "unit": unitary_degree,
    "max": max_degree, 
    "max_pow": max_pow_degree,
    "sum": sum_of_degree,
    "sum_pow": sum_pow_of_degree,
    }

results = {}

for name, func_to_use in essais.items():

    weights = define_weights(G, func_to_use)
    nx.set_edge_attributes(G, weights, "weights" + name)

    calculated_communities: List[Set[int]] = list(
        nx.algorithms.community.asyn_lpa_communities(G, weight="weights" + name)
    )

    total_score = calculate_score(real_communities, calculated_communities)

    print(f'{name} function')
    print("Out of", G.number_of_nodes(), "nodes,", total_score, "nodes matched correctly")
    accuracy = total_score / G.number_of_nodes()
    print("Accuracy", accuracy)
    results[name] = (total_score, accuracy)


unit function
Out of 41265 nodes, 37271 nodes matched correctly
Accuracy 0.903210953592633
max function
Out of 41265 nodes, 37402 nodes matched correctly
Accuracy 0.9063855567672362
max_pow function
Out of 41265 nodes, 36113 nodes matched correctly
Accuracy 0.8751484308736217
sum function
Out of 41265 nodes, 37354 nodes matched correctly
Accuracy 0.9052223433902823
sum_pow function
Out of 41265 nodes, 35918 nodes matched correctly
Accuracy 0.8704228765297468


In [51]:
with open('./graphs/twitch/scores.json', 'w') as f:
    # Convert the dictionary to JSON and write it to the file
    json.dump(results, f)