# Twitch

In [None]:
%load_ext autoreload

import json
import numpy as np
from twitch_utils import (
    load_features,
    remove_features,
    load_edges,
    create_graph,
    choose_largest_cc,
    get_real_communities,
    set_weights,
    evaluate_metric,
)

## Load Graph

In [None]:
# Read the feature table from the dataset CSV and report how many rows
# (printed as "nodes") it contains.
features_path = "./dataset/large_twitch_features.csv"
features = load_features(features_path)
loaded_node_count = len(features.index)
print("Loaded", loaded_node_count, "nodes")

## Remove unused features and clean

In [None]:
# Filter the feature table through remove_features for the "OTHER" label
# (presumably a language code -- TODO confirm) and report what is left.
features_to_remove = ["OTHER"]
features = remove_features(features, features_to_remove)
remaining_node_count = len(features.index)
print("Kept", remaining_node_count, "nodes after removing", features_to_remove)

In [None]:
# Second filtering pass: same call as for "OTHER", this time for the "EN"
# label (presumably the English language code -- TODO confirm).
features_to_remove = ["EN"]
features = remove_features(features, features_to_remove)
remaining_node_count = len(features.index)
print("Kept", remaining_node_count, "nodes after removing", features_to_remove)

In [None]:
# Load the edges from the dataset CSV. The already-filtered feature table is
# handed to load_edges as well (presumably so that edges touching removed
# nodes are dropped -- TODO confirm against twitch_utils).
edges_path = "./dataset/large_twitch_edges.csv"
edges = load_edges(edges_path, features)
loaded_edge_count = len(edges.index)
print("Loaded", loaded_edge_count, "edges")

## Create the Graph object

In [None]:
# Build the graph object from the loaded edges and report its size.
G = create_graph(edges)

node_total = G.number_of_nodes()
edge_total = G.number_of_edges()
print("Created graph with", node_total, "nodes and", edge_total, "edges")

In [None]:
# Replace G with whatever choose_largest_cc returns for it, then report the
# node and edge counts of the resulting graph.
G = choose_largest_cc(G)

component_summary = (
    "Chose largest connected component with",
    G.number_of_nodes(),
    "nodes and",
    G.number_of_edges(),
    "edges",
)
print(*component_summary)

## Get the real communities

In [None]:
# Build the reference ("real") communities from the graph and the feature
# table. Each entry is unpacked below as a (language, community) pair.
real_communities = get_real_communities(G, features)

# print() already separates its arguments with a space, so the literals carry
# no padding of their own (the original printed a double space and misspelled
# "communities" / "following").
print("There are", len(real_communities), "communities with the following counts:")
for language, community in real_communities:
    print("-", language, len(community))

## Import the metric definitions

In [None]:
from metrics import *

## Define the tests to execute

In [None]:
# Each test is a (name, metric function, aggregation function) triple. The
# name doubles as the file name of the saved result, the metric is called on
# the graph, and the aggregation is handed to set_weights.

# Aggregations applied to the metric values. "diff" indexes positions 0 and 1,
# so the argument is presumably a pair of per-node values -- TODO confirm
# against set_weights.
aggregations = [
    ("max", np.max),
    ("mean", np.mean),
    ("min", np.min),
    ("diff", lambda pair: np.abs(pair[0] - pair[1])),
    ("max inverse", lambda pair: 1 / np.max(pair)),
]

# Metrics that are combined with every aggregation above.
node_metrics = [
    ("degree", metric_degree),
    ("degree_centrality", metric_degree_centrality),
    ("eigenvector_centrality", metric_eigenvector_centrality),
    ("pagerank", metric_pagerank),
    ("clustering", metric_clustering),
]

# The unitary metric is only evaluated with the max aggregation.
tests = [("unitary", metric_unitary, np.max)]
for metric_label, metric_function in node_metrics:
    for aggregation_label, aggregation in aggregations:
        tests.append(
            (f"{metric_label} {aggregation_label}", metric_function, aggregation)
        )

## Too Long to calculate
# ("closeness_centrality max", metric_closeness_centrality, max),
# ("betweeness_centrality max", metric_betweeness_centrality, np.min),

## Execute the tests and save the results

In [None]:
import os

# Create the output directory up front: without it, open() below raises
# FileNotFoundError only after a full (and slow) evaluation has finished,
# throwing the scores away.
os.makedirs("eval", exist_ok=True)

# Run every (name, metric, aggregation) test: weight the graph, evaluate it
# against the real communities and store summary statistics as JSON.
for name, metric, aggregate in tests:
    print("Evaluating", name)
    # NOTE(review): metric(G) is recomputed for every test, even when several
    # tests share the same metric. It is not cached here because the metric
    # may depend on the weights left by the previous iteration -- confirm
    # before hoisting.
    set_weights(G, metric(G), aggregate)
    print("Generated weights")
    print("Evaluating...")
    scores = evaluate_metric(G, real_communities, 5)
    # json cannot serialize np.ndarray or NumPy integer scalars, so convert
    # everything to built-in floats / lists before dumping.
    result = {
        "mean": float(np.mean(scores)),
        "std": float(np.std(scores)),
        "min": float(np.min(scores)),
        "max": float(np.max(scores)),
        "scores": np.asarray(scores).tolist(),
    }
    print("Saving")
    with open(f"eval/{name}.json", "w") as f:
        json.dump(result, f, indent=4)