# Twitch Analysis

In [None]:
%load_ext autoreload

import sys

sys.path.append("../")

import json
import numpy as np
from twitch_utils import (
    load_features,
    remove_features,
    load_edges,
    create_graph,
    choose_largest_cc,
    get_real_communities,
    set_weights,
    evaluate_metric,
    calculate_communities,
    calculate_mapping_stats,
    calculate_score,
)
from utils import highlight_nodes_communities, reset_colors, circular_layout

## Load Graph

In [None]:
# Read the per-node feature table from the dataset CSV and report its size.
features = load_features("./dataset/large_twitch_features.csv")
loaded_node_count = len(features.index)
print("Loaded", loaded_node_count, "nodes")

## Remove unused features and clean

In [None]:
# First pass: drop the "OTHER" label (presumably a catch-all bucket — confirm
# against the dataset) before narrowing down to specific labels.
features_to_remove = ["OTHER"]
features = remove_features(features, features_to_remove)
remaining_node_count = len(features.index)
print("Kept", remaining_node_count, "nodes after removing", features_to_remove)

In [None]:
# Known feature labels; they look like two-letter language codes.
all_features = {
    "EN", "DE", "FR", "ES", "RU",
    "ZH", "PT", "JA", "IT", "KO",
    "PL", "SV", "TR", "NL", "FI",
    "TH", "CS", "DA", "HU", "NO",
}

# Only these labels stay in the analysis; every other known label is removed.
features_to_keep = {"NO", "HU"}
features_to_remove = all_features - features_to_keep

features = remove_features(features, features_to_remove)
kept_node_count = len(features.index)
print("Kept", kept_node_count, "nodes after removing", features_to_remove)

In [None]:
# Read the edge list; the filtered feature table is passed along, presumably so
# that only edges between remaining nodes are kept — confirm in twitch_utils.
edges = load_edges("./dataset/large_twitch_edges.csv", features)
loaded_edge_count = len(edges.index)
print("Loaded", loaded_edge_count, "edges")

## Create the Graph object

In [None]:
# Build the graph object from the loaded edge table and report its size.
G = create_graph(edges)

graph_nodes, graph_edges = G.number_of_nodes(), G.number_of_edges()
print("Created graph with", graph_nodes, "nodes and", graph_edges, "edges")

In [None]:
# Replace G with the result of choose_largest_cc (the largest connected
# component, per the message below) and report the reduced size.
G = choose_largest_cc(G)
component_nodes, component_edges = G.number_of_nodes(), G.number_of_edges()
print(
    "Chose largest connected component with",
    component_nodes,
    "nodes and",
    component_edges,
    "edges",
)

## Get the real communities

In [None]:
# Ground-truth communities derived from the node features. The loop below
# unpacks each entry as a (language, members) pair, so real_communities is
# expected to iterate as 2-tuples.
real_communities = get_real_communities(G, features)

# print() already separates its arguments with a space, so the literals carry
# no trailing/leading spaces of their own.
print("There are", len(real_communities), "communities with the following counts:")
for language, community in real_communities:
    print("-", language, len(community))

## Define the different metrics used

In [None]:
# Star import of the project's metrics module; presumably this is where
# metric_unitary (used in the Analyze cell) comes from, since no other visible
# import provides it.
# NOTE(review): `nx`, used when writing the GEXF file, is not imported in any
# visible cell either — it may be leaking in through this star import. Confirm,
# and prefer explicit imports so the notebook does not depend on it.
from metrics import *

## Analyze

In [None]:
# Compute the unitary metric on G and hand it to set_weights together with
# np.min (presumably the reducer used to combine values into one edge weight —
# confirm in twitch_utils.set_weights).
unitary_metric = metric_unitary(G)
set_weights(G, unitary_metric, np.min)

# Detect communities on the weighted graph and list the size of each one.
calculated_communities = calculate_communities(G)
print("Calculated", len(calculated_communities), "communities with counts:")
for com in calculated_communities:
    print("-", len(com))

In [None]:
# Compare the ground-truth communities with the detected ones and pretty-print
# the resulting (JSON-serialisable) statistics.
stats = calculate_mapping_stats(real_communities, calculated_communities)
stats_report = json.dumps(stats, indent=2)
print(stats_report)

In [None]:
# Derive a score from the mapping statistics and pretty-print it as JSON.
score = calculate_score(stats)
score_report = json.dumps(score, indent=2)
print(score_report)

In [None]:
# Collect scores for the current edge weights. The third argument (500) is
# presumably the number of repeated runs — confirm in twitch_utils.evaluate_metric.
scores = evaluate_metric(G, real_communities, 500)

# Summary statistics, scaled by 100 (presumably to read as percentages).
summary_reducers = (
    ("mean", np.mean),
    ("std", np.std),
    ("min", np.min),
    ("max", np.max),
)
for summary_label, reducer in summary_reducers:
    print(summary_label, reducer(scores) * 100)

# Blank separator line, then the raw scores.
print()
print(scores)

In [None]:
# Apply the project's circular_layout helper (from utils) to G. The return value
# is discarded, so it presumably stores node positions on G in place — confirm
# in utils.
circular_layout(G)

In [None]:
from pathlib import Path

# Clear any previous colouring, then mark the detected communities on G.
reset_colors(G)
highlight_nodes_communities(G, calculated_communities)

# Make sure the output directory exists; otherwise the write below fails with
# FileNotFoundError on a fresh checkout.
Path("graphs").mkdir(parents=True, exist_ok=True)

# NOTE(review): `nx` is not imported in any visible cell; it presumably reaches
# this namespace via `from metrics import *` — confirm, or import it explicitly.
# Plain string literal: the original f-string had no placeholders.
nx.write_gexf(G, "graphs/calc_communities_unitary.gexf")