In [None]:
# Plot evaluation measurements
 
import os
import json 
import pandas as pd
import seaborn as sns
import networkx as nx
from matplotlib import pyplot as plt
from collaboration_network.metrics import Density


def parse_json(x: pd.Series):
    """Explode one benchmark row into one record per ``min_size`` threshold.

    Each ``*_by_min_size_cc`` field of ``x`` holds a JSON document whose
    ``'result'`` entry maps a ``min_size`` threshold (a string key in the
    JSON) to the metric value measured at that threshold.

    The function returns nothing. For every threshold found in the
    modularity payload, taken in ascending numeric order, it appends one
    dict to the module-level list ``ret``. That list must exist before the
    function is called.

    Parameters
    ----------
    x : pd.Series
        A row providing ``run_id``, ``method`` and the four
        ``*_by_min_size_cc`` JSON strings read below.
    """
    # Output column name -> attribute of the row carrying its JSON payload.
    payload_fields = {
        'modularity': 'modularity_by_min_size_cc',
        'modularity_density': 'modularity_density_by_min_size_cc',
        'intra_cluster_density': 'intra_cluster_density_by_min_size_cc',
        'inter_cluster_density': 'inter_cluster_density_by_min_size_cc',
    }

    # Pass 1: decode every payload and keep only its 'result' mapping.
    decoded = {
        metric: json.loads(getattr(x, field))['result']
        for metric, field in payload_fields.items()
    }

    # Pass 2: JSON object keys are strings; turn them into ints so that
    # thresholds sort numerically instead of lexicographically.
    by_threshold = {
        metric: {int(size): value for size, value in mapping.items()}
        for metric, mapping in decoded.items()
    }

    # The modularity payload defines which thresholds are reported.
    for threshold in sorted(set(by_threshold['modularity'])):
        ret.append({
            'run_id': x.run_id,
            'method': x.method,
            'min_size': threshold,
            **{metric: by_threshold[metric][threshold] for metric in payload_fields},
        })

def plot_density(G, output=None):
    """Plot the density of ``G`` against the ``min_size`` threshold.

    The curve comes from the project's ``Density`` metric, configured with
    ``min_size_connected_component='auto'``. The object it returns supplies
    the x values through its ``min_size`` attribute and the y values through
    its ``result`` attribute.

    Parameters
    ----------
    G
        Graph handed to the ``Density`` metric.
    output : path-like or None, optional
        If given, the figure is written to this path and then closed, so it
        is not displayed. If ``None`` (the default), the figure is shown
        with ``plt.show()``.
    """
    density = Density(min_size_connected_component='auto')
    # The metric's second positional argument is deliberately None.
    # NOTE(review): presumably the clustering/partition slot, unused for
    # plain density -- confirm against the Density implementation.
    result = density(G, None)

    fig, ax = plt.subplots()
    ax.plot(result.min_size, result.result)
    ax.set_xlabel('min_size')
    ax.set_ylabel('density')

    if output is not None:
        fig.savefig(output)
        # pyplot keeps every figure registered until it is closed explicitly;
        # without this, calling the function in a loop accumulates figures.
        plt.close(fig)
    else:
        plt.show()

# Benchmark graphs to visualise. Each name selects the benchmark log
# `log/<name>_benchmark_full.csv` and the graph file `graphs/<name>.gml`.
graph_names = [
    'G_dir_alpha0.8_00-04',
    'G_dir_alpha0.8_02-06',
    'G_dir_alpha0.8_04-08',
    'G_dir_alpha0.8_06-10',
    'G_dir_alpha0.8_08-12',
]


for graph_name in graph_names:
    # No trailing slash here: the f-strings below add the separator, which
    # previously produced paths containing '//'.
    eval_dir = f'pdf/vis/{graph_name}'
    os.makedirs(eval_dir, exist_ok=True)

    df = pd.read_csv(f'log/{graph_name}_benchmark_full.csv')

    # Leave the 'louvain_res-0.2' runs out of the comparison.
    df = df[df['method'] != 'louvain_res-0.2']

    # parse_json appends its records to the module-level list `ret`, so the
    # list has to be reset for every graph before the rows are processed.
    ret = []
    df.apply(parse_json, axis=1)

    # Long-format table: one row per (run, method, min_size).
    df_by_min_size = pd.DataFrame(ret)

    cols = ['modularity', 'modularity_density', 'intra_cluster_density', 'inter_cluster_density']

    for col in cols:
        # `data` is passed by keyword: it is keyword-only in older seaborn
        # releases, and the keyword form works in every version.
        sns_plot = sns.relplot(
            data=df_by_min_size,
            x='min_size',
            y=col,
            hue='method',
            kind='line',
            height=4,
        )

        sns.despine()
        sns_plot.set_xlabels('min_size', fontsize=12)
        sns_plot.set_ylabels(col, fontsize=12)

        sns_plot.figure.savefig(f'{eval_dir}/{col}.pdf')
        # The figure is only needed on disk. Closing it keeps the loop from
        # accumulating open figures (four per graph) in pyplot's registry.
        plt.close(sns_plot.figure)

    G = nx.read_gml(f'graphs/{graph_name}.gml')
    plot_density(G, f'{eval_dir}/density.pdf')

In [None]:
# Report summary
def summary(df):
    """Summarise the benchmark metrics of ``df`` as ``(mean, std)`` pairs.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing the columns ``exec_time``, ``num_clusters``,
        ``modularity``, ``modularity_density``, ``intra_cluster_density``
        and ``inter_cluster_density``.

    Returns
    -------
    pd.Series
        Indexed by metric name, in the order listed above. Each value is the
        tuple ``(df[metric].mean(), df[metric].std())``.
    """
    metrics = (
        'exec_time',
        'num_clusters',
        'modularity',
        'modularity_density',
        'intra_cluster_density',
        'inter_cluster_density',
    )
    stats = {name: (df[name].mean(), df[name].std()) for name in metrics}
    return pd.Series(stats)

# Benchmark to summarise. To report on another graph, replace the name with
# one of:
#   'G_dir_alpha0.8_02-06', 'G_dir_alpha0.8_04-08',
#   'G_dir_alpha0.8_06-10', 'G_dir_alpha0.8_08-12'
graph_name = 'G_dir_alpha0.8_00-04'

# Load the benchmark log and leave out the 'louvain_res-0.2' runs.
df = pd.read_csv(f'log/{graph_name}_benchmark_full.csv')
df = df.loc[df['method'] != 'louvain_res-0.2']

# Per-method table of (mean, std) pairs. When this is the last statement of
# the cell, the notebook displays the resulting frame.
df.groupby(['method']).apply(summary)