### Meta

In [4]:
# Autoreload modules without having to restart the notebook kernel.
% load_ext autoreload
% autoreload 2


# Plotting code stolen from Georg's notebook.
import matplotlib.pyplot as plt

% matplotlib inline
font = {'family': 'DejaVu Sans',
        'weight': 'bold',
        'size': 32}
plt.rc('font', **font)

import pandas as pd
import numpy as np
import seaborn as sns

# Personal libraries
import lib.files
import lib.graph
import lib.constants
import lib.cluster
import lib.dataframes

import community as community_louvain

In [None]:
# Run Markov clustering (MCL) on the network once per inflation value and
# write each resulting clustering to its own clusters file.
network_name = "icp55-cc-900-inv"
network = lib.graph.read_network(network_name)

# np.arange excludes the stop value, so the sweep is 1.5, 2.0, ..., 6.0.
# Rounding to 1 d.p. keeps the generated cluster names tidy (e.g. "mcl-2.5").
inflations = [round(raw_value, 1) for raw_value in np.arange(1.5, 6.5, .5)]

for inflation in inflations:
    cluster_name = f"mcl-{inflation}"
    # Build the output path from the network and clustering names.
    cluster_filepath = lib.files.make_filepath_to_clusters(
        lib.files.make_clusters_filename(network_name, cluster_name)
    )
    lib.cluster.run_mcl_and_write_to_file(network, cluster_filepath, inflation)

In [None]:
# This cell calculates various validation metrics for each clustering of the network.
#
# NOTE(review): presumably cluster_dataframe reads clusterings that were already
# written to file. The 'louvain' clusters are only written by the Louvain cell
# further down, and no cell in view writes 'pc2p', so on a fresh run those files
# must exist before this cell is executed -- confirm.
network_name = "icp55-cc-900-inv"
# NOTE(review): `network` is not used in this cell; the read is kept only in case
# other cells rely on the variable being set here.
network = lib.graph.read_network(network_name)

# Compute for parameter-free clusterings.
for cluster_name in ('pc2p', 'louvain'):
    lib.dataframes.cluster_dataframe(network_name, cluster_name)

# Compute for each Markov clustering (inflation 1.5, 2.0, ..., 6.0).
for i in np.arange(1.5, 6.5, .5):
    inflation = round(i, 1)  # Round to 1 d.p.
    cluster_name = f"mcl-{inflation}"
    lib.dataframes.cluster_dataframe(network_name, cluster_name)

In [None]:
# This cell computes a dataframe of node properties given various clusterings.
# (lib.dataframes is already imported in the setup cell at the top.)
network_name = 'icp55-cc-900-inv'

# node_dataframe is called with 16 positional boolean flags, all switched on.
# NOTE(review): presumably each flag enables one optional node property --
# confirm against lib.dataframes.node_dataframe and prefer keyword arguments there.
ALL_NODE_FLAGS = (True,) * 16

for node_clustering in ('mcl-2.5', 'pc2p', 'louvain'):
    lib.dataframes.node_dataframe(network_name, node_clustering, *ALL_NODE_FLAGS)

In [None]:
# This cell computes the louvain clustering and writes it to the clusters file.
network_name = 'icp55-cc-900-inv'
network = lib.graph.read_network(network_name)

# best_partition returns a {node: community id} mapping.
# NOTE(review): Louvain results can differ between runs and no seed is fixed
# here; consider passing `random_state=...` if the installed python-louvain
# version supports it -- confirm.
partition = community_louvain.best_partition(network)

# Group nodes by community id. Grouping through a dict does not depend on the
# ids being contiguous or starting at 0 (the previous len(clusters) == id check
# silently merged communities, or raised IndexError, when they were not).
members_by_community = {}
for node, community_id in partition.items():
    members_by_community.setdefault(community_id, []).append(node)

# One list of nodes per community, ordered by ascending community id.
clusters = [members_by_community[community_id]
            for community_id in sorted(members_by_community)]

lib.cluster.write_to_file(
    lib.files.make_filepath_to_clusters(
        lib.files.make_clusters_filename(network_name, 'louvain')),
    clusters)

In [None]:
# This cell plots the loglog size distribution of clusters and saves to a file.

def size_dist(clusters):
    """Return the size of every cluster, sorted in ascending order.

    Args:
        clusters: Iterable of sized collections (one collection per cluster).

    Returns:
        A new list holding ``len(cluster)`` for each cluster, smallest first.
    """
    sizes = [len(members) for members in clusters]
    sizes.sort()
    return sizes

def plot_loglog(xs, title, xlabel, ylabel, filepath=None):
    """Draw a log-log density histogram of ``xs`` with power-law guide curves.

    The plot is drawn on the current pyplot figure. Three reference curves
    ``x ** -alpha`` for alpha = 1, 2, 3 are overlaid across the range
    ``min(xs) .. max(xs)`` so the histogram's slope can be compared by eye.

    Args:
        xs: Non-empty collection of numbers to histogram (the bin edges start
            at 1, so values are expected to be >= 1, e.g. cluster sizes).
        title: Figure title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        filepath: If given, the figure is also saved to this path.

    Raises:
        ValueError: If ``xs`` is empty.
    """
    xs = list(xs)
    if not xs:
        raise ValueError("plot_loglog() needs at least one value to plot")
    smallest, largest = min(xs), max(xs)

    # Bin edges: unit-width bins for 1..9, then log-spaced edges from 10 up to
    # the maximum. When the maximum does not exceed 10 the log-spaced part
    # would run backwards (or collapse to a single point), so plain unit-width
    # bins covering the whole range are used instead.
    numb_bin = 100
    if largest > 10:
        bin_edges = np.append(
            np.arange(1, 10),
            np.logspace(np.log10(10), np.log10(largest), numb_bin))
    else:
        bin_edges = np.arange(1, max(largest, 1) + 2)

    # The view starts at 3, hiding the smallest values; skip the limit when it
    # would produce an empty or inverted axis.
    if largest > 3:
        plt.xlim(3, largest)
    plt.hist(xs, bins=bin_edges, density=True)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xscale('log')
    plt.yscale('log')

    # Guide curves must be plotted against their own x values; passing only the
    # y array makes pyplot use the array index as x, which shifts every curve.
    support = np.arange(smallest, largest + 1, dtype=float)
    for alpha in (1, 2, 3):
        plt.plot(support, support ** (-alpha), label=f"alpha = {alpha}")
    plt.legend()

    if filepath is not None:
        plt.savefig(filepath)


# Example: size distribution of the MCL clustering with inflation 6.0.
# The network name is restated here so the cell does not depend on a variable
# left behind by an earlier cell.
network_name = "icp55-cc-900-inv"
inflation = 6.0
clusters_name = f"mcl-{inflation}"
clusters = lib.cluster.read_clusters(network_name, clusters_name)

# plot_loglog draws the histogram with density=True, so the y axis is a
# probability density rather than a percentage.
plot_loglog(size_dist(clusters), f"{clusters_name} size distribution", "Size", "Probability density",
            lib.files.make_filepath_to_graphs(f'{clusters_name}-loglog-size-distribution.png'))