In [1]:
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import to_rgb, to_rgba
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.manifold import SpectralEmbedding
import utils
import scipy.sparse.linalg as lg
import networkx as nx
import networkx.algorithms.cluster as cluster
%matplotlib inline

In [2]:
def plot_subgraphfeatures(completeness_by_core,
                          size_by_core,
                          clustering_by_core):
    '''
    Plot edge density, size and average clustering of every k-core in one figure.

    The three series share the x axis (k = 1 .. number of cores) and each one gets its own
    y axis: density on the left, size and clustering on two right-hand axes. A dashed red
    vertical line, labelled with its k, marks the k just before the largest increase in density.

    Parameters
    ----------
    completeness_by_core : sequence of numbers, one per core; entry i is plotted at k = i + 1.
    size_by_core         : sequence of numbers, same length, plotted on the "Size" axis.
    clustering_by_core   : sequence of numbers, same length, plotted on the "Average Clustering" axis.

    Returns
    -------
    None. The figure is created through pyplot and left for the notebook backend to display.

    Notes
    -----
    The x axis is derived from len(completeness_by_core), so the function no longer relies on
    a notebook-global `cores` list being in scope. `plt.rc` changes matplotlib's global
    defaults, so the font sizes set here also affect figures created afterwards.
    '''
    plt.rc('font', size=14)          # controls default text sizes
    plt.rc('axes', titlesize=18)     # fontsize of the axes title
    plt.rc('axes', labelsize=14)     # fontsize of the x and y labels
    plt.rc('xtick', labelsize=12)    # fontsize of the tick labels
    plt.rc('ytick', labelsize=12)    # fontsize of the tick labels

    # One x position per core: k = 1, 2, ..., number of cores.
    ks = list(range(1, len(completeness_by_core) + 1))

    fig, ax_density = plt.subplots(figsize=(8,5))
    fig.subplots_adjust(right=0.75)  # leave room on the right for the two extra y axes
    ax_size = ax_density.twinx()
    ax_clustering = ax_density.twinx()
    # Push the third y axis outwards so it does not sit on top of the "Size" axis.
    ax_clustering.spines["right"].set_position(("axes", 1.2))

    density_handle, = ax_density.plot(ks, completeness_by_core, "b", label="Density")

    # increase_by_k[i] is the change in density between k = i + 1 and k = i + 2.
    increase_by_k = np.diff(np.asarray(completeness_by_core))
    if increase_by_k.size > 0:
        # threshold is k before the biggest increase
        threshold = int(np.argmax(increase_by_k)) + 1
        ax_density.vlines(x=threshold, ymin=0, ymax=max(completeness_by_core), color="red", linestyles="dashed")
        ax_density.text(threshold, -0.01, str(threshold))
    # With fewer than two cores there is no increase to mark, so the threshold line is skipped.

    ax_density.set_title("Subgraph Features by Core")
    ax_density.set_xlabel("k")
    ax_density.set_ylabel("Graph Completeness (%)")

    size_handle, = ax_size.plot(ks, size_by_core, "m", label="Size")
    ax_size.set_ylabel("Size")

    clustering_handle, = ax_clustering.plot(ks, clustering_by_core, "g", label="Clustering")
    ax_clustering.set_ylabel("Average Clustering")

    # A single legend on the primary axis covering the lines from all three axes.
    ax_density.legend(handles=[density_handle, size_handle, clustering_handle])

In [3]:
def plot_numcomponents(cores, components_by_core, graph_name=None):
    '''
    Plot the number of connected components of each k-core as a sanity check.

    These graphs are for sanity checking only and do not belong in the subgraph features figure.

    Parameters
    ----------
    cores              : sized collection of k-cores; only its length is used, to build the
                         x axis k = 1 .. len(cores).
    components_by_core : sequence with one component count per core, plotted against k.
    graph_name         : optional name shown in the figure title. When omitted, the
                         notebook-global `graphName` is used, which keeps existing
                         two-argument calls working.

    Returns
    -------
    None. The figure is created through pyplot and left for the notebook backend to display.
    '''
    if graph_name is None:
        # Backward compatibility: older call sites rely on the loop variable `graphName`
        # being present in the notebook namespace.
        graph_name = graphName

    fig, ax = plt.subplots(figsize=(8,5))

    ks = list(range(1, len(cores) + 1))
    ax.plot(ks, components_by_core)

    ax.set_title("{}: Number of Components for each K-core".format(graph_name))
    ax.set_xlabel("k")
    ax.set_ylabel("Number of components")

In [4]:
def save_subgraphdeltas(completeness_by_core,
                        size_by_core,
                        clustering_by_core,
                        transitivity_by_core,
                        subgraph_features_dict,
                        graph_name=None):
    '''
    Append one graph's per-core feature deltas to the shared feature table.

    Sizes are first divided by the largest size in `size_by_core`. Each of the four series is
    then passed through `utils.delta` and the results are appended to the matching column
    lists of `subgraph_features_dict`, together with the graph name and k = 2 .. number of cores.

    Parameters
    ----------
    completeness_by_core   : sequence with one edge-density value per core; its length is
                             taken as the number of cores.
    size_by_core           : sequence with one size per core (must be non-empty; the maximum
                             is used as the normalizer).
    clustering_by_core     : sequence with one average-clustering value per core.
    transitivity_by_core   : sequence with one transitivity value per core.
    subgraph_features_dict : dict of lists with the keys "Graph Name", "K", "Edge Density",
                             "Size", "Clustering Coefficient" and "Transitivity". It is
                             extended IN PLACE.
    graph_name             : optional name stored in the "Graph Name" column. When omitted,
                             the notebook-global `graphName` is used, which keeps existing
                             five-argument calls working.

    Returns
    -------
    The same `subgraph_features_dict` object that was passed in.

    Notes
    -----
    NOTE(review): `utils.delta` is defined outside this notebook. It is presumably the
    consecutive difference f_k - f_{k-1}, yielding one value fewer than its input, which is
    what the k = 2 .. n labelling assumes. Confirm against utils.
    '''
    if graph_name is None:
        # Backward compatibility: older call sites rely on the loop variable `graphName`
        # being present in the notebook namespace.
        graph_name = graphName

    # normalize
    size_by_core = np.array(size_by_core) / max(size_by_core)

    # calculate deltas
    completeness_delta = utils.delta(completeness_by_core)
    size_delta = utils.delta(size_by_core)
    clustering_delta = utils.delta(clustering_by_core)
    transitivity_delta = utils.delta(transitivity_by_core)

    # The number of cores comes from the inputs rather than from a notebook-global `cores`.
    degeneracy = len(completeness_by_core)
    # Deltas are labelled from k = 2: the first core has no predecessor to compare with.
    ks = list(range(2, degeneracy + 1))

    subgraph_features_dict["Graph Name"].extend([graph_name] * len(ks))
    subgraph_features_dict["K"].extend(ks)
    subgraph_features_dict["Edge Density"].extend(completeness_delta)
    subgraph_features_dict["Size"].extend(size_delta)
    subgraph_features_dict["Clustering Coefficient"].extend(clustering_delta)
    subgraph_features_dict["Transitivity"].extend(transitivity_delta)
    return subgraph_features_dict

In [5]:
# Graphs to analyse. For a quick single-graph run, shrink this list, e.g.:
#graphNames = ["facebook"]
graphNames = ["wiki", "facebook", "ppi", "physics", "lastfm", "AS", "ER-2", "ER-4", "BA-5", "BA-10", "BTER-from-BA", "BTER-arbitrary"]

# Long-format accumulator: one entry per (graph, k) pair in every column list.
# It is rebuilt from scratch on every run of this cell, so re-running does not duplicate rows.
subgraph_features_dict = {
    "Graph Name": [],
    "K": [],
    "Edge Density": [],
    "Size": [],
    "Clustering Coefficient": [],
    "Transitivity": []
}

# `graphName` (and, for some helpers, `cores`) are looked up as notebook globals by the
# functions called below, so these loop-variable names must stay as they are.
for graphName in graphNames:
    print(graphName)

    # SECURITY: pickle.load can execute arbitrary code from the file. Only load core files
    # that were produced by this project.
    with open("cores/{}_cores.pickle".format(graphName), "rb") as pickleFile:
        cores = pickle.load(pickleFile)

    # One value per core for each feature; all lists are index-aligned with `cores`.
    completeness_by_core = [utils.core_completeness(core) for core in cores]
    size_by_core = [len(core) for core in cores]
    clustering_by_core = [cluster.average_clustering(core) for core in cores]
    transitivity_by_core = [cluster.transitivity(core) for core in cores]
    # Only consumed by the optional sanity-check plot below.
    components_by_core = [nx.number_connected_components(core) for core in cores]

    #plot_numcomponents(cores, components_by_core)
    plot_subgraphfeatures(completeness_by_core, size_by_core, clustering_by_core)
    subgraph_features_dict = save_subgraphdeltas(completeness_by_core, size_by_core, clustering_by_core,
                                                 transitivity_by_core, subgraph_features_dict)

wiki


KeyboardInterrupt: 

In [7]:
# Turn the accumulated column lists into one table (one row per graph and k).
subgraph_features_pd = pd.DataFrame(subgraph_features_dict)

# Guard against persisting the result of an interrupted run: if the feature-extraction loop
# was stopped before any graph finished, the table is empty and saving it would silently
# overwrite a previously good pickle.
if subgraph_features_pd.empty:
    raise ValueError("subgraph_features_dict holds no rows; run the feature-extraction cell "
                     "to completion before saving.")

# Report graphs that contributed no rows (the loop was interrupted part-way, or the graph
# has fewer than two cores) so a partial table is not mistaken for a full one.
graphs_with_rows = set(subgraph_features_pd["Graph Name"])
graphs_without_rows = [name for name in graphNames if name not in graphs_with_rows]
if graphs_without_rows:
    print("WARNING: no rows for graphs: {}".format(", ".join(graphs_without_rows)))

# Human-readable note stored next to the table. Adjacent literals are used instead of
# backslash continuation inside a single literal, which had dropped the space after "graph.".
description = (
    "Dataframe in which each row is a K-core in a graph. "
    "The Column for feature f is the difference f_{k} - f_{k-1}. "
    "The 'Size' variable has been normalized by dividing by the size of the complete graph."
)

# The pickle holds a (description, dataframe) tuple.
with open("pickles/subgraph_features_pd.pickle", "wb") as pickleFile:
    pickle.dump((description, subgraph_features_pd), pickleFile)