In [139]:
# Enable IPython's autoreload extension so that edits to imported modules
# (e.g. the project's `lib` package) are picked up without restarting the kernel.
# Mode 2 is the "reload everything" setting — see the IPython autoreload docs.
%load_ext autoreload
%autoreload 2


import sys
import os
import matplotlib.pyplot as plt

# Introduce the project directory to your system's path to make data/lib folders visible.
sys.path.insert(1, "..")

# Plotting code stolen from Georg's notebook.
%matplotlib inline
font = {'family': 'DejaVu Sans',
        'weight': 'bold',
        'size': 32}
plt.rc('font', **font)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [140]:
# Personal libraries
import lib.plot
import lib.cluster as cluster
import lib.graph
import lib.files
import pandas as pd
import networkx as nx
import numpy as np

In [141]:
def centrality_df(centrality_function, centrality_name, cluster_filepath, network_filepath):
    """Compute a within-cluster centrality score for every clustered protein.

    Each cluster read from ``cluster_filepath`` is restricted to its own
    subgraph of the network read from ``network_filepath`` (via
    ``network.subgraph``), and ``centrality_function`` is evaluated on each
    subgraph separately. The scores are then joined onto the node table
    stored in the cluster file.

    Parameters
    ----------
    centrality_function : callable
        Called with one cluster subgraph; must return a mapping of
        node -> score (for example ``nx.degree_centrality``).
    centrality_name : str
        Name given to the score column in the returned frame.
    cluster_filepath : str or path-like
        Cluster CSV. It is read twice: by ``lib.cluster.read_csv`` to get the
        cluster memberships, and by ``pd.read_csv(..., index_col=0)`` to get
        the node table, which must contain a ``protein`` column.
    network_filepath : str or path-like
        Edge list read by ``lib.graph.read_weighted_edgelist``.

    Returns
    -------
    pandas.DataFrame
        The node table with an extra ``centrality_name`` column. The join is
        an inner merge on ``protein``, so nodes without a score are dropped.
        If no scores were produced at all, the result is an empty frame that
        still carries the expected columns.
    """
    memberships = lib.cluster.read_csv(cluster_filepath)
    network = lib.graph.read_weighted_edgelist(network_filepath)

    # Collect (protein, score) pairs cluster by cluster.
    scores = []
    for members in memberships:
        subgraph = network.subgraph(members)
        scores.extend(centrality_function(subgraph).items())

    # Naming the columns up front keeps the `protein` key present even when
    # `scores` is empty; otherwise the merge below fails with a KeyError.
    score_table = pd.DataFrame(scores, columns=["protein", centrality_name])
    nodes = pd.read_csv(cluster_filepath, index_col=0)

    # Attach the centrality scores to the original cluster assignments.
    return nodes.merge(score_table, on="protein")

In [142]:
# Build the input paths for the network under study. Both file names are
# derived from `network_name`, so the edge list and the cluster file always
# refer to the same network.
network_name = 'icp55-cc-900-inv'
network_filepath = lib.files.make_filepath_to_networks(f'{network_name}.txt')
# NOTE(review): the 'mcl.' prefix presumably marks Markov-clustering output — confirm.
cluster_filepath = lib.files.make_filepath_to_clusters(f'mcl.{network_name}.nodes.csv')

In [143]:
# Centrality measures to add to the table, as (networkx function, column name)
# pairs. Each one is computed per cluster by `centrality_df`.
centrality_measures = ((nx.betweenness_centrality, "Betweenness Centrality"),
                (nx.degree_centrality, "Degree Centrality"),
                (nx.eigenvector_centrality, "Eigenvector Centrality"))

# Subgraph centrality seeds the combined table; the remaining measures are
# merged onto it one column at a time.
cent_df = centrality_df(nx.subgraph_centrality, "Subgraph Centrality", cluster_filepath, network_filepath)

for measure_function, column_name in centrality_measures:
    # Join on both keys, passed as a list (the form pandas documents for `on`),
    # so each score lands on the matching (protein, cluster) row.
    cent_df = cent_df.merge(
        centrality_df(measure_function, column_name, cluster_filepath, network_filepath),
        on=["protein", "cluster"],
    )
    

In [144]:
# Write the combined centrality table as a CSV into the MCL clusters folder.
# The file name is built from `network_name` so the output always matches the
# network that was analysed, instead of repeating the identifier by hand.
cent_df.to_csv(lib.files.make_filepath_to_mcl_clusters(f"mcl.{network_name}-centralities.csv"))