# TODO: fix the h-index. Why is the average link weight divided by the number of nodes? Same question for the betweenness.

# Import Libraries

# In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import community.community_louvain as community
import os, sys
#make the parent of the current working directory importable (added only once);
#presumably this is where the project packages imported further down live
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

# Extract Backbones from Networks

# In [2]:
#--------------------------------------------
#import all utils modules
from Utils.utils import shannon_entropy

#--------------------------------------------
#import all backbones modules
from Backbones import h_backbone as hb
from Backbones import disparity_filter as disf
from Backbones.doubly_stochastic import read, doubly_stochastic as ds
from Backbones.high_salience_skeleton import high_salience_skeleton as hss


#--------------------------------------------
#containers filled while the networks are processed:
#  backbones                   - short names of the extracted backbones
#  network_backbone_parameters - for each network, the parameters used by the backbones
#  network_backbone_measures   - for each network, the evaluation measures of the backbones
backbones = list()
network_backbone_parameters, network_backbone_measures = {}, {}


#--------------------------------------------
#names of the networks to process; each name is the csv file name (without extension)
#that is looked up in the dataset folder
networks = ['lesmis']


def _edge_key(row):
    """Return an order-independent "a-b" key for the edge stored in *row*.

    The two endpoints are ordered with min/max so that (u, v) and (v, u) give
    the same key; the key is the column on which the backbone tables are merged.
    """
    return "%s-%s" % (min(row["source"], row["target"]), max(row["source"], row["target"]))


#loop through each network: extract the backbones, save them, then compute and save
#the structural measures of every backbone
for network in networks:

    #--------------------------------------------
    #read edge list from csv file
    edge_list = pd.read_csv('../Datasets/' + network + '.csv')

    #read the same csv file with the Backbones reader; the resulting table is the input of
    #the doubly stochastic filter and of the high salience skeleton
    table, nnodes, nnedges = read("../Datasets/" + network + '.csv', "weight", sep=',', consider_self_loops=False, triangular_input = True, undirected=True)


    #--------------------------------------------
    #create graph from the edge list
    G = nx.from_pandas_edgelist(edge_list, edge_attr='weight', create_using=nx.Graph())

    #extract the number of nodes N and number of edges E from the graph
    N = G.number_of_nodes()
    E = G.number_of_edges()


    #--------------------------------------------
    #number of edges to preserve in the score based backbones: 30% of the edges
    E_percentage = int(0.3*E)

    #the thresholds below are read at position E_percentage-1, so at least one edge must be kept
    if E_percentage < 1:
        raise ValueError("network '%s' has too few edges (%d) to preserve 30%% of them" % (network, E))

    #positional mask having the first 30% values as True while others are False;
    #it is assigned to each backbone table after the table is sorted by its score
    backbone_edges = np.full(E, False)
    backbone_edges[0:E_percentage] = True

    #create the results dataframe and edge column that is used to merge backbones dataframes later
    backbone_results = edge_list.copy()
    backbone_results["edge"] = backbone_results.apply(_edge_key, axis = 1)

    #initialize the network dictionary to save the backbones parameters used
    network_backbone_parameters[network] = dict()


    #--------------------------------------------
    #register the backbone short name only once: appending it for every network would
    #duplicate the columns of the measures dataframe when several networks are processed
    if 'df' not in backbones:
        backbones.append('df')

    #apply the disparity filter algorithm
    backbone = disf.disparity_filter(G)

    #create an edge list from the result graph with the scores
    df_backbone = nx.to_pandas_edgelist(backbone)

    #sort alpha values of the edges (ascending)
    df_backbone = df_backbone.sort_values(by='alpha')

    #add a new column that assigns a boolean value for each edge stating if it is included in the backbone or not
    df_backbone = df_backbone.assign(df_backbone = backbone_edges)

    #the alpha of the last preserved edge is the threshold that gives this percentage of edges
    df_alpha = round(df_backbone['alpha'].iloc[E_percentage-1],3)
    network_backbone_parameters[network]['df_alpha'] = df_alpha

    #create edge column that is used to merge backbone with the backbone_results dataframe
    df_backbone["edge"] = df_backbone.apply(_edge_key, axis = 1)

    #drop the columns already available in the main dataframe and prefix the score column
    df_backbone = df_backbone.drop(columns=['source', 'target', 'weight'])
    df_backbone = df_backbone.rename(columns={'alpha': 'df_alpha'})

    #merge the disparity filter results to the backbone_results dataframe
    #(default inner join: only edges present in both tables are kept)
    backbone_results = pd.merge(backbone_results,df_backbone, on=['edge'])


    #--------------------------------------------
    #register the backbone short name (once)
    if 'ds' not in backbones:
        backbones.append('ds')

    #apply the doubly stochastic filter algorithm
    ds_backbone = ds(table, undirected = True, return_self_loops = False)

    #sort score values of the edges (highest first)
    ds_backbone = ds_backbone.sort_values(by='score', ascending=False)

    #add a new column that assigns a boolean value for each edge stating if it is included in the backbone or not
    ds_backbone = ds_backbone.assign(ds_backbone = backbone_edges)

    #create edge column that is used to merge backbone with the backbone_results dataframe
    ds_backbone["edge"] = ds_backbone.apply(_edge_key, axis = 1)

    #drop the columns already available in the main dataframe and prefix the score column
    ds_backbone = ds_backbone.drop(columns=['source', 'target', 'weight'])
    ds_backbone = ds_backbone.rename(columns={'score': 'ds_score'})

    #merge the doubly stochastic filter results to the backbone_results dataframe
    backbone_results = pd.merge(backbone_results,ds_backbone, on=['edge'])


    #--------------------------------------------
    #register the backbone short name (once)
    if 'hss' not in backbones:
        backbones.append('hss')

    #apply the high salience skeleton algorithm
    hss_backbone = hss(table, return_self_loops=False, undirected=True)

    #sort score values of the edges (highest first)
    hss_backbone = hss_backbone.sort_values(by='score', ascending=False)

    #add a new column that assigns a boolean value for each edge stating if it is included in the backbone or not
    hss_backbone = hss_backbone.assign(hss_backbone = backbone_edges)

    #the score of the last preserved edge is the threshold that gives this percentage of edges
    hss_score = round(hss_backbone['score'].iloc[E_percentage-1],3)
    network_backbone_parameters[network]['hss_score'] = hss_score

    #create edge column that is used to merge backbone with the backbone_results dataframe
    hss_backbone["edge"] = hss_backbone.apply(_edge_key, axis = 1)

    #drop the columns already available in the main dataframe and prefix the score column
    hss_backbone = hss_backbone.drop(columns=['source', 'target', 'weight'])
    hss_backbone = hss_backbone.rename(columns={'score': 'hss_score'})

    #merge the high salience skeleton results to the backbone_results dataframe
    backbone_results = pd.merge(backbone_results,hss_backbone, on=['edge'])


    #--------------------------------------------
    #register the backbone short name (once)
    if 'h' not in backbones:
        backbones.append('h')

    #apply the h-backbone algorithm and add the h-bridge and h-weight values to the dictionary
    h_bridge, h_weight, backbone = hb.h_backbone(G)
    network_backbone_parameters[network]['h_bridge'] = h_bridge
    network_backbone_parameters[network]['h_weight'] = h_weight

    #create an edge list from the result graph with the scores
    h_backbone = nx.to_pandas_edgelist(backbone)

    #create edge column that is used to merge backbone with the backbone_results dataframe
    h_backbone["edge"] = h_backbone.apply(_edge_key, axis = 1)

    #drop the columns already available in the main dataframe
    #NOTE(review): the remaining columns are the edge attributes set by hb.h_backbone; the
    #measures loop below needs a boolean 'h_backbone' column among them - TODO confirm
    h_backbone = h_backbone.drop(columns=['source', 'target', 'weight'])

    #merge the h_backbone results to the backbone_results dataframe
    backbone_results = pd.merge(backbone_results,h_backbone, on=['edge'])


    #--------------------------------------------
    #drop the edge column before saving
    backbone_results = backbone_results.drop(columns='edge')

    #save backbone results to csv file
    backbone_results.to_csv('../Results/Backbones Results/' + network + '.csv', index=False)

    #save backbone parameters to csv file
    #NOTE(review): index=False does not write the parameter names (the index) to the file
    network_backbones_parameters = pd.DataFrame({network: network_backbone_parameters[network]})
    network_backbones_parameters.to_csv('../Results/Backbones Results/' + network + '_params.csv', index=False)


    #--------------------------------------------
    #calculate evaluation measures: the structural properties
    measures = ['nodes_fraction', 'average_weighted_degree', 'average_link_weight', 'average_betweeness', 'density', 'entropy', 'weighted_modularity']

    #initialize the dataframe measures for the network backbones: one column per backbone, one row per measure
    network_measures = pd.DataFrame(columns = [name+'_backbone' for name in backbones], index=measures)

    #loop through all extracted backbones for the network
    for backbone_name in backbones:
        backbone_column = backbone_name + '_backbone'


        #--------------------------------------------
        #create the backbone graph from the edges preserved in the backbone, i.e. only True values of _backbone
        #(from here on G and backbone refer to the backbone, not to the full network)
        backbone = backbone_results[backbone_results[backbone_column]][['source', 'target', 'weight']]
        G = nx.from_pandas_edgelist(backbone, edge_attr='weight', create_using=nx.Graph())

        #take only the largest connected component
        largest_cc = max(nx.connected_components(G), key=len)
        G = G.subgraph(largest_cc).copy()

        n_nodes = G.number_of_nodes()
        n_edges = G.number_of_edges()

        #the values are stored with .loc[row, column]: the chained form df[column][row] = value
        #may write to a temporary copy and leave the dataframe unchanged


        #--------------------------------------------
        #calculate the fraction of nodes preserved
        node_fraction = n_nodes/N
        network_measures.loc['nodes_fraction', backbone_column] = node_fraction


        #--------------------------------------------
        #calculate the average weighted degree (per node)
        average_weighted_degree = sum(degree for _, degree in G.degree(weight='weight'))/n_nodes
        network_measures.loc['average_weighted_degree', backbone_column] = average_weighted_degree


        #--------------------------------------------
        #calculate the average link weight: a per-edge quantity, so it is divided by the
        #number of edges (it was previously divided by the number of nodes)
        average_link_weight = sum(weight for _, _, weight in G.edges(data='weight'))/n_edges
        network_measures.loc['average_link_weight', backbone_column] = average_link_weight


        #--------------------------------------------
        #calculate the average edge betweenness: one value per edge, so the sum is divided by
        #the number of edges (it was previously divided by the number of nodes)
        #NOTE(review): networkx treats the weight as a distance when computing shortest paths -
        #confirm this is intended for these weights
        average_betweeness = sum(nx.edge_betweenness_centrality(G, weight='weight', normalized=False).values())/n_edges
        network_measures.loc['average_betweeness', backbone_column] = average_betweeness


        #--------------------------------------------
        #calculate the density, 2*E/(N*(N-1)) for an undirected graph
        density = round(nx.density(G), 3)
        network_measures.loc['density', backbone_column] = density


        #--------------------------------------------
        #calculate the entropy
        entropy = round(shannon_entropy(G), 3)
        network_measures.loc['entropy', backbone_column] = entropy


        #--------------------------------------------
        #calculate the weighted modularity of the Louvain partition (fixed seed for reproducibility)
        communities = community.best_partition(G, random_state=1)
        weighted_modularity = round(community.modularity(communities, G, weight='weight'), 3)
        network_measures.loc['weighted_modularity', backbone_column] = weighted_modularity


    #add the measures of all backbones to the dictionary of measures for all networks and save it to csv
    #NOTE(review): index=False does not write the measure names (the index) to the file
    network_backbone_measures[network] = network_measures
    network_measures.to_csv('../Results/Backbones Results/' + network + '_measures.csv', index=False)
    
    
    