In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container {width:80% !important; }</style>"))

In [3]:
# load libraries and set plot parameters
import math
import numpy as np
import networkx as nx
import pandas as pd
import sys
import random
from datetime import date
from itertools import groupby

## Extract features for binary removal

In [4]:
def local_path(G,nodelist,epsilon = 0.01):
    A = nx.adjacency_matrix(G,weight=None).todense()
    return(A**2+epsilon*A**3)

In [6]:
def features_extraction_binary_removal(graphs,date):
    # local similarity indices
    common_neighbor, salton_index, jaccard_index, sorensen_index, hub_promoted_index,hub_depressed_index, \
    leicht_holme_newman_index, prederential_attachment_index, adamic_adar_index, resource_allocation_index \
    = [],[],[],[],[],[],[],[],[],[]
    # global features
    edge_betweeness_centrality,local_path_index = [],[]
    
    #mix features
    number_of_flights,number_of_passenges,distance,flights_centrality,passenges_centrality,distance_centrality = [],[],[],[],[],[]
    core_max,core_min =[],[]
 
    # label 0 for failure and 1 for remained
    time,year,label,edge = [],[],[],[]
    
    for i in range(len(graphs)-1):
        G = graphs[i]
        H = graphs[i+1]
        G.add_nodes_from([n for n in H if n not in G])
        H.add_nodes_from([n for n in G if n not in H])
        removed_edges = list(nx.difference(G,H).edges())
        
        Ki = G.degree()

        EBC = nx.edge_betweenness_centrality(G)    
        nodelist = list(G.nodes())
        LPI = local_path(G,nodelist)
        CN = nx.core_number(G)
        for e in list(G.edges()):            
            u,v = e                           
            uv_intersection = list(nx.common_neighbors(G,u,v))
            union_size = len(set(G[u])|set(G[v]))
            common_neighbor.append(len(uv_intersection))
            salton_index.append(len(uv_intersection)/np.sqrt(Ki[u]*Ki[v]))
            jaccard_index.append(len(uv_intersection)/union_size)
            sorensen_index.append(2*len(uv_intersection)/(Ki[u]+Ki[v]))
            hub_promoted_index.append(len(uv_intersection)/min(Ki[u],Ki[v]))
            hub_depressed_index.append(len(uv_intersection)/max(Ki[u],Ki[v]))
            leicht_holme_newman_index.append(len(uv_intersection)/(Ki[u]*Ki[v]))
            prederential_attachment_index.append(Ki[u]*Ki[v])
            local_path_index.append(LPI[nodelist.index(u),nodelist.index(v)])
            
            edge_betweeness_centrality.append(EBC[e])
            core_max.append(max(CN[u],CN[v]))
            core_min.append(min(CN[u],CN[v]))          
            if len(uv_intersection) == 0:
                adamic_adar_index.append(0)
                resource_allocation_index.append(0)
            else:    
                adamic_adar_index.append(sum([1/math.log(Ki[z]) for z in uv_intersection]))
                resource_allocation_index.append(sum(1/Ki[z] for z in uv_intersection))
            year.append(date[i])
            edge.append(e)
            
            list1 = [~g.has_edge(*e) for g in graphs[i+1:]]
            count_dups = [sum(1 for _ in group) for _, group in groupby(list1)]   
            if e in removed_edges: 
                label.append(1)
                time.append(count_dups[0])
            else:
                label.append(0)
                time.append(0)
       
        sys.stdout.write('\r {} of {} is done'.format(i, len(graphs)))
        sys.stdout.flush() 
        
        
    data = pd.DataFrame({'common_neighbor':common_neighbor,
                  'salton_index':salton_index,
                  'jaccard_index':jaccard_index,
                  'sorensen_index':sorensen_index,
                  'hub_promoted_index':hub_promoted_index,
                  'hub_depressed_index':hub_depressed_index,
                  'leicht_holme_newman_index':leicht_holme_newman_index,
                  'prederential_attachment_index':prederential_attachment_index,
                   'adamic_adar_index':adamic_adar_index,
                  'resource_allocation_index':resource_allocation_index,
                    'local_path_index':local_path_index,
                   'edge_betweeness_centrality':edge_betweeness_centrality,
                         'core_max':core_max,
                         'core_min': core_min,
                    'time':time,
                   'year':year,
                   'label': label,
                    'edge':edge})
    
    return(data)

In [7]:
df_air = pd.read_pickle('../data/networks/US_air_1990_2018.pkl')
df_air = df_air[df_air.source != df_air.target]
year = list(df_air.index.get_level_values(0).unique())
month = list(df_air.index.get_level_values(1).unique())
graphs_air = []
date_air = []
for y in year:
    for m in month:
#        if y != 2018 or m != 12:
        df = df_air.loc[y,m]
        date_air.append(date(y,m,1))
        G = nx.from_pandas_edgelist(df_air.loc[y,m], edge_attr=True)
        graphs_air.append(G)
graphs_air=graphs_air

In [None]:
features_air_removal = features_extraction_binary_removal(graphs_air,date_air)

 95 of 348 is done

In [None]:
features_air_removal.to_pickle('../results/us_air_data_binary_removal.pkl')

In [8]:
df_bus = pd.read_pickle('../data/networks/bus_2005_2014.pkl')
df_bus = df_bus[df_bus.source != df_bus.target]
year = list(df_bus.index.get_level_values(0).unique())
month = list(df_bus.index.get_level_values(1).unique())
graphs_bus = []
date_bus = []
for y in year:
    for m in month:
#        if y != 2018 or m != 12:
        df = bus_network(y,m)
        date_bus.append(date(y,m,1))
        G = nx.from_pandas_edgelist(df, edge_attr=True)
        graphs_bus.append(G)

NameError: name 'bus_network' is not defined

In [None]:
features_bus_removal = features_extraction_binary_removal(graphs_bus,date_bus)

In [None]:
features_bus_removal.to_pickle('../results/brazil_bus_data_binary_removal.pkl')

## Extract features for weight change

In [None]:
def local_path_weight(G,nodelist,epsilon = 0.01):
    A = nx.adjacency_matrix(G,weight='flight').todense()
    return(A**2+epsilon*A**3)

In [3]:
def features_extraction_weight_change(graphs,date):
    # local similarity indices
    common_neighbor, salton_index, jaccard_index, sorensen_index, hub_promoted_index,hub_depressed_index, \
    leicht_holme_newman_index, prederential_attachment_index, adamic_adar_index, resource_allocation_index \
    = [],[],[],[],[],[],[],[],[],[]
    # global features
    edge_betweeness_centrality,local_path_index = [],[]

    # label changing in weight
    time,year,label,edge = [],[],[],[]
    
    for i in range(len(graphs)-1):
        G = graphs[i]
        H = graphs[i+1]
        G.add_nodes_from([n for n in H if n not in G])
        H.add_nodes_from([n for n in G if n not in H])
        removed_edges = list(nx.difference(G,H).edges())
        added_edges = list(nx.difference(H,G).edges()) 
        Ki = G.degree()
        EBC = nx.edge_betweenness_centrality(G)    
        nodelist = list(G.nodes())
        LPI = local_path_weight(G,nodelist)
        for e in list(G.edges()):            
            u,v = e                           
            uv_intersection = list(nx.common_neighbors(G,u,v))
            union_size = len(set(G[u])|set(G[v]))
            common_neighbor.append(len(uv_intersection))
            salton_index.append(len(uv_intersection)/np.sqrt(Ki[u]*Ki[v]))
            jaccard_index.append(len(uv_intersection)/union_size)
            sorensen_index.append(2*len(uv_intersection)/(Ki[u]+Ki[v]))
            hub_promoted_index.append(len(uv_intersection)/min(Ki[u],Ki[v]))
            hub_depressed_index.append(len(uv_intersection)/max(Ki[u],Ki[v]))
            leicht_holme_newman_index.append(len(uv_intersection)/(Ki[u]*Ki[v]))
            prederential_attachment_index.append(Ki[u]*Ki[v])
            local_path_index.append(LPI[nodelist.index(u),nodelist.index(v)])            
            edge_betweeness_centrality.append(EBC[e])
            
            if len(uv_intersection) == 0:
                adamic_adar_index.append(0)
                resource_allocation_index.append(0)
            else:    
                adamic_adar_index.append(sum([1/math.log(Ki[z]) for z in uv_intersection]))
                resource_allocation_index.append(sum(1/Ki[z] for z in uv_intersection))
            year.append(date[i])
            edge.append(e)
            
            list1 = [~g.has_edge(*e) for g in graphs[i+1:]]
            count_dups = [sum(1 for _ in group) for _, group in groupby(list1)]   
            if e in removed_edges: 
                label.append(G[u][v]['flights']-0)
                time.append(count_dups[0])
            elif e in added_edges:
                label.append(0-H[u][v]['flights'])
                time.append(count_dups[0])
            else:
                label.append(G[u][v]['flights']-H[u][v]['flights'])
                time.append(0)
       
        sys.stdout.write('\r {} of {} is done'.format(i, len(graphs)))
        sys.stdout.flush() 
        
        
    data = pd.DataFrame({'common_neighbor':common_neighbor,
                  'salton_index':salton_index,
                  'jaccard_index':jaccard_index,
                  'sorensen_index':sorensen_index,
                  'hub_promoted_index':hub_promoted_index,
                  'hub_depressed_index':hub_depressed_index,
                  'leicht_holme_newman_index':leicht_holme_newman_index,
                  'prederential_attachment_index':prederential_attachment_index,
                   'adamic_adar_index':adamic_adar_index,
                  'resource_allocation_index':resource_allocation_index,
                    'local_path_index':local_path_index,
                   'edge_betweeness_centrality':edge_betweeness_centrality,
                    'time':time,
                   'year':year,
                   'label': label,
                    'edge':edge})
    
    return(data)