In [None]:
from utility_functions import *

In [None]:
import pandas as pd
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt

## Load graph

In [None]:
# Read the comma-delimited weighted edge list into an undirected graph.
G_Combined = nx.read_weighted_edgelist(
    "Data/Combined_Edge_List.csv",
    delimiter=',',
    create_using=nx.Graph,
)

### Retaining only the largest connected component in G_Combined

In [None]:
# Keep only the largest connected component.
# nx.connected_components() yields components in no guaranteed size order, so
# the candidates (components with at least 5 nodes) are sorted by size,
# largest first, instead of trusting whichever one happens to come first.
components = list(nx.connected_components(G_Combined))
large_components = sorted(
    (c for c in components if len(c) >= 5),
    key=len,
    reverse=True,
)
# subgraph() returns a read-only view restricted to the selected nodes.
G_Combined = G_Combined.subgraph(large_components[0])

In [None]:
# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0, so build the
# summary from the graph's own counters, which exist in every version.
"Graph with {} nodes and {} edges".format(
    G_Combined.number_of_nodes(),
    G_Combined.number_of_edges(),
)

In [None]:
# Spring layout of the retained component; the seed is fixed so that the node
# positions are the same on every run.
nodePos = nx.spring_layout(
    G_Combined,
    seed=23,
)

## GRD Analysis

### Block size = 1

In [None]:
# GRD analysis with the helper's default block size and percentage.
# NOTE(review): the block-size 2-4 runs below pass percentage=0.2 explicitly;
# confirm the helper's default matches what is intended here.
(
    R,
    lcc_sizes,
    graph_snapshots,
    directly_removed_nodes_properties,
) = disruption_GRD_analysis(
    G_Combined,
    main_centr='GRD-Analysis',
)

In [None]:
# One row per dictionary key; the key becomes the first column after reset_index().
# Assumes every value holds exactly three properties, in the order named below
# (TODO confirm against disruption_GRD_analysis).
df = (
    pd.DataFrame
    .from_dict(directly_removed_nodes_properties, orient='index')
    .reset_index()
)
df.columns = ['node', 'betweenness_rank', 'degree_rank', 'is_ap']

### Block size = 2

In [None]:
# Same GRD analysis, removing nodes in blocks of 2 with percentage=0.2.
(
    R2,
    lcc_sizes_2,
    graph_snapshots_2,
    directly_removed_nodes_properties_2,
) = disruption_GRD_analysis(
    G_Combined,
    main_centr="GRD-Analysis",
    block_size=2,
    percentage=0.2,
)

In [None]:
# Each dictionary value is a sequence of per-node records; concatenate them
# into a single list so every record becomes one DataFrame row.
flat_list = [
    node_props
    for block_props in directly_removed_nodes_properties_2.values()
    for node_props in block_props
]
df2 = pd.DataFrame(flat_list)

### Block size = 3

In [None]:
# Same GRD analysis, removing nodes in blocks of 3 with percentage=0.2.
(
    R_3,
    lcc_sizes_3,
    graph_snapshots_3,
    directly_removed_nodes_properties_3,
) = disruption_GRD_analysis(
    G_Combined,
    main_centr="GRD-Analysis",
    block_size=3,
    percentage=0.2,
)

In [None]:
# Each dictionary value is a sequence of per-node records; concatenate them
# into a single list so every record becomes one DataFrame row.
flat_list = [
    node_props
    for block_props in directly_removed_nodes_properties_3.values()
    for node_props in block_props
]
df3 = pd.DataFrame(flat_list)

### Block size = 4

In [None]:
# Same GRD analysis, removing nodes in blocks of 4 with percentage=0.2.
(
    R_4,
    lcc_sizes_4,
    graph_snapshots_4,
    directly_removed_nodes_properties_4,
) = disruption_GRD_analysis(
    G_Combined,
    main_centr="GRD-Analysis",
    block_size=4,
    percentage=0.2,
)

# Each dictionary value is a sequence of per-node records; concatenate them
# into a single list so every record becomes one DataFrame row.
flat_list = [
    node_props
    for block_props in directly_removed_nodes_properties_4.values()
    for node_props in block_props
]
df4 = pd.DataFrame(flat_list)

In [None]:
from matplotlib.ticker import StrMethodFormatter
import matplotlib.gridspec as gridspec

# One figure with a 4 x 3 grid: one row per block size (1-4) and one column per
# recorded property (betweenness rank, degree rank, is_ap).
fig = plt.figure(figsize=(20, 11))
gs = gridspec.GridSpec(4, 3, width_ratios=[3.5, 3.6, 0.9])

# Base font size; labels and annotations are expressed as offsets from it.
font_size = 16


def plot_data(ax, data, xlabel=None, ylabel=None, sort_ascending=True, hatch_style=None):
    """Draw the value frequencies of ``data`` as outlined bars on ``ax``.

    Bars are ordered by value (ascending or descending according to
    ``sort_ascending``) and each one is annotated with its share of
    ``len(data)`` as a percentage. Axis labels are only set when ``xlabel`` /
    ``ylabel`` are truthy. The top and right spines are hidden.
    """
    frequencies = data.value_counts().sort_index(ascending=sort_ascending)
    frequencies.plot(
        kind='bar',
        facecolor='none',
        edgecolor='black',
        linewidth=2,
        ax=ax,
        hatch=hatch_style,
    )

    if xlabel:
        ax.set_xlabel(xlabel, fontsize=font_size + 1)
    if ylabel:
        ax.set_ylabel(ylabel, fontsize=font_size + 1)

    # Label every bar with its percentage of all observations, just above its top.
    n_observations = len(data)
    for bar in ax.patches:
        share = '{:.1f}%'.format(100 * bar.get_height()/n_observations)
        ax.annotate(
            share,
            (bar.get_x(), bar.get_height() + 0.15),
            color='black',
            fontsize=font_size - 3,
        )

    ax.tick_params(axis='x', labelsize=font_size, rotation=0)
    ax.tick_params(axis='y', labelsize=font_size)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)


# Formatter for y-axis (integer tick labels); applied to the first panel only.
y_format = StrMethodFormatter('{x:.0f}')

# (frame, y-label of the row's first panel), one entry per grid row.
row_specs = [
    (df, 'Frequency (b=1)'),
    (df2, 'Frequency (b=2)'),
    (df3, 'Frequency (b=3)'),
    (df4, 'Frequency (b=4)'),
]
# (column, x-label used on the bottom row, ascending sort?, hatch), one entry per grid column.
column_specs = [
    ('betweenness_rank', 'Betweenness Rank', True, ' '),
    ('degree_rank', 'Degree Rank', True, '//'),
    ('is_ap', 'is_AP', False, '..'),
]
last_row = len(row_specs) - 1

for row_idx, (frame, freq_label) in enumerate(row_specs):
    for col_idx, (column, bottom_label, ascending, hatch) in enumerate(column_specs):
        # GridSpec cells are addressed in row-major order.
        panel = plt.subplot(gs[row_idx * len(column_specs) + col_idx])
        plot_data(
            panel,
            frame[column],
            xlabel=bottom_label if row_idx == last_row else None,
            ylabel=freq_label if col_idx == 0 else None,
            sort_ascending=ascending,
            hatch_style=hatch,
        )
        if row_idx == 0 and col_idx == 0:
            ax0 = panel
            ax0.yaxis.set_major_formatter(y_format)

plt.tight_layout()
plt.savefig("grd_analysis.pdf")
plt.show()

### Time comparison

In [None]:
# Time one GRD disruption run with block_size=3 (compare with the SF-GRD run at the same block size).
%time disruption(G_Combined, main_centr="GRD", block_size=3)

In [None]:
# Time one SF-GRD disruption run with block_size=3 (compare with the GRD run at the same block size).
%time disruption(G_Combined, main_centr="SF-GRD", block_size=3)

In [None]:
# Time one GRD disruption run with block_size=4 (compare with the SF-GRD run at the same block size).
%time disruption(G_Combined, main_centr="GRD", block_size=4)

In [None]:
# Time one SF-GRD disruption run with block_size=4 (compare with the GRD run at the same block size).
%time disruption(G_Combined, main_centr="SF-GRD", block_size=4)

### Comparing different approaches

In [None]:
# Node-removal strategies to compare, keyed by the label used for each one.
# Every value is a two-element list; presumably [primary ranking, secondary
# ranking or None] — confirm against centrality_disruption_analysis. String
# entries ('CoreHD', 'APs', 'GRD', 'SF-GRD') are presumably resolved by name
# inside that helper.
centr_measures = {
    'Betweenness': [nx.betweenness_centrality, None],
    'Betweenness-Degree': [nx.betweenness_centrality, degree_centrality],
    'CI': [collective_influence_centrality, None],
    'Degree': [degree_centrality, None],
    'Degree-Betweenness': [degree_centrality, nx.betweenness_centrality],
    'CoreHD': ['CoreHD', degree_centrality],
    'APs-Degree': ['APs', degree_centrality],
    'GRD': ['GRD', None],
    'SF-GRD': ['SF-GRD', None],
}

In [None]:
def plot_creation(dflcc, typerem, input_name, w):
    """
    Network Disruption Plot.

    Draws one line per column of ``dflcc`` against the frame's index, shows the
    figure, then saves it as ``'{input_name}_{typerem}_{w}.pdf'`` (relative to
    the current working directory) and clears the figure.

    Side effects: sets the seaborn style to "white" and overwrites the global
    ``plt.rcParams['figure.figsize']``.

    :param dflcc: (pandas.core.frame.DataFrame) Largest Connected Component Dataframe.
        The index is used as the x axis ('Number of Nodes Removed') and its last
        value must be an integer, since it bounds the tick range. Must be non-empty.
    :param typerem: (string) Type of node removal. It can be 'Sequential' or 'Block'.
        Only used in the output file name.
    :param input_name: (string) Name of Input Dataset. It can be 'Meeting' or 'PhoneCalls'.
        Used as the plot title and in the output file name.
    :param w: (string) it can be 'Weighted' or 'Unweighted'. Only used in the output file name.
    """
    sns.set_style("white")
    plt.rcParams['figure.figsize'] = [20, 10]

    # One tick every 5 removed nodes, up to and including the last index value
    # (the upper bound of range() is exclusive, hence the + 1).
    tick_positions = list(range(0, dflcc.index[-1] + 1, 5))

    plt.grid(True, linestyle=':')
    # Bind the axes before the loop so that `ax` exists even when the frame has
    # no columns; this is the same current axes seaborn would draw on by default.
    ax = plt.gca()
    for column in list(dflcc.columns):
        sns.lineplot(x=dflcc.index, y=column, markers=True, dashes=False,
                     data=dflcc, label=column, lw=4, marker="o", ax=ax)

    ax.set_title(input_name, fontsize=24)
    ax.set_xticks(tick_positions)
    ax.set_xlabel('Number of Nodes Removed', fontsize=20)
    ax.set_ylabel('LCC Size', fontsize=20)
    ax.yaxis.set_label_coords(0.05, 0.5)  # Adjust the x-coordinate to move the label into the graph

    ax.legend(fontsize=20)
    ax.tick_params(labelsize=18)
    # Uncomment below for a detailed plot of first 30 iterations, discarding the others.
    # ax.set(xlim=(0, 30))

    # Grab the figure before plt.show(): the handle is still needed afterwards
    # to resize and save the figure.
    fig = plt.gcf()
    plt.show()
    fig.set_size_inches((11, 9), forward=False)
    fig.savefig('{0}_{1}_{2}.pdf'.format(input_name, typerem, w))
    fig.clf()

In [None]:
# Compare all strategies with block size 1 and percentage=0.30.
# NOTE(review): this rebinds `graph_snapshots`, which the GRD analysis cell
# (block size 1) also assigns.
block_size = 1
R_values, df_lcc_all, graph_snapshots = centrality_disruption_analysis(
    G_Combined,
    centr_measures,
    include_within_LCC=True,
    block_size=block_size,
    percentage=0.30,
)

In [None]:
# Compare all strategies with block size 2 (helper's default percentage).
# NOTE(review): this rebinds `graph_snapshots_2`, which the GRD analysis cell
# (block size 2) also assigns.
block_size = 2
R_values_2, df_lcc_all_2, graph_snapshots_2 = centrality_disruption_analysis(
    G_Combined,
    centr_measures,
    include_within_LCC=True,
    block_size=block_size,
)

In [None]:
# Compare all strategies with block size 3 (helper's default percentage).
# NOTE(review): this rebinds `graph_snapshots_3`, which the GRD analysis cell
# (block size 3) also assigns.
block_size = 3
R_values_3, df_lcc_all_3, graph_snapshots_3 = centrality_disruption_analysis(
    G_Combined,
    centr_measures,
    include_within_LCC=True,
    block_size=block_size,
)

In [None]:
# Compare all strategies with block size 4 (helper's default percentage).
# NOTE(review): this rebinds `graph_snapshots_4`, which the GRD analysis cell
# (block size 4) also assigns.
block_size = 4
R_values_4, df_lcc_all_4, graph_snapshots_4 = centrality_disruption_analysis(
    G_Combined,
    centr_measures,
    include_within_LCC=True,
    block_size=block_size,
)

In [None]:
# 40%
# NOTE(review): `R_values` is rebound here, replacing the value from the
# percentage=0.30 run, and again by every other 40% cell.
block_size = 1
pct = 0.4
R_values, df_lcc_all_41, graph_snapshots_41 = centrality_disruption_analysis(
    G_Combined,
    centr_measures,
    include_within_LCC=True,
    block_size=block_size,
    percentage=pct,
)

In [None]:
# 40% run with block size 2.
# NOTE(review): `R_values` is rebound by every 40% cell; only the last run's
# value survives.
block_size = 2
pct = 0.4
R_values, df_lcc_all_42, graph_snapshots_42 = centrality_disruption_analysis(
    G_Combined,
    centr_measures,
    include_within_LCC=True,
    block_size=block_size,
    percentage=pct,
)

In [None]:
# 40% run with block size 3.
# NOTE(review): `R_values` is rebound by every 40% cell; only the last run's
# value survives.
block_size = 3
pct = 0.4
R_values, df_lcc_all_43, graph_snapshots_43 = centrality_disruption_analysis(
    G_Combined,
    centr_measures,
    include_within_LCC=True,
    block_size=block_size,
    percentage=pct,
)

In [None]:
# 40% run with block size 4.
# NOTE(review): `R_values` is rebound by every 40% cell; only the last run's
# value survives.
block_size = 4
pct = 0.4
R_values, df_lcc_all_44, graph_snapshots_44 = centrality_disruption_analysis(
    G_Combined,
    centr_measures,
    include_within_LCC=True,
    block_size=block_size,
    percentage=pct,
)