In [1]:
import os
import networkx as nx
import pandas as pd
import random

In [2]:
edgelist = pd.read_csv('data/cora/cora.cites', sep='\t', names=['target', 'source'])
edgelist['label'] = 'cites'

In [3]:
G = nx.from_pandas_edgelist(edgelist, 
                            edge_attr="label", 
                            source='source',
                            target='target'
                           )
# Get Largest Connected Component
gs = [G.subgraph(c) for c in nx.connected_components(G)]
G = max(gs, key=len)

In [4]:
class GraphProperties:
    DEGREE_CENTRALITY = (1, 'Degree Centrality')
    BETWEENNESS_CENTRALITY = (2, 'Betweenness Centrality')
    
    @classmethod
    def get_graph_property(cls, G, prop):
        if(prop == 1):
            return cls._average_degree_centrality(G)
        if(prop == 2):
            return cls._average_betweenness_centrality(G)
      
    def _average_betweenness_centrality(G):
        return sum(dict(nx.betweenness_centrality(G)).values())/float(len(G))
    

    def _average_degree_centrality(G):
        return sum(dict(nx.degree_centrality(G)).values())/float(len(G))             

In [5]:
class GraphModifier:
    graph = None
    
    def __init__(self, graph):
        self.graph = graph.copy()
        
    # returns graph with edges removed, but leaves self.graph unchanged
    def remove_random_edges(self, num_edges):
        prev_edges = []
        H = self.graph.copy()
        for i in range(0, num_edges):
            total_edges = H.number_of_edges()
            rand_edge_index = random.choice([x for x in range(total_edges) if x not in prev_edges]) # SLOW
            prev_edges.append(rand_edge_index)
            edge_tuple = list(H.edges)[rand_edge_index]
            H.remove_edge(*edge_tuple)
        return H

In [6]:
def perturb_sparsify_and_print_property(G, pert_options, graph_property, num_trials):
    
    print(graph_property[1], " of the spanner of perturbed graph")
    print("Perturbation: ", pert_options['num_edges'], " edges are randomly ", pert_options['pert_type'])
    
    modifier = GraphModifier(G)
    for i in range(num_trials):
        H = modifier.remove_random_edges(pert_options['num_edges'])
        Hsparse = nx.spanner(H,3,seed=1)
        prop = GraphProperties.get_graph_property(H, graph_property[0])
        print(prop)
        
def sparsify_and_print_property(G, graph_property):
    Hsparse = nx.spanner(G,3,seed=1)
    prop = GraphProperties.get_graph_property(G, graph_property[0])
    print(graph_property[1], " of the spanner of unperturbed graph")
    print(prop)
    print()
    

In [7]:
# Run with DEGREE CENTRALITY

num_trials = 3 # How many times to repeat experiement
graph_property = GraphProperties.DEGREE_CENTRALITY
pert_options = {'pert_type': 'remove',
                'num_edges': 50}

sparsify_and_print_property(G, graph_property)
perturb_sparsify_and_print_property(G, pert_options, graph_property, num_trials)

Degree Centrality  of the spanner of unperturbed graph
0.0016423824752054316

Degree Centrality  of the spanner of perturbed graph
Perturbation:  50  edges are randomly  remove
0.0016261822140572208
0.0016261822140572215
0.001626182214057223


In [58]:
# Run with BETWEENNESS CENTRALITY

graph_property = GraphProperties.BETWEENNESS_CENTRALITY

sparsify_and_print_property(G, graph_property)
perturb_sparsify_and_print_property(G, pert_options, graph_property, num_trials)

Betweenness Centrality  of the spanner of unperturbed graph
0.002138944293716756

Betweenness Centrality  of the spanner of perturbed graph
Perturbation:  50  edges are randomly  remove
0.0021444836999672924
0.002135896321909279
0.002146518551939475
