In [2]:
import pandas as pd
import networkx as nx

In [13]:
def process_network(path):
    #load network
    #G = nx.read_weighted_edgelist(path, create_using=nx.DiGraph)
    G = nx.read_graphml(path)
    
    #remove self loops
    selfloops = nx.selfloop_edges(G)
    print("Removed " + str(len(list(selfloops))) + "Selfloops")
    G.remove_edges_from(selfloops)
    
    #convert to undirected
    UG = G.to_undirected()
    ucount = 0
    for node in G:
        for ngbr in nx.neighbors(G, node):
            if node in nx.neighbors(G, ngbr):
                #print(ngbr,node)
                ucount = ucount + 1
                UG.edges[node, ngbr]['weight'] = (
                    G.edges[node, ngbr]['weight'] + G.edges[ngbr, node]['weight']
                )
    print("Found " + str(ucount/2) + " Directed Edges")
    
    #take the largest connected component
    ccount = nx.number_connected_components(UG)
    largest_cc = max(nx.connected_components(UG), key=len)
    UG = UG.subgraph(largest_cc)
    print("Found " + str(ccount) + " Connected Components")
    print("\nThe largest connected components has " + str(len(largest_cc)) + " nodes and " + str(len(UG.edges())))
    
    #return undirected network
    return UG

In [14]:
network_properties = pd.DataFrame(index = ['# Nodes', '# Edges', 'Average Degree', 'Density', 'Assortativity', 'Diameter', 'Average Clustering'])

In [15]:
network = 'airports'
path = '../Datasets/' + network + '.graphml'

In [16]:
G = process_network(path)

Removed 0Selfloops
Found 16665.0 Directed Edges
Found 1 Connected Components

The largest connected components has 2734 nodes and 16665


In [17]:
#nx.to_pandas_edgelist(G).to_csv( network+'.csv', index=False)

In [18]:
properties = dict()

properties['# Nodes'] = len(G)
properties['# Edges'] = len(G.edges())
properties['Average Degree'] = round(sum([k for u,k in G.degree()])/len(G), 3)
properties['Density'] = round((2*len(G.edges()))/(len(G.nodes())*(len(G.nodes())-1)), 3)
properties['Assortativity'] = round(nx.degree_assortativity_coefficient(G), 3)
properties['Diameter'] = nx.diameter(G)
properties['Average Clustering'] = round(nx.average_clustering(G), 3)

In [19]:
network_properties[network] = properties.values()

In [20]:
network_properties.T

Unnamed: 0,# Nodes,# Edges,Average Degree,Density,Assortativity,Diameter,Average Clustering
airports,2734.0,16665.0,12.191,0.004,-0.047,12.0,0.464
