In [None]:
import math
import powerlaw
import statistics

import networkx as nx
import matplotlib.pyplot as plt

from collections import Counter

In [None]:
def load_graph(name):
    """Read the GML file for graph ``name`` from the ``../graphs`` directory.

    ``name`` is the file path relative to ``../graphs`` without the ``.gml``
    extension. Returns whatever ``nx.read_gml`` produces for that file.
    """
    path_template = "../graphs/{}.gml"
    return nx.read_gml(path_template.format(name))

In [None]:
# Load the chosen graph together with its two derived variants.
# All three files share one base name; the suffix selects the variant.
name = "General Assembly/serbia/net_ga_2012_2024"
fname = f'{name}_filled'  # "filled" variant -- presumably missing edges filled in; TODO confirm
cname = f'{name}_cutoff'  # "cut-off" variant -- presumably weak edges removed; TODO confirm

net = load_graph(name)
fnet = load_graph(fname)
cnet = load_graph(cname)

### Basic Characteristics

In [None]:
# Node count of the standard graph; each node is reported as one country.
print(f'Total countries: {net.number_of_nodes()}')

In [None]:
# Edge density of the standard graph next to that of the cut-off graph.
print(f'Standard graph density: {nx.density(net)}')
print(f'Cut-off graph density: {nx.density(cnet)}')

In [None]:
# Rich club check based on the cut-off graph.
# Candidate nodes are taken from `cnet` itself: its degree view only knows
# `cnet`'s own nodes, so looking up a node that exists only in another graph
# raises KeyError.
RICH_CLUB_MIN_DEGREE = 80  # weighted degree a node must exceed to be in the club
netDegrees = nx.degree(cnet, weight='weight')
richClubNet = nx.subgraph(cnet, [x for x, degree in netDegrees if degree > RICH_CLUB_MIN_DEGREE])
print(f'Rich Club of {richClubNet.number_of_nodes()} nodes - Density: {nx.density(richClubNet)}')

In [None]:
# Path statistics are only reported when the cut-off graph is connected;
# otherwise the number of connected components is reported instead.
cutoff_is_connected = nx.is_connected(cnet)
print(f'Cutoff is connected: {cutoff_is_connected}')
if not cutoff_is_connected:
    print(f'Cut-off number of components: {nx.number_connected_components(cnet)}')
else:
    print(f'Cut-off average distance: {nx.average_shortest_path_length(cnet)}')
    print(f'Cut-off diameter: {nx.diameter(cnet)}')

In [None]:
# Weighted average clustering of the filled and cut-off graphs, followed by
# the transitivity of the cut-off graph (called without a weight argument).
print(f'Filled average clustering: {nx.average_clustering(fnet, weight="weight")}')
print(f'Cut-off average clustering: {nx.average_clustering(cnet, weight="weight")}')
print(f'Cut-off global clustering: {nx.transitivity(cnet)}')

In [None]:
# Create cut-off equivalent Erdos Renyi and ScaleFree networks.
# Both generators are random; a fixed seed keeps the reference clustering
# values identical across "Restart & Run All".
RANDOM_SEED = 42

n = cnet.number_of_nodes()
m = cnet.number_of_edges()
# Edge probability giving the ER graph the same expected density as cnet.
p = ( 2*float(m) ) / ( n* (n-1) )

netER = nx.erdos_renyi_graph(n, p, seed=RANDOM_SEED)
netSFMulti = nx.scale_free_graph(n, seed=RANDOM_SEED)

# Collapse parallel edges of the generated multigraph into a simple DiGraph
# whose 'weight' counts how many parallel edges were merged.
netSF = nx.DiGraph()
for u,v in netSFMulti.edges():
    if netSF.has_edge(u,v):
        netSF[u][v]['weight'] += 1
    else:
        netSF.add_edge(u, v, weight=1)

# average_clustering is called without a weight here, so the merged-edge
# counts stored above do not influence the printed values.
print(f'Cut-off ER Average clustering: {nx.average_clustering(netER)}')
print(f'Cut-off SF Average clustering: {nx.average_clustering(netSF)}')

In [None]:
def plot_clustering(net, weight = None, logScale = False):
    """Draw a bar chart of how many nodes fall into each clustering bucket.

    Every node's clustering coefficient is floored to a multiple of 1/40
    (0.025) and the number of nodes per bucket is plotted. The bucket counts
    are also printed.

    Parameters:
        net: networkx graph to analyse.
        weight: edge attribute name passed to ``nx.clustering``; ``None``
            gives the unweighted coefficient.
        logScale: when true, the frequency axis is logarithmic.
    """
    buckets_per_unit = 40
    clust_counts = Counter(
        math.floor(value * buckets_per_unit) / buckets_per_unit
        for value in nx.clustering(net, weight=weight).values()
    )
    print(clust_counts)

    plt.ylabel('frequency')
    plt.xlabel('weighted clustering' if weight is not None else 'clustering')

    # The bar width equals the bucket width, so left-aligned bars tile the axis.
    plt.bar(clust_counts.keys(), clust_counts.values(), 0.025, align='edge', color='darkred', log=logScale, edgecolor="black")

In [None]:
# Weighted clustering distribution of the filled graph, linear frequency axis.
plot_clustering(fnet, weight = "weight", logScale = False)

In [None]:
# Weighted clustering distribution of the cut-off graph, linear frequency axis.
plot_clustering(cnet, weight = "weight", logScale = False)

In [None]:
# Degree assortativity of the filled and cut-off graphs, first unweighted and
# then using the 'weight' edge attribute.
# ("koeficijent asortativnosti" in the labels means "assortativity coefficient".)
print(f"Filled assortativity coefficient: {nx.degree_assortativity_coefficient(fnet)}")
print(f"Filled weighted koeficijent asortativnosti: {nx.degree_assortativity_coefficient(fnet, weight='weight')}")
print(f"Cut-off assortativity coefficient: {nx.degree_assortativity_coefficient(cnet)}")
print(f"Cut-off weighted koeficijent asortativnosti: {nx.degree_assortativity_coefficient(cnet, weight='weight')}")

In [None]:
# Plot degree distribution
def plot_deg_frequency(net, weight = None, xscale = "log", yscale = "log"):
    """Scatter-plot how many nodes fall into each degree bucket.

    Degrees are rounded to the nearest multiple of 5 before counting. The
    bucket counts are printed, then plotted on figure 1.

    Parameters:
        net: networkx graph to analyse.
        weight: edge attribute name used for the weighted degree; ``None``
            uses the plain degree.
        xscale, yscale: matplotlib scale names for the two axes.
    """
    bucket_size = 5
    deg_counts = Counter(
        bucket_size * round(value / bucket_size)
        for _, value in net.degree(weight=weight)
    )
    print(deg_counts)
    if not deg_counts:
        # A graph without nodes has no buckets; the unpacking below would raise.
        return
    x, y = zip(*deg_counts.items())

    plt.figure(1)

    plt.xlabel('weighted degree' if weight is not None else 'degree')
    plt.xscale(xscale)
    plt.xlim(min(x), max(x))

    plt.ylabel('frequency')
    plt.yscale(yscale)
    plt.ylim(1, max(y))

    plt.scatter(x, y, marker='.')
    plt.show()

In [None]:
# Show weighted degree distribution of the standard graph on linear axes
# Is it power-law?
plot_deg_frequency(net, weight='weight', xscale='linear', yscale='linear')

### Least and most friendly countries

In [None]:
def plot_agreement(net, xscale = "log", yscale = "log"):
    """Print summary statistics of the edge 'agreement' values and plot their distribution.

    The median and average are computed from the exact edge values. For the
    plot, values are floored to multiples of 1/40 (0.025) and the number of
    edges per bucket is drawn as a scatter plot on figure 1; the bucket counts
    are also printed.

    Parameters:
        net: networkx graph whose edges carry an 'agreement' attribute.
        xscale, yscale: matplotlib scale names for the two axes.

    Raises:
        statistics.StatisticsError: if the graph has no edges.
    """
    raw_agreements = [ edge[2]['agreement'] for edge in net.edges(data=True) ]

    # Statistics use the exact values; flooring them first would bias both
    # figures downwards by up to one bucket width.
    print(f'Median agreement: {statistics.median(raw_agreements)}')
    print(f'Average agreement: {statistics.fmean(raw_agreements)}')

    agreement_counts = Counter(math.floor(value * 40) / 40 for value in raw_agreements)
    print(agreement_counts)
    x, y = zip(*agreement_counts.items())

    plt.figure(1)

    plt.xlabel('agreement')
    plt.xscale(xscale)
    if xscale == "log":
        # A log axis cannot start at 0; start at the smallest positive bucket.
        positive_x = [value for value in x if value > 0]
        if positive_x:
            plt.xlim(min(positive_x), max(x))
    else:
        plt.xlim(0, max(x))

    plt.ylabel('frequency')
    plt.yscale(yscale)
    plt.ylim(1, max(y))

    plt.scatter(x, y, marker='.')
    plt.show()

In [None]:
# Show the distribution of 'agreement' values of the filled graph (linear axes)
plot_agreement(fnet, xscale='linear', yscale='linear')

In [None]:
def friendly_edge_to_string(edge, country_buffer_1, country_buffer_2):
    """Format one ``(country1, country2, data)`` edge as an aligned text row.

    ``data`` must provide 'total', 'points' and 'agreement'. The agreement is
    shown as a percentage rounded to one decimal. Country names are
    left-justified to ``country_buffer_1`` / ``country_buffer_2`` characters.
    """
    country1 = edge[0].ljust(country_buffer_1)
    country2 = edge[1].ljust(country_buffer_2)

    total = str(edge[2]['total']).ljust(5)
    points = str(edge[2]['points']).rjust(6)
    agreement = round(edge[2]['agreement'] * 100, 1)

    return '{}, {} - {}/{} ({}%)'.format(country1, country2, points, total, agreement)

def edge_country_1_length(edge):
    """Return the length of the edge's first endpoint name."""
    return len(edge[0])

def edge_country_2_length(edge):
    """Return the length of the edge's second endpoint name."""
    return len(edge[1])

def friendly_edge_print(edges):
    """Print every edge as an aligned row, padding names to the longest in ``edges``.

    Accepts any iterable of ``(country1, country2, data)`` edges and prints
    nothing when it is empty.
    """
    # Materialise first: the edges are traversed three times, which would
    # exhaust a generator after the first pass.
    edges = list(edges)
    if not edges:
        return
    max_cnt_1_length = max(map(edge_country_1_length, edges))
    max_cnt_2_length = max(map(edge_country_2_length, edges))
    for edge in edges:
        print(friendly_edge_to_string(edge, max_cnt_1_length, max_cnt_2_length))

In [None]:
# Sort edges by agreement
def has_significant_total(edge, min_total=0):
    """Return True when the edge's 'total' attribute is at least ``min_total``.

    ``edge`` is a ``(u, v, data)`` tuple as produced by ``edges(data=True)``.
    With the default ``min_total=0`` every edge with a non-negative total
    passes; raise it to drop edges with too few observations.
    """
    return edge[2]['total'] >= min_total

# Edges ordered from lowest to highest agreement, keeping only those accepted
# by has_significant_total. A plain comprehension replaces `__builtin__.filter`:
# `__builtin__` only exists where the interactive shell injects it and is
# undefined when this code runs as an ordinary Python 3 script.
sorted_edges = sorted(net.edges(data=True), key=lambda edge: edge[2]['agreement'])
sorted_edges = [edge for edge in sorted_edges if has_significant_total(edge)]

In [None]:
# Show the five country pairs with the least agreement
# (sorted_edges is in ascending order of agreement)
friendly_edge_print(sorted_edges[:5])

In [None]:
# Show the five country pairs with the most agreement
# (last entries of the ascending list, so the highest is printed last)
friendly_edge_print(sorted_edges[-5:])

In [None]:
# Mean 'agreement' over all edges incident to each country in the filled graph
avg_agreements = {}
for country_1 in fnet:
    agreements = [edge_data['agreement'] for edge_data in fnet[country_1].values()]
    avg_agreements[country_1] = statistics.mean(agreements)

In [None]:
# Ten countries with the lowest average agreement, ascending, shown as percentages.
print('Countries with lowest average agreement:\n')
for k, v in sorted(avg_agreements.items(), key=lambda item: item[1])[:10]:
    print(f'{k}: {round(v * 100, 1)}%')

In [None]:
# Ten countries with the highest average agreement; the negated key sorts descending.
print('Countries with highest average agreement:\n')
for k, v in sorted(avg_agreements.items(), key=lambda item: -item[1])[:10]:
    print(f'{k}: {round(v * 100, 1)}%')

### Centrality

In [None]:
# Calculate normalized weight using Z-Score
def calculate_normalized_weight(net):
    """Add 'weight_normalized' and 'reciprocal' attributes to every edge of ``net``.

    Steps, applied in place to the edge data dictionaries:
      1. Convert each edge 'weight' to a z-score (sample standard deviation).
      2. Rescale with ``z / (2 * max_z) + 0.5`` so the largest weight maps to 1
         and the mean maps to 0.5; results that are not positive are clamped
         to 0.000001. The outcome is stored as 'weight_normalized'.
      3. Store ``1 / weight_normalized`` as 'reciprocal'.

    When the weights have no spread (fewer than two edges, or all weights
    equal) every z-score is treated as 0, so each edge gets 0.5 and a
    reciprocal of 2.0. A graph without edges is left untouched.

    Parameters:
        net: graph whose ``edges(data=True)`` yields ``(u, v, data)`` tuples
            with a numeric 'weight' entry in ``data``.
    """
    edges = list(net.edges(data=True))
    weights = [ edge[2]['weight'] for edge in edges ]
    if not weights:
        return

    meanw = statistics.fmean(weights)
    # statistics.stdev needs at least two data points.
    stdw = statistics.stdev(weights) if len(weights) > 1 else 0.0

    if stdw == 0:
        # No spread: avoid dividing by zero and treat every weight as average.
        z_scores = [0.0] * len(weights)
    else:
        z_scores = [(weight - meanw) / stdw for weight in weights]

    maxw = max(z_scores)

    for edge, z_score in zip(edges, z_scores):
        if maxw > 0:
            my_weight = z_score / (maxw * 2) + 0.5
        else:
            my_weight = 0.5
        # Clamp to a small positive value so the reciprocal stays finite.
        edge[2]['weight_normalized'] = my_weight if my_weight > 0 else 0.000001
        edge[2]['reciprocal'] = 1 / edge[2]['weight_normalized']
        
# Adds 'weight_normalized' and 'reciprocal' attributes to every edge of `net`, in place.
calculate_normalized_weight(net)

In [None]:
def print_centrality(data, high_count=5, low_count=5):
    """Print the highest and lowest entries of a node -> value mapping.

    Values are rounded to one decimal and listed in descending order under
    each heading.

    Parameters:
        data: mapping from node name to numeric centrality value.
        high_count: number of top entries printed under the first heading.
        low_count: number of bottom entries printed under the second heading;
            0 prints none.
    """
    vals = sorted(data.items(), key=lambda x: x[1], reverse=True)
    vals = [(name, round(value, 1)) for name, value in vals]

    print('Higest values:')
    for name, value in vals[:high_count]:
        print(f'{name}: {value}')

    print('\nLowest values:')
    # vals[-0:] is the whole list, so slice from an explicit start index.
    low_start = max(len(vals) - low_count, 0)
    for name, value in vals[low_start:]:
        print(f'{name}: {value}')

In [None]:
# Print weighted degree centrality of the standard graph
# (five highest and five lowest weighted degrees)
print_centrality(dict(net.degree(weight='weight')), 5, 5)

In [None]:
# Standard graph betweenness centrality (unnormalized).
# The 'reciprocal' edge attribute written by calculate_normalized_weight is
# used as the path weight, so that function must have been run on `net` first.
print_centrality(dict(nx.betweenness_centrality(net, weight='reciprocal', normalized=False)), 10, 5)