In [None]:
import math
import powerlaw
import statistics

import networkx as nx
import matplotlib.pyplot as plt

from collections import Counter

In [None]:
def load_graph(name):
    """Read the GML graph stored at ``../graphs/<name>.gml``.

    Args:
        name: Path of the graph relative to the ``../graphs`` directory,
            without the ``.gml`` extension.

    Returns:
        The value returned by ``nx.read_gml`` for that file.
    """
    return nx.read_gml(f"../graphs/{name}.gml")

In [None]:
# Load the chosen graph together with its "_filled" and "_cutoff" variants,
# which share the same base name.
name = "General Assembly/net_ga_all"
fname, cname = (f'{name}_{suffix}' for suffix in ('filled', 'cutoff'))

net = load_graph(name)
fnet = load_graph(fname)
cnet = load_graph(cname)

## Basic Characteristics

In [None]:
# Every node of the standard graph is reported as one country.
print(f'Total countries: {net.number_of_nodes()}')

In [None]:
def weighted_density(net):
    """Return the summed edge weight divided by the number of unordered node pairs.

    Args:
        net: Graph exposing ``number_of_nodes()`` and ``edges(data=True)``;
            every edge must carry a ``'weight'`` attribute.

    Returns:
        1 for graphs with fewer than two nodes (there is no pair to relate
        the weights to), otherwise ``sum(weights) / (n * (n - 1) / 2)``.
    """
    node_count = net.number_of_nodes()
    if node_count < 2:
        return 1

    total_weight = sum(attrs['weight'] for _, _, attrs in net.edges(data=True))
    pair_count = (node_count * (node_count - 1)) / 2
    return total_weight / pair_count

In [None]:
# Unweighted density of the standard graph next to the weighted density of the filled graph.
print(f'Density: {nx.density(net)}')
print(f'Filled weighted density: {weighted_density(fnet)}')

In [None]:
# Rich club check based on the filled graph: keep the nodes whose weighted
# degree exceeds 155 and measure the weighted density of the subgraph they induce.
netDegrees = nx.degree(fnet, weight='weight')
richClubNet = nx.subgraph(fnet, [node for node, degree in netDegrees if degree > 155])
print(f'Rich Club of {richClubNet.number_of_nodes()} nodes - Weighted density: {weighted_density(richClubNet)}')

In [None]:
# The number of components is only reported when the cut-off graph is not connected.
cutoff_is_connected = nx.is_connected(cnet)
print(f'Cutoff is connected: {cutoff_is_connected}')
if not cutoff_is_connected:
    print(f'Cut-off number of components: {nx.number_connected_components(cnet)}')

In [None]:
# Both average clustering values use the 'weight' edge attribute;
# the transitivity call is made without a weight argument.
print(f'Filled average clustering: {nx.average_clustering(fnet, weight="weight")}')
print(f'Cut-off average clustering: {nx.average_clustering(cnet, weight="weight")}')
print(f'Cut-off global clustering: {nx.transitivity(cnet)}')

In [None]:
# Create cut-off equivalent Erdos Renyi and ScaleFree networks
# Both generators are random: a fixed seed keeps the reference values printed
# below identical across "Restart Kernel -> Run All".
RANDOM_SEED = 42

n = cnet.number_of_nodes()
m = cnet.number_of_edges()
# Edge probability = edges of the cut-off graph / number of possible node pairs.
p = (2 * float(m)) / (n * (n - 1))

netER = nx.erdos_renyi_graph(n, p, seed=RANDOM_SEED)
netSFMulti = nx.scale_free_graph(n, seed=RANDOM_SEED)

# Collapse repeated (u, v) pairs of the generated graph into single directed
# edges whose 'weight' counts how often the pair occurred.
netSF = nx.DiGraph()
for u, v in netSFMulti.edges():
    if netSF.has_edge(u, v):
        netSF[u][v]['weight'] += 1
    else:
        netSF.add_edge(u, v, weight=1)

print(f'Cut-off ER average clustering: {nx.average_clustering(netER)}')
print(f'Cut-off SF average clustering: {nx.average_clustering(netSF)}')

In [None]:
def plot_clustering(net, weight = None, logScale = False):
    """Draw a bar histogram of the per-node clustering coefficients.

    Coefficients are rounded down to multiples of 0.025 (1/40) and the number
    of nodes per bin is printed and drawn as bars of that width.

    Args:
        net: Graph passed to ``nx.clustering``.
        weight: Edge attribute used as weight, or None for unweighted clustering.
        logScale: Forwarded to ``plt.bar(log=...)`` to switch the y-axis to log scale.
    """
    bins_per_unit = 40
    bin_width = 0.025  # matches bins_per_unit so neighbouring bars touch

    clustering = nx.clustering(net, weight=weight)
    clust_counts = Counter(
        math.floor(value * bins_per_unit) / bins_per_unit for value in clustering.values()
    )
    print(clust_counts)

    plt.ylabel('frequency')
    plt.xlabel('clustering' if weight is None else 'weighted clustering')

    # The bars are built straight from the Counter, so a graph without nodes
    # simply yields an empty plot instead of failing while unpacking.
    plt.bar(list(clust_counts.keys()), list(clust_counts.values()), bin_width, align='edge', color='darkred', log=logScale, edgecolor="black")

In [None]:
# Weighted clustering histogram of the filled graph on a linear y-axis.
plot_clustering(fnet, weight = "weight", logScale = False)

In [None]:
def plot_deg_frequency(net, weight = None, bracket_size=5):
    """Scatter-plot how many nodes fall into each (weighted) degree bracket.

    Every degree is rounded to the nearest multiple of ``bracket_size``; the
    number of nodes per bracket is printed and plotted on linear axes.

    Args:
        net: Graph whose ``degree(weight=...)`` view is read.
        weight: Edge attribute used as weight, or None for the plain degree.
        bracket_size: Width of one degree bracket.
    """
    degrees = dict(net.degree(weight=weight))
    deg_counts = Counter(bracket_size * round(value / bracket_size) for value in degrees.values())
    print(deg_counts)
    if not deg_counts:
        # A graph without nodes has nothing to plot; unpacking zip() of an
        # empty Counter below would raise ValueError.
        return
    x, y = zip(*deg_counts.items())

    plt.figure(1)

    plt.xlabel('degree' if weight is None else 'weighted degree')
    plt.xscale('linear')
    plt.xlim(min(x), max(x))

    plt.ylabel('frequency')
    plt.yscale('linear')
    # NOTE(review): the y-axis starts at 1, so brackets holding a single node
    # sit directly on the axis - confirm this is intended on a linear scale.
    plt.ylim(1, max(y) * 1.1)

    plt.scatter(x, y, marker='.')
    plt.show()

In [None]:
# Show filled graph weighted degree distribution
# Is it power-law?
plot_deg_frequency(fnet, weight='weight', bracket_size=5)

In [None]:
# Show cut-off graph weighted degree distribution
# Is it power-law?
plot_deg_frequency(cnet, weight='weight', bracket_size=5)

In [None]:
# Degree assortativity of the filled and cut-off graphs, both using the 'weight' edge attribute.
print(f"Filled assortativity coefficient: {nx.degree_assortativity_coefficient(fnet, weight='weight')}")
print(f"Cut-off assortativity coefficient: {nx.degree_assortativity_coefficient(cnet, weight='weight')}")

## Least and most friendly countries

In [None]:
# Average agreement for every country
# A country without any neighbour has no 'agreement' values to average, so it
# is left out of the result (statistics.mean raises on empty data).
avg_agreements = {}
for country_1 in net:
    agreements = [attrs['agreement'] for attrs in net[country_1].values()]
    if agreements:
        avg_agreements[country_1] = statistics.mean(agreements)

In [None]:
print('Countries with highest average agreement:\n')
# Descending sort on the average; only the first ten entries are shown, as percentages.
for k, v in sorted(avg_agreements.items(), key=lambda item: item[1], reverse=True)[:10]:
    print(f'{k}: {round(v * 100, 1)}%')

In [None]:
print('Countries with lowest average agreement:\n')
# Ascending sort on the average; only the first ten entries are shown, as percentages.
for k, v in sorted(avg_agreements.items(), key=lambda item: item[1])[:10]:
    print(f'{k}: {round(v * 100, 1)}%')

In [None]:
def plot_agreement(net, resolution = 0.025):
    """Print summary statistics of the edge 'agreement' values and plot their distribution.

    The median and mean are printed first. Each value is then rounded down to
    a multiple of ``resolution`` and the number of edges per bracket is
    printed and shown as a scatter plot.

    Args:
        net: Graph whose edges carry an ``'agreement'`` attribute.
        resolution: Width of one agreement bracket.
    """
    bracket_ratio = 1 / resolution
    raw_agreements = [attrs['agreement'] for _, _, attrs in net.edges(data=True)]

    print(f'Median agreement: {statistics.median(raw_agreements)}')
    print(f'Average agreement: {statistics.fmean(raw_agreements)}')

    agreement_counts = Counter(
        math.floor(value * bracket_ratio) / bracket_ratio for value in raw_agreements
    )
    print(agreement_counts)
    x, y = zip(*agreement_counts.items())

    plt.figure(1)

    # x-axis: agreement brackets, starting at 0
    plt.xlabel('agreement')
    plt.xlim(0, max(x))

    # y-axis: number of edges per bracket, with 10% headroom above the tallest one
    plt.ylabel('frequency')
    plt.ylim(1, max(y) * 1.1)

    plt.scatter(x, y, marker='.')
    plt.show()

In [None]:
# Show the distribution of 'agreement' values over all edges of the standard graph,
# grouped into brackets of width 0.025
plot_agreement(net, resolution = 0.025)

In [None]:
def count_relationship_levels(net, bad_threshold=0.5, good_threshold=0.787, great_threshold=0.95):
    """Print how many edges fall into each agreement level, with their share of all edges.

    Levels (``a`` is the edge's 'agreement' attribute):
        Abysmal: a < bad_threshold
        Bad:     bad_threshold <= a < good_threshold
        Good:    good_threshold <= a < great_threshold
        Great:   a >= great_threshold

    Args:
        net: Graph whose edges carry an ``'agreement'`` attribute.
        bad_threshold: Lower bound of the "Bad" level.
        good_threshold: Lower bound of the "Good" level.
        great_threshold: Lower bound of the "Great" level.
    """
    agreements = [attrs['agreement'] for _, _, attrs in net.edges(data=True)]
    total = len(agreements)

    def to_percentage(val):
        # A graph without edges has nothing to take a share of; report 0%
        # rather than dividing by zero.
        if total == 0:
            return '0%'
        return f'{round(val * 100 / total)}%'

    abysmal = sum(1 for a in agreements if a < bad_threshold)
    bad = sum(1 for a in agreements if bad_threshold <= a < good_threshold)
    good = sum(1 for a in agreements if good_threshold <= a < great_threshold)
    great = sum(1 for a in agreements if a >= great_threshold)

    print(f'Abysmal: {abysmal} ({to_percentage(abysmal)})')
    print(f'Bad: {bad} ({to_percentage(bad)})')
    print(f'Good: {good} ({to_percentage(good)})')
    print(f'Great: {great} ({to_percentage(great)})')
    
# Report the agreement levels of the standard graph.
count_relationship_levels(net)

In [None]:
def friendly_edge_to_string(edge, country_buffer_1, country_buffer_2):
    """Format one edge as ``'<country 1>, <country 2> - <points>/<total> (<agreement>%)'``.

    Args:
        edge: ``(country_1, country_2, attrs)`` where ``attrs`` holds the
            keys ``'total'``, ``'points'`` and ``'agreement'``.
        country_buffer_1: Width the first country name is left-justified to.
        country_buffer_2: Width the second country name is left-justified to.

    Returns:
        The formatted line; the agreement is shown as a percentage rounded to
        one decimal place.
    """
    attrs = edge[2]
    country1 = edge[0].ljust(country_buffer_1)
    country2 = edge[1].ljust(country_buffer_2)

    # Fixed column widths keep the "points/total" part aligned across lines.
    total = str(attrs['total']).ljust(5)
    points = str(attrs['points']).rjust(6)
    agreement = round(attrs['agreement'] * 100, 1)

    return f'{country1}, {country2} - {points}/{total} ({agreement}%)'

def edge_country_1_length(edge):
    """Return the length of the first country name of an edge tuple."""
    first_country = edge[0]
    return len(first_country)

def edge_country_2_length(edge):
    """Return the length of the second country name of an edge tuple."""
    return len(edge[1])

def friendly_edge_print(edges):
    """Print one aligned line per edge, padding both country columns to their longest name.

    Args:
        edges: Iterable of ``(country_1, country_2, attrs)`` edge tuples.
            Nothing is printed when it is empty.
    """
    # The edges are traversed three times below, so one-shot iterables are
    # materialised first.
    edges = list(edges)
    if not edges:
        # max() over an empty sequence would raise ValueError.
        return

    max_cnt_1_length = max(map(edge_country_1_length, edges))
    max_cnt_2_length = max(map(edge_country_2_length, edges))
    for edge in edges:
        print(friendly_edge_to_string(edge, max_cnt_1_length, max_cnt_2_length))

In [None]:
# Sort edges by agreement, keeping only the edges with a significant 'total'
def has_significant_total(edge):
    """Return True when the edge's 'total' attribute exceeds 75.

    NOTE(review): 'total' is presumably the number of votes the two countries
    share - confirm against the graph-building code.
    """
    return edge[2]['total'] > 75

# The built-in filtering is used directly: `__builtin__` is a name that only
# IPython injects, so the cell would fail when run as plain Python.
# Filtering before sorting gives the same order and sorts fewer edges.
sorted_edges = sorted(
    (edge for edge in net.edges(data=True) if has_significant_total(edge)),
    key=lambda edge: edge[2]['agreement'],
)

In [None]:
# Show countries with the most agreement: the last ten edges of the ascending
# list, highest agreement first.
most_agreement_countries = sorted_edges[-10:][::-1]
friendly_edge_print(most_agreement_countries)

In [None]:
# Show the ten country pairs with the least agreement (first entries of the ascending list)
least_agreement_countries = sorted_edges[:10]
friendly_edge_print(least_agreement_countries)

### In-group agreements

In [None]:
# Country groups for the in-group analysis; each later list extends an earlier one.
early_soviet_satellites = ["USSR", "POLAND", "CZECHOSLOVAKIA", "BELARUS", "UKRAINE"]
core_soviet_satellites = [
    *early_soviet_satellites,
    "BULGARIA", "GERMAN DEMOCRATIC REPUBLIC", "HUNGARY",
]

eu_1991 = [
    'BELGIUM', 'GERMANY', 'ITALY', 'LUXEMBOURG', 'DENMARK', 'GREECE',
    'PORTUGAL', 'UNITED KINGDOM', 'SPAIN', 'NETHERLANDS', 'FRANCE', 'IRELAND',
]
eu_2002 = [*eu_1991, 'SWEDEN', 'FINLAND', 'AUSTRIA']
# The 2024 list is the 2002 list without the United Kingdom plus the later members.
eu_2024 = [member for member in eu_2002 if member != 'UNITED KINGDOM'] + [
    'ESTONIA', 'LATVIA', 'POLAND', 'SLOVAKIA', 'SLOVENIA', 'BULGARIA', 'ROMANIA',
    'CROATIA', 'LITHUANIA', 'CZECHIA', 'CYPRUS', 'MALTA', 'HUNGARY',
]

In [None]:
# List of country names to compare with each other; left empty here.
country_group = []

# Agreement of every unordered pair of group members that is linked in the graph.
ingroup_agreements = {}
missing_pairs = []
for i, country_1 in enumerate(country_group):
    for country_2 in country_group[i + 1:]:
        # A pair without an edge (or with a name that is not a node) has no
        # 'agreement' value; record it instead of failing with a KeyError.
        if not net.has_edge(country_1, country_2):
            missing_pairs.append((country_1, country_2))
            continue
        ingroup_agreements[(country_1, country_2)] = net[country_1][country_2]['agreement']

print('In-group country agreements:\n')
for k, v in sorted(ingroup_agreements.items(), key=lambda item: -item[1]):
    print(f'{k[0]} - {k[1]}: {round(v * 100, 1)}%')

if missing_pairs:
    print('\nPairs without an edge in the graph (skipped):')
    for country_1, country_2 in missing_pairs:
        print(f'{country_1} - {country_2}')

## Distances

In [None]:
# Minmax normalization for the cut-off graph (normalizes to [0.5, 1])
def normalize_minmax(net):
    """Store a min-max normalised copy of each edge weight in ``'weight_my'``.

    The smallest weight maps to 0.5 and the largest to 1. When all weights are
    equal (for example a single edge) there is no range to scale by, and every
    edge receives 1.0. A graph without edges is left untouched.

    Args:
        net: Graph whose edges carry a ``'weight'`` attribute; the edge
            attribute dictionaries are modified in place.
    """
    weights = [attrs['weight'] for _, _, attrs in net.edges(data=True)]
    if not weights:
        # min()/max() of an empty list would raise ValueError.
        return

    minw = min(weights)
    spread = max(weights) - minw

    for _, _, attrs in net.edges(data=True):
        if spread == 0:
            # Every weight is both minimum and maximum; avoid dividing by zero.
            attrs['weight_my'] = 1.0
        else:
            attrs['weight_my'] = ((attrs['weight'] - minw) / (2 * spread)) + 0.5
        
# Our custom normalization for the standard graph - Direct implementation
def normalize_custom(net):
    """Store a rescaled copy of each edge weight in ``'weight_my'``.

    The lower bound is ``2 * mean - max``, i.e. as far below the mean weight
    as the maximum lies above it. Weights above that bound are mapped linearly
    so that the maximum becomes 1; all other weights become 0.

    Args:
        net: Graph whose edges carry a ``'weight'`` attribute; the edge
            attribute dictionaries are modified in place.
    """
    weights = [attrs['weight'] for _, _, attrs in net.edges(data=True)]

    maxw = max(weights)
    minw = 2 * statistics.fmean(weights) - maxw

    for _, _, attrs in net.edges(data=True):
        weight = attrs['weight']
        if weight > minw:
            attrs['weight_my'] = (weight - minw) / (maxw - minw)
        else:
            attrs['weight_my'] = 0
        
# calculate distances based on weight
def calculate_distances(net):
    """Store the reciprocal of ``'weight_my'`` in ``'weight_distance'`` for every edge.

    Non-positive weights are replaced by 0.000001 before inverting, which
    gives those edges a very large distance instead of a division by zero.

    Args:
        net: Graph whose edges carry a ``'weight_my'`` attribute; the edge
            attribute dictionaries are modified in place.
    """
    for _, _, attrs in net.edges(data=True):
        divisor = attrs['weight_my']
        if not divisor > 0:
            divisor = 0.000001
        attrs['weight_distance'] = 1 / divisor
        
# The standard graph uses the custom normalization and the cut-off graph the
# min-max one; distances are then derived from the resulting 'weight_my' values.
normalize_custom(net)
normalize_minmax(cnet)
calculate_distances(net)
calculate_distances(cnet)

In [None]:
def distance_analysis(_net, nodes_to_remove = None):
    """Print distance statistics of a graph based on the 'weight_distance' edge attribute.

    Reports the mean direct (edge) distance, the average shortest-path length
    and the weighted diameter. When the diameter exceeds 1, each periphery node
    is listed together with the nodes that lie exactly one diameter away.

    Args:
        _net: Graph to analyse; it is copied, never modified.
        nodes_to_remove: Optional iterable of nodes dropped from the copy
            before the analysis.
    """
    net = _net.copy()
    if nodes_to_remove:
        net.remove_nodes_from(nodes_to_remove)

    # Only direct distances below 1000 enter the mean.
    # NOTE(review): presumably this drops the huge placeholder distances that
    # calculate_distances assigns to zero-weight edges - confirm.
    all_valid_distances = [
        distance
        for distance in (attrs['weight_distance'] for _, _, attrs in net.edges(data=True))
        if distance < 1000
    ]
    print(f'Average direct distance: {statistics.mean(all_valid_distances)}')
    print(f'Average distance: {nx.average_shortest_path_length(net, weight="weight_distance")}')

    # Eccentricities are computed once and shared by diameter and periphery,
    # which would otherwise each recompute them from scratch.
    eccentricity = nx.eccentricity(net, weight='weight_distance')
    diameter = nx.diameter(net, e=eccentricity)
    print(f'Weighted diameter: {diameter}')

    if diameter > 1:
        print('\nNodes with longest shortest paths:')
        for country in nx.periphery(net, e=eccentricity):
            lengths = nx.shortest_path_length(net, country, weight='weight_distance')
            target = [k for k, v in lengths.items() if v == diameter]
            print(f'{country} - {target}')

In [None]:
# Distance statistics of the standard graph (custom-normalized weights).
print('Standard Graph:')
distance_analysis(net)

In [None]:
# Distance statistics of the cut-off graph (min-max-normalized weights); no nodes are removed.
print('Cut-off Graph:')
distance_analysis(cnet, nodes_to_remove=[])

In [None]:
#nx.shortest_path(cnet, 'UNITED STATES', 'PALAU', weight = 'weight_distance')

## Centrality

In [None]:
def print_centrality(data, high_count=5, low_count=5):
    """Print the entries with the highest and the lowest values of a mapping.

    Values are rounded to one decimal place. The highest entries are listed in
    descending order, the lowest ones in ascending order.

    Args:
        data: Mapping from name to numeric value.
        high_count: Number of highest entries to print; the section is skipped
            when this is not positive.
        low_count: Number of lowest entries to print; the section is skipped
            when this is not positive.
    """
    ranked = [
        (name, round(score, 1))
        for name, score in sorted(data.items(), key=lambda x: x[1], reverse=True)
    ]

    if high_count > 0:
        print('Highest values:')
        for name, score in ranked[:high_count]:
            print(f'{name}: {score}')

    if low_count > 0:
        print('\nLowest values:')
        # The tail of the descending list, walked backwards, is ascending.
        for name, score in reversed(ranked[-low_count:]):
            print(f'{name}: {score}')

In [None]:
# Print weighted degree centrality of the standard graph (10 highest and 10 lowest values)
print_centrality(dict(net.degree(weight='weight')), 10, 10)

In [None]:
# Standard graph betweenness centrality (unnormalized, paths measured by 'weight_distance'); 10 highest values only
print_centrality(dict(nx.betweenness_centrality(net, weight='weight_distance', normalized=False)), 10, 0)

In [None]:
# Cut-off graph betweenness centrality (unnormalized, paths measured by 'weight_distance'); 10 highest values only
print_centrality(dict(nx.betweenness_centrality(cnet, weight='weight_distance', normalized=False)), 10, 0)