In [None]:
import math
import powerlaw
import statistics

import networkx as nx
import matplotlib.pyplot as plt

from collections import Counter

In [None]:
def load_graph(name):
    """Read the GML file ``../graphs/<name>.gml`` and return the parsed graph.

    ``name`` is the path of the file relative to ``../graphs/`` without the
    ``.gml`` extension.
    """
    return nx.read_gml("../graphs/{}.gml".format(name))

In [None]:
# Load the chosen graph

# Path of the base network, relative to ../graphs/ and without the .gml extension.
name = "General Assembly/russia/net_ga_2014_2021"
# Node label of the country that every analysis below is centred on.
target_country = "RUSSIAN FEDERATION"

# Companion files stored next to the base network, distinguished by suffix.
fname = f'{name}_filled'
cname = f'{name}_cutoff'

# net: base network, fnet: "_filled" variant, cnet: "_cutoff" variant.
net = load_graph(name)
fnet = load_graph(fname)
cnet = load_graph(cname)

## Basic Characteristics

In [None]:
def weighted_density(net):
    """Return the total edge weight divided by the number of possible node pairs.

    Graphs with fewer than two nodes have no possible pair and are reported
    as having density 1.
    """
    node_count = net.number_of_nodes()
    if node_count < 2:
        return 1

    weight_total = sum(attrs['weight'] for _, _, attrs in net.edges(data=True))
    # n * (n - 1) / 2 unordered pairs in an undirected graph of n nodes.
    pair_count = (node_count * (node_count - 1)) / 2
    return weight_total / pair_count

In [None]:
# Rich club check
def calculate_target_rich_club(net):
    """Print the weighted density of ``net`` and of the target's "rich club".

    The rich club is the subgraph induced by every node whose weighted degree
    is at least the weighted degree of the module-level ``target_country``.
    """
    weighted_degrees = nx.degree(net, weight='weight')
    target_degree = weighted_degrees[target_country]

    # Keep every node that is at least as strongly connected as the target.
    members = []
    for node in net.nodes():
        if weighted_degrees[node] >= target_degree:
            members.append(node)
    rich_club_net = nx.subgraph(net, members)

    rich_club_size = rich_club_net.number_of_nodes()
    percentage = round((rich_club_size * 100) / net.number_of_nodes())

    print(f'Target degree: {target_degree}')
    print(f'Weighted density: {weighted_density(net)}')
    print(f'Rich Club of {rich_club_size} ({percentage}%) nodes - Weighted density: {weighted_density(rich_club_net)}')

In [None]:
# Rich-club report for the "_filled" graph.
calculate_target_rich_club(fnet)

In [None]:
# Rich-club report for the "_cutoff" graph.
calculate_target_rich_club(cnet)

In [None]:
# Check whether the cutoff graph forms a single connected component.
cutoff_is_connected = nx.is_connected(cnet)
print(f'Cutoff is connected: {cutoff_is_connected}')
if not cutoff_is_connected:
    # When it is fragmented, report the size of the component holding the target.
    component = nx.node_connected_component(cnet, target_country)
    print(f'Target component node count: {len(component)}')

In [None]:
# Weighted clustering on the "_filled" graph: network-wide average first,
# then the coefficient of the target country alone.
print(f'Average clustering: {nx.average_clustering(fnet, weight="weight")}')
print(f'Target clustering: {nx.clustering(fnet, nodes=target_country, weight="weight")}')

## Least and most friendly countries

In [None]:
# Average agreement for every country
avg_agreements = {}
for country_1 in net:
    # Collect the 'agreement' attribute of every edge incident to this country.
    agreements = [link['agreement'] for link in net[country_1].values()]
    avg_agreements[country_1] = statistics.mean(agreements)

In [None]:
def find_placement():
    """Print and return the 1-based rank of the target by average agreement.

    Countries in the module-level ``avg_agreements`` are ranked from highest
    to lowest average. Returns -1 when ``target_country`` is not among them.
    """
    print(f'Target average agreement:')
    ranking = sorted(avg_agreements.items(), key=lambda item: -item[1])
    for placement, (k, v) in enumerate(ranking, start=1):
        if k != target_country:
            continue
        print(f'{placement}. {k} - {round(v * 100, 1)}%')
        return placement
    return -1

# Express the target's rank as a percentage of all countries in the network.
# NOTE(review): find_placement() returns -1 when the target is not found,
# which would make the percentage below negative.
placement = find_placement()
top_perc = round((placement * 100) / net.number_of_nodes())
print(f'Target is in top {top_perc}% countries')

In [None]:
# Agreement value of every edge in the base network (one entry per edge).
agreements = [ edge[2]['agreement'] for edge in net.edges(data=True) ]

# Network-wide reference values to compare the target's agreements against.
print(f'Median agreement: {statistics.median(agreements)}')
print(f'Average agreement: {statistics.fmean(agreements)}')

In [None]:
def count_relationship_levels(net, bad_threshold=0.5, good_threshold=0.787, great_threshold=0.95):
    """Print how many of the target's relationships fall into each agreement band.

    The agreement of every edge incident to the module-level ``target_country``
    is sorted into one of four half-open bands:

    * Abysmal: ``agreement < bad_threshold``
    * Bad:     ``bad_threshold <= agreement < good_threshold``
    * Good:    ``good_threshold <= agreement < great_threshold``
    * Great:   ``agreement >= great_threshold``

    Parameters
    ----------
    net : graph
        Graph containing ``target_country``; edges carry an ``'agreement'`` attribute.
    bad_threshold, good_threshold, great_threshold : float
        Band boundaries. The defaults are the values previously hard-coded here.
    """
    agreements = [attrs['agreement'] for attrs in net[target_country].values()]
    total = len(agreements)

    def to_percentage(val):
        # A target without any edge has nothing to take a share of; report 0%
        # instead of dividing by zero.
        if total == 0:
            return '0%'
        return f'{round(val * 100 / total)}%'

    abysmal = sum(1 for x in agreements if x < bad_threshold)
    bad = sum(1 for x in agreements if bad_threshold <= x < good_threshold)
    good = sum(1 for x in agreements if good_threshold <= x < great_threshold)
    great = sum(1 for x in agreements if x >= great_threshold)

    print(f'Abysmal: {abysmal} ({to_percentage(abysmal)})')
    print(f'Bad: {bad} ({to_percentage(bad)})')
    print(f'Good: {good} ({to_percentage(good)})')
    print(f'Great: {great} ({to_percentage(great)})')
    
# Band the target's relationships in the base network.
count_relationship_levels(net)

In [None]:
def friendly_edge_to_string(edge, country_buffer_1, country_buffer_2):
    """Format one ``(country, country, attributes)`` edge as an aligned text row.

    The two country names are left-justified to ``country_buffer_1`` and
    ``country_buffer_2`` characters. The attribute dict supplies ``'points'``
    (right-justified to 6), ``'total'`` (left-justified to 5) and
    ``'agreement'``, which is shown as a percentage with one decimal.
    """
    source, dest, attrs = edge[0], edge[1], edge[2]

    padded_source = source.ljust(country_buffer_1)
    padded_dest = dest.ljust(country_buffer_2)
    points_col = str(attrs['points']).rjust(6)
    total_col = str(attrs['total']).ljust(5)
    agreement_pct = round(attrs['agreement'] * 100, 1)

    return '{}, {} - {}/{} ({}%)'.format(padded_source, padded_dest, points_col, total_col, agreement_pct)

def edge_country_1_length(edge):
    """Return the length of the first country name of an edge tuple."""
    first_country = edge[0]
    return len(first_country)

def edge_country_2_length(edge):
    """Return the length of the second country name of an edge tuple."""
    return len(edge[1])

def friendly_edge_print(edges):
    """Print one aligned row per edge, padding both country columns to the longest name.

    Parameters
    ----------
    edges : iterable of (country, country, attributes) tuples
        Any iterable is accepted. Nothing is printed when it is empty.
    """
    # The edges are walked three times (two width scans plus the print loop),
    # so materialise them once to support generators as well as lists.
    edges = list(edges)
    if not edges:
        # max() of an empty sequence would raise ValueError.
        return

    max_cnt_1_length = max(map(edge_country_1_length, edges))
    max_cnt_2_length = max(map(edge_country_2_length, edges))
    for edge in edges:
        print(friendly_edge_to_string(edge, max_cnt_1_length, max_cnt_2_length))

In [None]:
# Sort edges by agreement
def has_significant_total(edge, min_total=75):
    """Return True when the edge's ``'total'`` attribute is strictly above ``min_total``.

    Parameters
    ----------
    edge : tuple
        ``(country, country, attributes)``; ``attributes['total']`` is compared.
    min_total : number, optional
        Exclusive lower bound. Defaults to 75, the value previously hard-coded.
    """
    return edge[2]['total'] > min_total

# One (target, neighbour, attributes) tuple per edge incident to the target.
edges = [ (target_country, edge, net[target_country][edge]) for edge in net[target_country] ]
sorted_edges = sorted(edges, key=lambda edge: edge[2]['agreement'])
# Drop edges with too small a 'total'. A comprehension is used instead of
# `__builtin__.filter`: `__builtin__` is the Python 2 module name and only
# resolves inside IPython, so it breaks when the notebook is run as a script.
sorted_edges = [edge for edge in sorted_edges if has_significant_total(edge)]

In [None]:
# Show countries with the most agreement
# sorted_edges is ascending by agreement, so the last ten entries are the
# highest; reverse them so the best match is printed first.
most_agreement_countries = sorted_edges[-10:]
most_agreement_countries.reverse()
friendly_edge_print(most_agreement_countries)

In [None]:
# Show countries with the least agreement
# sorted_edges is ascending by agreement, so the first ten entries are the lowest.
least_agreement_countries = sorted_edges[:10]
friendly_edge_print(least_agreement_countries)

## In-group agreements

In [None]:
# Countries treated as global actors in the comparisons below.
global_actors = ['USSR', 'RUSSIAN FEDERATION', 'UNITED STATES', 'CHINA', 'GERMANY', 'GERMANY, FEDERAL REPUBLIC OF','FRANCE', 'UNITED KINGDOM', 'INDIA']
# The target is never compared with itself.
# NOTE: remove() mutates the list in place; re-running the cell is safe only
# because the list is rebuilt on the line above.
if target_country in global_actors:
    global_actors.remove(target_country)

# Candidate neighbour lists, one per possible target country.
russia_neighbours = ['FINLAND', 'ESTONIA', 'LATVIA', 'BELARUS', 'UKRAINE', 'GEORGIA', 'AZERBAIJAN', 'KAZAKHSTAN', 'MONGOLIA', 'CHINA', "DEMOCRATIC PEOPLE'S REPUBLIC OF KOREA"]
yugoslavia_neighbours = ['HUNGARY', 'ROMANIA', 'BULGARIA', 'ALBANIA', 'GREECE', 'ITALY', 'AUSTRIA']
serbia_neighbours = ['HUNGARY', 'ROMANIA', 'BULGARIA', 'ALBANIA', 'NORTH MACEDONIA', 'MONTENEGRO', 'BOSNIA AND HERZEGOVINA', 'CROATIA']

# Neighbour list used by the cells below; pick the one matching target_country.
neighbours = russia_neighbours

In [None]:
def print_ingroup_agreements(country_group):
    """Print the target's agreement with each country of a group, highest first.

    Reads the module-level ``net`` and ``target_country``. Countries that do
    not share an edge with the target are skipped. That covers countries
    missing from the graph, and countries that are present but not adjacent
    to the target (previously a KeyError).

    Parameters
    ----------
    country_group : iterable of str
        Node labels to look up.
    """
    target_links = net[target_country]
    ingroup_agreements = {
        country: target_links[country]['agreement']
        for country in country_group
        if country in target_links
    }

    for k, v in sorted(ingroup_agreements.items(), key=lambda item: -item[1]):
        print(f'{k}: {round(v * 100, 1)}%')

In [None]:
# Agreement between the target and each global actor, highest first.
print('Global actor relationships:')
print_ingroup_agreements(global_actors)

In [None]:
# Agreement between the target and each listed neighbour, highest first.
print('Neighbour relationships:')
print_ingroup_agreements(neighbours)

## Distances

In [None]:
# Minmax normalization for the cut-off graph (normalizes to [0.5, 1])
def normalize_minmax(net):
    """Store a min-max normalised weight in ``[0.5, 1]`` on every edge as ``'weight_my'``.

    The smallest ``'weight'`` maps to 0.5 and the largest to 1. The edge
    attribute dicts are modified in place.

    Edge cases:
    * A graph without edges is left untouched (previously ``min()`` raised
      ValueError).
    * When every edge has the same weight the range is zero. All edges then
      get 1.0, treating each as a maximum-weight edge (previously a
      ZeroDivisionError).
    """
    edge_attrs = [attrs for _, _, attrs in net.edges(data=True)]
    if not edge_attrs:
        return

    weights = [attrs['weight'] for attrs in edge_attrs]
    minw = min(weights)
    maxw = max(weights)
    span = maxw - minw

    for attrs in edge_attrs:
        if span == 0:
            attrs['weight_my'] = 1.0
        else:
            # (w - min) / span lies in [0, 1]; halving and shifting gives [0.5, 1].
            attrs['weight_my'] = ((attrs['weight'] - minw) / (2 * span)) + 0.5
        
# Our custom normalization for the standard graph - Direct implementation
def normalize_custom(net):
    """Store a thresholded, rescaled weight on every edge as ``'weight_my'``.

    The threshold is ``2 * mean - max`` of all edge weights. Edges at or
    below it get 0; edges above it are scaled linearly so that the threshold
    maps to 0 and the maximum weight maps to 1. Edge attribute dicts are
    modified in place.
    """
    edge_attrs = [attrs for _, _, attrs in net.edges(data=True)]
    all_weights = [attrs['weight'] for attrs in edge_attrs]

    top = max(all_weights)
    floor = 2 * statistics.fmean(all_weights) - top

    for attrs in edge_attrs:
        w = attrs['weight']
        if w > floor:
            attrs['weight_my'] = (w - floor) / (top - floor)
        else:
            attrs['weight_my'] = 0
        
# calculate distances based on weight
def calculate_distances(net):
    """Store the reciprocal of ``'weight_my'`` on every edge as ``'weight_distance'``.

    Weights that are not strictly positive are replaced by 0.000001 before
    inverting, so such edges receive a very large distance instead of a
    division by zero. Edge attribute dicts are modified in place.
    """
    for _, _, attrs in net.edges(data=True):
        strength = attrs['weight_my']
        if strength > 0:
            divisor = strength
        else:
            divisor = 0.000001
        attrs['weight_distance'] = 1 / divisor
        
# The base graph gets the custom normalization, the cut-off graph the min-max
# one. Both write 'weight_my', which calculate_distances() then converts into
# 'weight_distance'. All four calls modify the graphs' edge attributes in place.
normalize_custom(net)
normalize_minmax(cnet)
calculate_distances(net)
calculate_distances(cnet)

In [None]:
def distance_analysis(_net, country_group):
    """Print the indirect shortest paths from the target to each country of a group.

    Paths are weighted by the ``'weight_distance'`` edge attribute and start
    at the module-level ``target_country``. A path is printed (without its
    first node) only when it has at least one intermediate country. Direct
    connections are not printed. Countries absent from the graph are skipped.

    Parameters
    ----------
    _net : networkx graph
        Graph to search. It is only read, so no copy is made.
    country_group : iterable of str
        Destination node labels.
    """
    for country in country_group:
        if country not in _net:
            continue
        try:
            shortest_path = nx.shortest_path(_net, target_country, country, weight='weight_distance')
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            # Only "unreachable" conditions are reported; a bare `except:`
            # would also hide programming errors and KeyboardInterrupt.
            print(f'No path to {country}')
            continue
        if len(shortest_path) > 2:
            print(shortest_path[1:])

In [None]:
# Indirect shortest paths from the target in the base graph (`net`).
print('Standard Graph')

print('\nPaths to global actors')
distance_analysis(net, global_actors)

print('\nPaths to neighbours')
distance_analysis(net, neighbours)

In [None]:
# Indirect shortest paths from the target in the cut-off graph (`cnet`).
print('Cut-off Graph')

print('\nPaths to global actors')
distance_analysis(cnet, global_actors)

print('\nPaths to neighbours')
distance_analysis(cnet, neighbours)

## Centrality

In [None]:
def print_centrality(data, high_count=5, low_count=5):
    """Print the highest and lowest entries of a ``{label: value}`` mapping.

    Values are rounded to one decimal for display. The ``high_count`` largest
    entries are listed in descending order, followed by the ``low_count``
    smallest entries in ascending order. A section is omitted entirely when
    its count is not positive.
    """
    ranked = sorted(data.items(), key=lambda pair: pair[1], reverse=True)
    vals = [(label, round(score, 1)) for label, score in ranked]

    if high_count > 0:
        print('Highest values:')
        for val in vals[:high_count]:
            print(f'{val[0]}: {val[1]}')

    if low_count > 0:
        print('\nLowest values:')
        # The tail of the descending list, walked backwards, is ascending order.
        for val in reversed(vals[-low_count:]):
            print(f'{val[0]}: {val[1]}')

In [None]:
def calculate_betweenness(_net, remove_nodes=None):
    """Print the target's betweenness and the ten highest betweenness values.

    Betweenness is computed unnormalised, using ``'weight_distance'`` as the
    edge weight, on a copy of ``_net`` from which ``remove_nodes`` have been
    deleted.

    Parameters
    ----------
    _net : networkx graph
        Source graph; it is copied, never modified.
    remove_nodes : iterable of str, optional
        Node labels to exclude before computing. When omitted, a module-level
        ``remove_nodes`` variable is used if one exists; otherwise no node is
        removed. NOTE(review): no such variable is defined in the visible
        notebook, so the original body raised NameError on a fresh kernel.
        Confirm which nodes were meant to be excluded.
    """
    if remove_nodes is None:
        remove_nodes = globals().get('remove_nodes', ())

    net = _net.copy()
    for node in remove_nodes:
        if node in net:
            net.remove_node(node)

    betweenness_centrality = nx.betweenness_centrality(net, weight='weight_distance', normalized=False)
    print(f'Target betweenness: {betweenness_centrality[target_country]}\n')
    print_centrality(dict(betweenness_centrality), 10, 0)

In [None]:
# Betweenness centrality on the base graph.
calculate_betweenness(net)

In [None]:
# Betweenness centrality on the cut-off graph.
calculate_betweenness(cnet)