In [None]:
import os
import ast
import math
import statistics

import pandas as pd
import networkx as nx

In [None]:
# Primary data frame: the records table with cleaned-up country names, indexed by 'ID'.
# keep_default_na=False keeps empty cells and markers such as 'N/A' as plain strings
# instead of converting them to NaN.
records_path = "../data/records-renamed.csv"
df = pd.read_csv(
    records_path,
    index_col='ID',
    keep_default_na=False,
)

## Helpers

In [None]:
# Values of the 'Body' column that the graph builders select on
GA, SC = 'General Assembly', 'Security Council'

# First and last year used when building graphs over the whole data set
START, END = 1946, 2024

In [None]:
# Filter DFs with different parameters
def date_is_between(date, start_year, end_year):
    """Return a boolean mask of the dates whose year lies in [start_year, end_year].

    Args:
        date: pandas Series of date strings whose first four characters are the year.
        start_year: First year of the range (inclusive).
        end_year: Last year of the range (inclusive).

    Returns:
        A boolean pandas Series aligned with `date`. Entries whose year cannot be
        parsed (for example empty strings) are False instead of raising.
    """
    # errors='coerce' turns unparsable years into NaN, and NaN is never "between"
    years = pd.to_numeric(date.str[0:4], errors='coerce')
    return years.between(start_year, end_year)

def filter_df(df, body, start_year, end_year):
    """Return the rows of `df` whose 'Body' equals `body` and whose 'Date' year is in range.

    The year range [start_year, end_year] is inclusive on both ends.
    """
    body_matches = df['Body'] == body
    in_period = date_is_between(df['Date'], start_year, end_year)
    return df[body_matches & in_period]

def has_subject(_subjects, subject_whitelist, subject_blacklist):
    """Decide whether a record's subjects pass the whitelist and blacklist.

    Args:
        _subjects: String holding a Python literal of the record's subjects
            (parsed with ast.literal_eval and turned into a set).
        subject_whitelist: If non-empty, at least one subject must be in it.
        subject_blacklist: If non-empty, the record must have at least one
            subject and none of its subjects may be in it.

    Returns:
        True when every active filter is satisfied, False otherwise.
    """
    found = set(ast.literal_eval(_subjects))

    if subject_whitelist and found.isdisjoint(subject_whitelist):
        return False

    # With a blacklist active, records without any subject are rejected as well
    if subject_blacklist and (not found or not found.isdisjoint(subject_blacklist)):
        return False

    return True

def filter_df_subjects(df, subject_whitelist, subject_blacklist):
    """Return the rows of `df` whose 'Subjects' value passes has_subject."""
    def row_passes(subjects):
        return has_subject(subjects, subject_whitelist, subject_blacklist)

    return df[df['Subjects'].map(row_passes)]

## Connect Countries

In [None]:
# Pairwise agreement score, looked up as vote_weights[vote_a][vote_b].
# Identical votes score 1.0, a 'Y' against an 'N' scores 0.0, and a 'Y' or 'N'
# paired with an 'A' or 'X' scores 0.5. 'A' and 'X' are scored as the same vote.
# NOTE(review): 'A' presumably means abstention and 'X' non-voting - confirm against the data.
vote_weights = {
    'Y': {'Y': 1.0, 'N': 0.0, 'A': 0.5, 'X': 0.5},
    'N': {'Y': 0.0, 'N': 1.0, 'A': 0.5, 'X': 0.5},
    'A': {'Y': 0.5, 'N': 0.5, 'A': 1.0, 'X': 1.0},
    'X': {'Y': 0.5, 'N': 0.5, 'A': 1.0, 'X': 1.0},
}

In [None]:
def connect_countries(net, country1, country2, vote1, vote2):
    """Record one jointly-voted resolution on the edge between two countries.

    The edge keeps two running values: 'total' (number of resolutions both
    countries voted on) and 'points' (sum of the vote_weights scores for
    those resolutions). The edge is created on first use.
    """
    score = vote_weights[vote1][vote2]

    if not net.has_edge(country1, country2):
        net.add_edge(country1, country2, points=score, total=1)
        return

    edge_data = net[country1][country2]
    previous_total = edge_data['total']
    previous_points = edge_data['points']
    edge_data['total'] = previous_total + 1
    edge_data['points'] = previous_points + score
        
def connect_voting_points(net, voting_point_1, voting_point_2):
    """Split two voting-point strings into vote and country, then connect them.

    Each voting point is read as: the vote character at index 0 and the
    country name from index 2 onwards (index 1 is skipped as a separator).
    """
    vote_1, country_1 = voting_point_1[0], voting_point_1[2:]
    vote_2, country_2 = voting_point_2[0], voting_point_2[2:]

    connect_countries(net, country_1, country_2, vote_1, vote_2)

In [None]:
# Connect the countries and calculate their total agreement
def print_progress(finished, total, next_to_print):
    """Print the completed percentage once it reaches `next_to_print`.

    Args:
        finished: Number of items processed so far.
        total: Number of items overall.
        next_to_print: Percentage at which the next message is due.

    Returns:
        The percentage at which the following message is due: `next_to_print + 5`
        if a message was printed, otherwise `next_to_print` unchanged.
    """
    percent_done = math.floor(finished * 100 / total)
    if percent_done < next_to_print:
        return next_to_print

    print('Progress: {}%'.format(percent_done))
    return next_to_print + 5
    
def connect_all_countries(net, body, start_year, end_year, subject_whitelist, subject_blacklist):
    """Fill `net` with one edge per pair of countries that voted on the same record.

    Reads the module-level `df`, keeps the records that carry per-country votes
    and match the body, year range and subject filters, and connects every
    pair of voting points within each record. Afterwards each node gets a
    'total' attribute equal to the largest 'total' among its edges.
    """
    # Records marked as consensus or without voting data carry no per-country votes.
    # NOTE(review): 'Concensus' is kept exactly as written - confirm it matches the data.
    voting_data = df['Voting Data']
    has_votes = (voting_data != 'Concensus') & (voting_data != 'N/A')

    records = filter_df(df[has_votes], body, start_year, end_year)
    records = filter_df_subjects(records, subject_whitelist, subject_blacklist)

    total = records.shape[0]
    next_to_print = 0

    for finished, (_, row) in enumerate(records.iterrows(), start=1):
        voting_points = row['Voting Data'].split(';')

        # Visit every unordered pair of voting points exactly once
        for position, first in enumerate(voting_points):
            for second in voting_points[position + 1:]:
                connect_voting_points(net, first, second)

        next_to_print = print_progress(finished, total, next_to_print)

    for country in net:
        net.nodes[country]['total'] = max(
            edge_data['total'] for edge_data in net[country].values()
        )

## Add Properties

In [None]:
def remove_low_data_nodes(net):
    """Remove nodes whose 'total' is at most a quarter of the largest node 'total'.

    The removed node ids are printed. An empty graph is left untouched instead
    of raising, since there is no maximum to derive a threshold from.

    Args:
        net: Graph whose nodes carry a 'total' attribute; modified in place.
    """
    node_totals = list(net.nodes.data('total'))

    if not node_totals:
        print('Removing 0 low total nodes')
        return

    total_threshold = max(total for _, total in node_totals) / 4

    # find all nodes at or below the threshold and grab their ids
    nodes_to_remove_ids = [node for node, total in node_totals if total <= total_threshold]

    # remove the filtered nodes from the graph
    print(f'Removing {len(nodes_to_remove_ids)} low total nodes')
    if len(nodes_to_remove_ids) > 0:
        print(nodes_to_remove_ids)
    net.remove_nodes_from(nodes_to_remove_ids)

In [None]:
def remove_low_data_edges(net, total_threshold=10):
    """Remove edges with too few shared votes, then the nodes left without edges.

    Args:
        net: Graph whose edges carry a 'total' attribute; modified in place.
        total_threshold: Edges whose 'total' is less than or equal to this
            value are removed. Defaults to 10.
    """
    # find all edges at or below the threshold and grab their endpoint pairs
    edges_to_remove_ids = [
        (u, v) for u, v, total in net.edges.data('total') if total <= total_threshold
    ]

    # remove the filtered edges from the graph
    print(f'Removing {len(edges_to_remove_ids)} low total edges')
    if len(edges_to_remove_ids) > 0:
        print(edges_to_remove_ids)
    net.remove_edges_from(edges_to_remove_ids)

    # countries whose every edge was removed no longer carry any information
    isolated_nodes = list(nx.isolates(net))
    print(f'Removing countries with no edges left: {isolated_nodes}')
    net.remove_nodes_from(isolated_nodes)

In [None]:
def calculate_agreement(net):
    """Set each edge's 'agreement' to its 'points' divided by its 'total'.

    Edges with a 'total' of 0 (such as the ones create_filled_graph adds) get
    an agreement of 0.0, matching the value those edges are created with,
    instead of raising ZeroDivisionError.

    Args:
        net: Graph whose edges carry 'points' and 'total'; modified in place.
    """
    for _, _, attrs in net.edges(data=True):
        total = attrs['total']
        attrs['agreement'] = attrs['points'] / total if total else 0.0

def calculate_weight(net):
    """Copy each edge's 'agreement' into 'weight', replacing non-positive values.

    An agreement that is not greater than 0 is stored as 0.000001, so no edge
    ends up with a zero weight.
    """
    for _, _, attrs in net.edges(data=True):
        agreement = attrs['agreement']
        if agreement > 0:
            attrs['weight'] = agreement
        else:
            attrs['weight'] = 0.000001

In [None]:
# Average longitude/latitude per country, indexed by upper-cased country name
country_coord_path = "./country-coord.csv"
country_coords = pd.read_csv(country_coord_path, index_col='Country')
country_coords = country_coords[['Longitude (average)', 'Latitude (average)']]
country_coords.index = country_coords.index.str.upper()

In [None]:
# Area per country, indexed by upper-cased country name with surrounding whitespace removed
country_size_path = "./country-size.csv"
country_sizes = pd.read_csv(country_size_path, index_col='Country')[['Area']]
country_sizes.index = country_sizes.index.str.upper().str.strip()

In [None]:
def add_coordinates(net):
    """Attach 'lon', 'lat', 'x' and 'y' to every node found in country_coords.

    'x' and 'y' are the longitude and latitude multiplied by 10. Countries
    missing from country_coords are reported and left without coordinates.
    """
    for country in net:
        if country not in country_coords.index:
            print('No coords found for ' + country)
            continue

        coords = country_coords.loc[country]
        lon = coords['Longitude (average)']
        lat = coords['Latitude (average)']
        net.nodes[country].update(lon=lon, lat=lat, x=lon * 10, y=lat * 10)

In [None]:
def add_sizes(net):
    """Attach a 'size' attribute (the country's 'Area') to every node found in country_sizes.

    Countries missing from country_sizes are reported and left without a size.
    """
    for country in net:
        if country not in country_sizes.index:
            print('No size found for ' + country)
            continue

        # .item() converts the pandas/numpy scalar into a plain Python number
        area = country_sizes.loc[country]['Area'].item()
        net.nodes[country]['size'] = area

## Graph Creation Methods

In [None]:
def save_graph(net, name):
    """Write `net` as GML to ../graphs/<name>.gml, creating missing directories.

    Args:
        net: Graph to write.
        name: Path of the graph below ../graphs, without the .gml extension;
            it may contain '/' to place the file in sub-directories.
    """
    output_path = "../graphs/{}.gml".format(name)

    # exist_ok=True replaces the separate existence check, which could fail if the
    # directory appeared between the check and the makedirs call
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    nx.write_gml(net, output_path)

In [None]:
def create_graph(name, body, start_year, end_year, should_remove_low_data_nodes=False, subject_whitelist=None, subject_blacklist=None):
    """Build the agreement graph for one body and period, decorate it and save it.

    Steps, in order: connect the countries from the filtered records, optionally
    drop low-data nodes, drop low-data edges (General Assembly only), compute the
    edge 'agreement' and 'weight', attach coordinates and sizes to the nodes, and
    write the graph with save_graph.

    Args:
        name: Name passed to save_graph for the output file.
        body: Value of the 'Body' column to select (GA or SC).
        start_year: First year of the period (inclusive).
        end_year: Last year of the period (inclusive).
        should_remove_low_data_nodes: Whether to call remove_low_data_nodes.
        subject_whitelist: Optional subjects of which a record must have at least one.
        subject_blacklist: Optional subjects a record must not have.

    Returns:
        The graph that was built and saved.
    """
    net = nx.Graph()
    
    connect_all_countries(net, body, start_year, end_year, subject_whitelist, subject_blacklist)

    if should_remove_low_data_nodes:
        remove_low_data_nodes(net)
    
    # Low-data edges are only pruned for the General Assembly
    if body == GA:
        remove_low_data_edges(net)
        
    # 'weight' is derived from 'agreement', so agreement has to be computed first
    calculate_agreement(net)
    calculate_weight(net)
    
    add_coordinates(net)
    add_sizes(net)
    
    save_graph(net, name)
    
    return net

In [None]:
def create_filled_graph(name, source_net):
    """Save a copy of `source_net` in which every missing edge is filled in.

    Each pair of nodes without an edge gets one with points=0.0, total=0,
    agreement=0.0 and a weight equal to the median weight of the existing edges.
    `source_net` itself is not modified.

    Args:
        name: Name passed to save_graph for the output file.
        source_net: Graph whose edges carry a 'weight' attribute.

    Returns:
        The filled copy that was saved.
    """
    net = source_net.copy()

    # Materialise the missing pairs once: the generator would otherwise be walked
    # twice, the second time while edges are being added to the graph it reads
    missing_pairs = list(nx.non_edges(net))
    fill_count = len(missing_pairs)
    if fill_count == 0:
        print('No fill necessary')
        save_graph(net, name)
        return net

    print(f'Fill Graph - Filling {fill_count} countries')

    median_weight = statistics.median(attrs['weight'] for _, _, attrs in net.edges(data=True))
    print(f'Fill weight: {median_weight}')

    for countries in missing_pairs:
        print(f'Filling: {countries}')
        net.add_edge(countries[0], countries[1], points=0.0, total=0, agreement=0.0, weight=median_weight)

    save_graph(net, name)
    return net

In [None]:
def create_cutoff_graph(name, source_net):
    """Save a copy of `source_net` without its lower-agreement edges.

    The threshold is the median edge 'agreement'; every edge whose agreement
    is less than or equal to it is removed. A graph without edges is saved
    unchanged instead of raising on the empty median. `source_net` itself is
    not modified.

    Args:
        name: Name passed to save_graph for the output file.
        source_net: Graph whose edges carry an 'agreement' attribute.

    Returns:
        The reduced copy that was saved.
    """
    net = source_net.copy()

    agreements = [attrs['agreement'] for _, _, attrs in net.edges(data=True)]

    edges_to_remove_ids = []
    if agreements:
        # Choose the threshold
        agreement_threshold = statistics.median(agreements)

        # find all edges at or below the agreement threshold and grab their endpoint pairs
        edges_to_remove_ids = [
            (u, v)
            for u, v, agreement in net.edges.data('agreement')
            if agreement <= agreement_threshold
        ]

    # remove the filtered edges from the graph
    print(f'Cut-off Graph - Removing {len(edges_to_remove_ids)} edges')
    net.remove_edges_from(edges_to_remove_ids)

    save_graph(net, name)

    return net

In [None]:
def create_graphs(name, body, start_year, end_year, should_remove_low_data_nodes=False, subject_whitelist=None, subject_blacklist=None):
    """Create and save the base graph plus its '_filled' and '_cutoff' variants.

    The variants are saved under `name` with the suffixes '_filled' and
    '_cutoff'. Only the base graph is returned.
    """
    base_net = create_graph(
        name, body, start_year, end_year,
        should_remove_low_data_nodes, subject_whitelist, subject_blacklist,
    )
    create_filled_graph(f'{name}_filled', base_net)
    create_cutoff_graph(f'{name}_cutoff', base_net)
    return base_net

In [None]:
def create_graphs_from_periods(periods, group_name, body, subject_whitelist=None, subject_blacklist=None):
    """Create the graph set for every (start, end) period in `periods`.

    Graphs are named '<body>/<group_name>/net_<ga|sc>_<start>_<end>'.
    Low-data nodes are removed only when `body` is the General Assembly.
    """
    is_general_assembly = (body == GA)
    body_str = 'ga' if is_general_assembly else 'sc'

    for period in periods:
        start, end = period[0], period[1]
        name = f'{body}/{group_name}/net_{body_str}_{start}_{end}'

        print(f'\nCreating {name}...')
        create_graphs(name, body, start, end, is_general_assembly, subject_whitelist, subject_blacklist)

In [None]:
def load_graph(name):
    """Read the graph stored at ../graphs/<name>.gml and return it."""
    return nx.read_gml("../graphs/{}.gml".format(name))

## Create Graphs

In [None]:
# Each entry is a (start_year, end_year) pair; both years are inclusive.
global_time_periods = [
    (1946, 1955), # Post war, Korean War
    (1956, 1975), # Vietnam War
    (1976, 1991), # Late Cold War
    (1992, 2002), # Collapse of Communism
    (2003, 2024), # Contemporary
    (2022, 2024), # Current (overlaps the last years of the Contemporary period)
]

# NOTE(review): 2006 falls in both the Post-Sloba and the Yellow Serbia periods
# because the bounds are inclusive - confirm this overlap is intended.
serbia_time_periods = [
    (1946, 1979), # Early Yugoslavia
    (1980, 1991), # Late Yugoslavia
    (2001, 2006), # Post-Sloba
    (2006, 2011), # Yellow Serbia
    (2012, 2024), # Punished Serbia
]

russia_time_periods = [
    (2000, 2008), # Early Putin (Pre Georgia)
    (2009, 2013), # Mid Putin (Post Georgia)
    (2014, 2021), # Putin Risen (Post Crimea)
    (2022, 2024), # Pariah (Post Ukraine)
]

france_time_periods = [
    (1946, 1980), # Colonial France
    (1981, 1994), # Mitterrand
    (1995, 2011), # Chirac & Sarkozy
    #(2012, 2016), # Hollande
    #(2017, 2024), # Macron
]

In [None]:
#net_ga_all = create_graphs('net_ga_all', GA, START, END)
#net_sc_all = create_graphs('net_sc_all', SC, START, END)

#create_graphs_from_periods(global_time_periods, "global", GA)
#create_graphs_from_periods(global_time_periods, "global", SC)
#create_graphs_from_periods(serbia_time_periods, "serbia", GA)
#create_graphs_from_periods(russia_time_periods, "russia", GA)
#create_graphs_from_periods(france_time_periods, "france", GA)

# Subject groups used as whitelists/blacklists when building per-topic graphs
human_rights_subjects = [
    'HUMAN RIGHTS ADVANCEMENT',
    'HUMAN RIGHTS--REPORTS',
    'UN. HUMAN RIGHTS COUNCIL--REPORTS',
    'GLOBALIZATION--HUMAN RIGHTS',
]
israel_and_palestine_subjects = [
    'TERRITORIES OCCUPIED BY ISRAEL--HUMAN RIGHTS--REPORTS',
    'UNRWA--ACTIVITIES',
    'PALESTINE QUESTION',
    'MIDDLE EAST SITUATION',
    'TERRITORIES OCCUPIED BY ISRAEL--NATURAL RESOURCES',
    'TERRITORIES OCCUPIED BY ISRAEL--SETTLEMENT POLICY',
    'TERRITORIES OCCUPIED BY ISRAEL--HUMAN RIGHTS',
]
decolonization_subjects = [
    'DECOLONIZATION',
    'DECOLONIZATION--UN SYSTEM',
]
nuclear_subjects = [
    'NUCLEAR DISARMAMENT',
    'NUCLEAR NON-PROLIFERATION',
    'MIDDLE EAST--NUCLEAR PROLIFERATION',
    'NUCLEAR WEAPON TESTS--TREATIES',
    'NUCLEAR WEAPONS USE--UN. INTERNATIONAL COURT OF JUSTICE OPINION',
    'NUCLEAR WEAPONS USE--TREATIES',
    'NUCLEAR DISARMAMENT--TREATY COMPLIANCE',
    'NUCLEAR WEAPONS--ELIMINATION',
    'NUCLEAR DISARMAMENT--CONFERENCES',
    'NUCLEAR DISARMAMENT NEGOTIATIONS',
    'SOUTHERN HEMISPHERE--NUCLEAR-WEAPON-FREE ZONES',
    'NUCLEAR NON-PROLIFERATION--INTERNATIONAL OBLIGATIONS',
    'NUCLEAR-WEAPON-FREE ZONES',
    'NUCLEAR WEAPONS--HUMANITARIAN CONSEQUENCES',
    'MIDDLE EAST--NUCLEAR-WEAPON-FREE ZONES',
    'NUCLEAR WEAPONS--TREATIES',
]

#create_graphs('Subjects/human_rights',             GA, 2003, END, True, subject_whitelist = human_rights_subjects)
#create_graphs('Subjects/israel_and_palestine',     GA, 2003, END, True, subject_whitelist = israel_and_palestine_subjects)
#create_graphs('Subjects/non_israel_and_palestine', GA, 2003, END, True, subject_blacklist = israel_and_palestine_subjects)
#create_graphs('Subjects/decolonization',           GA, 2003, END, True, subject_whitelist = decolonization_subjects)
#create_graphs('Subjects/nuclear',                  GA, 2003, END, True, subject_whitelist = nuclear_subjects)