In [None]:
import math
import powerlaw
import statistics

import networkx as nx
import matplotlib.pyplot as plt

from collections import Counter

In [None]:
def save_graph(net, name):
    """Write ``net`` in GML format to ``../graphs/<name>.gml``.

    ``name`` may contain sub-directories; it is inserted into the path as-is.
    """
    nx.write_gml(net, f"../graphs/{name}.gml")

In [None]:
def load_graph(name):
    """Read and return the graph stored at ``../graphs/<name>.gml``."""
    return nx.read_gml(f"../graphs/{name}.gml")

In [None]:
# Graph to normalize, given relative to ../graphs/ and without the .gml extension.
name = "General Assembly/russia/net_ga_russia_2022_2024"
# Read the graph from ../graphs/<name>.gml; later cells add attributes to its edges in place.
net = load_graph(name)

## Normalizing

In [None]:
# Min-max normalization: rescale every edge 'weight' linearly into [0, 1]
# and store the result on the edge as 'weight_minmax'.
weights = [attrs['weight'] for _, _, attrs in net.edges(data=True)]

# default=0.0 keeps an edgeless graph from raising ValueError in min()/max().
minw = min(weights, default=0.0)
maxw = max(weights, default=0.0)
weight_range = maxw - minw

for _, _, attrs in net.edges(data=True):
    if weight_range:
        attrs['weight_minmax'] = (attrs['weight'] - minw) / weight_range
    else:
        # All weights are identical, so the range is 0: map every edge to 0.0
        # instead of dividing by zero.
        attrs['weight_minmax'] = 0.0

In [None]:
# Z-score standardization: (weight - mean) / sample standard deviation,
# stored on the edge as 'weight_zscore'.
weights = [attrs['weight'] for _, _, attrs in net.edges(data=True)]

# fmean() needs at least one value and stdev() at least two; fall back to
# neutral values so very small graphs do not raise StatisticsError.
meanw = statistics.fmean(weights) if weights else 0.0
stdw = statistics.stdev(weights) if len(weights) > 1 else 0.0

for _, _, attrs in net.edges(data=True):
    if stdw:
        attrs['weight_zscore'] = (attrs['weight'] - meanw) / stdw
    else:
        # No spread: every weight equals the mean, so its z-score is 0
        # (and dividing by stdw would raise ZeroDivisionError).
        attrs['weight_zscore'] = 0.0

In [None]:
# Custom scaling built on the z-scores (the z-score cell must have run first):
# a z-score of 0 maps to 0.5, the largest z-score maps to 1.0, and anything
# at or below -max is clamped to 0.0. Stored on the edge as 'weight_my'.
weights = [attrs['weight_zscore'] for _, _, attrs in net.edges(data=True)]

# default=0.0 keeps an edgeless graph from raising ValueError in max().
maxw = max(weights, default=0.0)

for _, _, attrs in net.edges(data=True):
    if maxw:
        my_weight = attrs['weight_zscore'] / (maxw * 2) + 0.5
    else:
        # Largest z-score is 0 (no spread in the weights): the formula would
        # divide by zero, so use the value it assigns to a z-score of 0.
        my_weight = 0.5
    attrs['weight_my'] = my_weight if my_weight > 0 else 0.0

## Plotting

In [None]:
def _axis_limits(values, scale, upper_pad=1.0):
    """Return ``(lower, upper)`` axis limits for ``values``, or ``None`` to autoscale.

    Non-log axes are anchored at 0 (or at the smallest value when it is
    negative). Log axes cannot display non-positive numbers, so their lower
    bound is the smallest positive value; ``None`` is returned when that
    leaves no usable range.
    """
    upper = max(values) * upper_pad
    if scale != 'log':
        return min(0, min(values)), upper
    positives = [v for v in values if v > 0]
    if not positives or min(positives) >= upper:
        return None
    return min(positives), upper


def plot_weight(net, column_name, xscale = "log", yscale = "log", bins_per_unit = 40):
    """Scatter-plot how often each (binned) value of an edge attribute occurs.

    Parameters
    ----------
    net : graph whose ``edges(data=True)`` yields ``(u, v, attribute_dict)``.
    column_name : edge attribute to plot; raises ``KeyError`` if an edge lacks it.
    xscale, yscale : matplotlib axis scale names (e.g. ``"linear"``, ``"log"``).
    bins_per_unit : values are floored to multiples of ``1 / bins_per_unit``
        before counting (the default 40 gives bins of width 0.025).

    Prints the median and mean of the un-binned values, then shows the figure.
    Returns ``None``; a graph without edges only prints a notice.
    """
    raw_weights = [attrs[column_name] for _, _, attrs in net.edges(data=True)]
    if not raw_weights:
        # Nothing to summarise or unpack into x/y; avoid StatisticsError/ValueError.
        print(f'No edges to plot for {column_name!r}')
        return

    # Summary statistics use the real values, not the floored bin edges,
    # which would bias both numbers downwards.
    print(f'Median weight: {statistics.median(raw_weights)}')
    print(f'Average weight: {statistics.fmean(raw_weights)}')

    binned = (math.floor(w * bins_per_unit) / bins_per_unit for w in raw_weights)
    x, y = zip(*Counter(binned).items())

    # Explicit figure/axes instead of the shared pyplot figure number 1.
    fig, ax = plt.subplots()
    ax.set_xlabel(column_name)
    ax.set_xscale(xscale)
    ax.set_ylabel('frequency')
    ax.set_yscale(yscale)
    ax.scatter(x, y, marker='.')

    # A lower limit of 0 is only valid on non-log axes; see _axis_limits.
    x_limits = _axis_limits(x, xscale)
    if x_limits is not None:
        ax.set_xlim(*x_limits)
    y_limits = _axis_limits(y, yscale, upper_pad=1.1)
    if y_limits is not None:
        ax.set_ylim(*y_limits)

    plt.show()
    plt.close(fig)

In [None]:
# Show the distribution of the raw 'weight' values on linear axes
plot_weight(net, 'weight', xscale='linear', yscale='linear')

In [None]:
# Show the distribution of the min-max normalized 'weight_minmax' values on linear axes
plot_weight(net, 'weight_minmax', xscale='linear', yscale='linear')

In [None]:
# Show the distribution of the z-score standardized 'weight_zscore' values on linear axes
plot_weight(net, 'weight_zscore', xscale='linear', yscale='linear')

In [None]:
# Show the distribution of the custom-scaled 'weight_my' values on linear axes
plot_weight(net, 'weight_my', xscale='linear', yscale='linear')

## Saving

In [None]:
def clean_weights(chosen_weight='weight', graph=None):
    """Make ``chosen_weight`` the final 'weight' and strip the helper attributes.

    For every edge, 'weight' is overwritten with the value stored under
    ``chosen_weight``; the attributes 'weight_minmax', 'weight_zscore' and
    'weight_my' are then dropped if present. Finally, every edge whose
    resulting weight equals 0 is removed from the graph.

    Parameters
    ----------
    chosen_weight : name of the edge attribute to keep as 'weight'.
    graph : graph to modify in place; defaults to the notebook-level ``net``.

    Raises
    ------
    KeyError
        If any edge lacks ``chosen_weight``. The check runs before anything is
        modified, so a failed call leaves the graph untouched.
    """
    if graph is None:
        graph = net

    edges = list(graph.edges(data=True))
    if any(chosen_weight not in attrs for _, _, attrs in edges):
        raise KeyError(chosen_weight)

    helper_keys = ('weight_minmax', 'weight_zscore', 'weight_my')
    zero_edges = []
    for u, v, attrs in edges:
        attrs['weight'] = attrs[chosen_weight]
        for key in helper_keys:
            # pop() instead of del: a normalization cell that was never run
            # must not abort the clean-up half-way through.
            attrs.pop(key, None)
        if attrs['weight'] == 0:
            zero_edges.append((u, v))

    # Drop the edges whose final weight is exactly zero.
    graph.remove_edges_from(zero_edges)

In [None]:
# Promote the custom 'weight_my' values to 'weight', dropping the helper
# attributes and the zero-weight edges, then write the graph to
# ../graphs/<name>_normalized.gml.
clean_weights(chosen_weight='weight_my')
save_graph(net, f'{name}_normalized')

In [None]:
# Spot check: display what the graph stores for the edge between these two nodes.
# NOTE(review): presumably raises KeyError if that edge is absent (e.g. removed as a zero-weight edge) — confirm.
net['RUSSIAN FEDERATION']['UNITED STATES']