In [None]:
import math
import powerlaw
import statistics

import networkx as nx
import matplotlib.pyplot as plt

from collections import Counter

In [None]:
def save_graph(net, name):
    """Write ``net`` in GML format to ``../graphs/<name>.gml``.

    ``name`` is inserted into the path as-is, so it may contain
    sub-directories (no extension).
    """
    nx.write_gml(net, "../graphs/{}.gml".format(name))

In [None]:
def load_graph(name):
    """Read and return the graph stored at ``../graphs/<name>.gml``.

    ``name`` is inserted into the path as-is (no extension).
    """
    source = "../graphs/{}.gml".format(name)
    graph = nx.read_gml(source)
    return graph

In [None]:
# Graph name relative to ../graphs/ (no extension); load_graph appends ".gml".
name = "General Assembly/global/net_ga_2003_2024"
# The full network and its "_cutoff" counterpart, both read from GML files.
net = load_graph(name)
cnet = load_graph(f'{name}_cutoff')

## Normalizing

In [None]:
# Minmax normalization for the cut-off graph (normalizes to [0.5, 1])
def normalize_minmax(net):
    """Add a ``weight_minmax`` attribute to every edge, scaled into [0.5, 1].

    The smallest ``weight`` maps to 0.5 and the largest to 1.0; everything in
    between is scaled linearly. The graph is modified in place.

    Degenerate inputs:
    - no edges: nothing to do, returns immediately;
    - all weights equal (e.g. a single edge): the range is zero and the linear
      formula is undefined, so every edge gets the lower bound 0.5.

    Parameters
    ----------
    net : graph-like object whose ``edges(data=True)`` yields
        ``(u, v, attrs)`` tuples; each ``attrs`` must contain ``'weight'``.
    """
    edge_attrs = [attrs for _, _, attrs in net.edges(data=True)]
    if not edge_attrs:
        return

    weights = [attrs['weight'] for attrs in edge_attrs]
    minw = min(weights)
    maxw = max(weights)
    spread = maxw - minw

    for attrs in edge_attrs:
        if spread == 0:
            # Constant weights: avoid 0/0 and pin to the lower bound.
            attrs['weight_minmax'] = 0.5
        else:
            # (w - min) / (max - min) lies in [0, 1]; halve it and shift by 0.5.
            attrs['weight_minmax'] = ((attrs['weight'] - minw) / (2 * spread)) + 0.5

In [None]:
# ZScore normalization for the standard graph
def normalize_zscore(net):
    """Add a ``weight_zscore`` attribute to every edge.

    ``weight_zscore = (weight - mean) / stdev`` where mean and (sample)
    standard deviation are taken over all edge weights. The graph is modified
    in place.

    Degenerate inputs:
    - no edges: returns immediately;
    - a single edge, or all weights equal: the standard deviation is
      undefined / zero, so every edge gets a z-score of 0.0 (it sits exactly
      on the mean).

    Parameters
    ----------
    net : graph-like object whose ``edges(data=True)`` yields
        ``(u, v, attrs)`` tuples; each ``attrs`` must contain ``'weight'``.
    """
    edge_attrs = [attrs for _, _, attrs in net.edges(data=True)]
    if not edge_attrs:
        return

    weights = [attrs['weight'] for attrs in edge_attrs]
    meanw = statistics.fmean(weights)
    # statistics.stdev needs at least two data points.
    stdw = statistics.stdev(weights) if len(weights) > 1 else 0.0

    for attrs in edge_attrs:
        if stdw > 0:
            attrs['weight_zscore'] = (attrs['weight'] - meanw) / stdw
        else:
            attrs['weight_zscore'] = 0.0

In [None]:
# Our custom normalization for the standard graph
def normalize_custom(net):
    """Add a ``weight_my`` attribute derived from ``weight_zscore``.

    A z-score of 0 maps to 0.5 and the largest z-score maps to 1.0; values
    are scaled linearly between and below those points, and any result that
    is not strictly positive is clamped to 0.0. The graph is modified in
    place.

    Degenerate inputs:
    - no edges: returns immediately;
    - largest z-score equal to 0 (every weight sits on the mean): the scale
      factor is undefined, so every edge gets 0.0. This is also what
      ``normalize_custom_direct`` yields when all weights are equal.

    Parameters
    ----------
    net : graph-like object whose ``edges(data=True)`` yields
        ``(u, v, attrs)`` tuples; each ``attrs`` must contain
        ``'weight_zscore'`` (see ``normalize_zscore``).
    """
    edge_attrs = [attrs for _, _, attrs in net.edges(data=True)]
    if not edge_attrs:
        return

    # Value that a z-score of 0 is mapped to.
    new_median = 0.5
    maxw = max(attrs['weight_zscore'] for attrs in edge_attrs)

    for attrs in edge_attrs:
        if maxw == 0:
            # Cannot divide by the maximum; fall back to the clamp value.
            attrs['weight_my'] = 0.0
            continue
        my_weight = (attrs['weight_zscore'] * (1 - new_median)) / maxw + new_median
        attrs['weight_my'] = my_weight if my_weight > 0 else 0.0

In [None]:
# Our custom normalization for the standard graph - Direct implementation
def normalize_custom_direct(net):
    """Add a ``weight_my`` attribute computed directly from ``weight``.

    The mean weight maps to 0.5 and the maximum weight to 1.0. The point
    that would map to 0 is ``2 * mean - max``; weights at or below that
    threshold are clamped to 0.0, the rest are scaled linearly:
    ``(weight - threshold) / (max - threshold)``. The graph is modified in
    place.

    An edge-less graph is left untouched. When all weights are equal, no
    weight exceeds the threshold, so every edge gets 0.0.

    Parameters
    ----------
    net : graph-like object whose ``edges(data=True)`` yields
        ``(u, v, attrs)`` tuples; each ``attrs`` must contain ``'weight'``.
    """
    edge_attrs = [attrs for _, _, attrs in net.edges(data=True)]
    if not edge_attrs:
        return

    weights = [attrs['weight'] for attrs in edge_attrs]
    maxw = max(weights)
    meanw = statistics.fmean(weights)

    # Mirror image of the maximum around the mean; acts as the effective minimum.
    weight_threshold = 2 * meanw - maxw
    span = maxw - weight_threshold

    for attrs in edge_attrs:
        weight = attrs['weight']
        if weight > weight_threshold:
            attrs['weight_my'] = (weight - weight_threshold) / span
        else:
            # Float (not int 0) so the attribute has one type on every edge.
            attrs['weight_my'] = 0.0

In [None]:
# calculate distances based on weight
def calculate_distances(net, src_weight):
    """Store the reciprocal of an edge attribute as ``weight_distance``.

    For every edge the value of ``src_weight`` is inverted. Values that are
    not strictly positive are replaced by 0.000001 before inverting, so the
    division never hits zero. The graph is modified in place.
    """
    floor_value = 0.000001
    for _u, _v, attrs in net.edges(data=True):
        source = attrs[src_weight]
        if source > 0:
            divisor = source
        else:
            divisor = floor_value
        attrs['weight_distance'] = 1 / divisor

In [None]:
# Cut-off graph: min-max scale the weights, then derive distances from the result.
normalize_minmax(cnet)
calculate_distances(cnet, 'weight_minmax')

# Standard graph: the two-step variant (z-score, then custom rescale) is kept
# commented out; the direct implementation below writes the same 'weight_my'
# attribute and is the one in use.
#normalize_zscore(net)
#normalize_custom(net)
normalize_custom_direct(net)
calculate_distances(net, 'weight_my')

## Plotting

In [None]:
def plot_weight(net, column_name, xscale = "log", yscale = "log", resolution=0.025, max_x = None):
    """Scatter-plot how often each binned value of an edge attribute occurs.

    Every edge's ``column_name`` value is floored to a multiple of
    ``resolution``; the number of edges per bin is plotted against the bin
    value. The median and mean of the un-binned values are printed first.

    Parameters
    ----------
    net : graph-like object whose ``edges(data=True)`` yields
        ``(u, v, attrs)`` tuples.
    column_name : str
        Edge attribute to plot; also used as the x-axis label.
    xscale, yscale : str
        Axis scales, passed to ``plt.xscale`` / ``plt.yscale``.
    resolution : float
        Bin width used when flooring the values.
    max_x : float or None
        Upper x-axis limit; ``None`` uses the largest bin value.

    Returns
    -------
    None. Prints the statistics and shows the figure. If the graph has no
    edges, a short message is printed and nothing is plotted.
    """
    raw_weights = [attrs[column_name] for _, _, attrs in net.edges(data=True)]
    if not raw_weights:
        print(f'No edges to plot for {column_name}')
        return

    # Statistics are taken before binning so flooring does not bias them.
    print(f'Median weight: {statistics.median(raw_weights)}')
    print(f'Average weight: {statistics.fmean(raw_weights)}')

    # Floor each value to the start of its bin.
    bracket_ratio = 1 / resolution
    binned = [math.floor(w * bracket_ratio) / bracket_ratio for w in raw_weights]

    weight_counts = Counter(binned)
    x, y = zip(*weight_counts.items())

    plt.figure(1)

    plt.xlabel(column_name)
    plt.xscale(xscale)

    plot_max = max_x if max_x is not None else max(x)
    if xscale == "log":
        # A log axis cannot start at 0 (matplotlib ignores such a limit with a
        # warning), so start at the smallest positive bin instead.
        positive_x = [value for value in x if value > 0]
        plot_min = min(positive_x) if positive_x else None
        if plot_max <= 0:
            plot_max = None
    else:
        plot_min = min(0, min(x))
    plt.xlim(plot_min, plot_max)

    plt.ylabel('frequency')
    plt.yscale(yscale)
    # Counts are >= 1, so on a log axis start just below the smallest count.
    y_min = min(y) / 1.1 if yscale == "log" else 0
    plt.ylim(y_min, max(y) * 1.1)

    plt.scatter(x, y, marker='.')
    plt.show()

In [None]:
# Distribution of the original 'weight' attribute on the standard graph
plot_weight(net, 'weight', xscale='linear', yscale='linear', resolution = 0.025)

In [None]:
# Distribution of the original 'weight' attribute on the cut-off graph
plot_weight(cnet, 'weight', xscale='linear', yscale='linear', resolution = 0.025)

In [None]:
#plot_weight(net, 'weight_zscore', xscale='linear', yscale='linear', resolution = 0.2)

In [None]:
# Distribution of the custom-normalized 'weight_my' attribute on the standard graph
plot_weight(net, 'weight_my', xscale='linear', yscale='linear', resolution = 0.025)

In [None]:
# Distribution of the min-max normalized 'weight_minmax' attribute on the cut-off graph
plot_weight(cnet, 'weight_minmax', xscale='linear', yscale='linear', resolution = 0.025)

In [None]:
# Show the distribution of 'weight_distance' values on the standard graph (x axis capped at 5)
plot_weight(net, 'weight_distance', xscale='linear', yscale='linear', resolution = 0.1, max_x = 5)

In [None]:
# Show the distribution of 'weight_distance' values on the cut-off graph (x axis capped at 5)
plot_weight(cnet, 'weight_distance', xscale='linear', yscale='linear', resolution = 0.1, max_x = 5)