In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import cm
import random
from concurrent.futures import ThreadPoolExecutor


In [None]:
# Load the three GraphML networks (countries, cities, and the combined graph).
country_net, city_net, combined_net = (
    nx.read_graphml(graphml_path)
    for graphml_path in (
        "../networks/graphml/countries_network.graphml",
        "../networks/graphml/cities_network.graphml",
        "../networks/graphml/combined_network.graphml",
    )
)

## Node Level Analysis

#### Degree Centrality

In [4]:
# Degree centrality per node, highest first, for each of the three networks.
country_deg_cen, city_deg_cen, combined_deg_cen = (
    pd.Series(nx.degree_centrality(net)).sort_values(ascending=False)
    for net in (country_net, city_net, combined_net)
)

#### Eigenvector Centrality

In [5]:
# Eigenvector centrality per node, highest first, for each of the three networks.
country_eig_cen, city_eig_cen, combined_eig_cen = (
    pd.Series(nx.eigenvector_centrality(net)).sort_values(ascending=False)
    for net in (country_net, city_net, combined_net)
)

#### PageRank

In [47]:
# PageRank per node as a named Series, highest first, for each network.
country_pg_rank, city_pg_rank, combined_pg_rank = (
    pd.Series(nx.pagerank(net), name=label).sort_values(ascending=False)
    for net, label in (
        (country_net, "CountryPageRank"),
        (city_net, "CityPageRank"),
        (combined_net, "CombinedPageRank"),
    )
)

In [48]:
# Write each PageRank Series to its CSV file.
for _series, _csv_path in (
    (country_pg_rank, "pagerank/country_pg_rank.csv"),
    (city_pg_rank, "pagerank/city_pg_rank.csv"),
    (combined_pg_rank, "pagerank/combined_pg_rank.csv"),
):
    _series.to_csv(_csv_path)

#### Closeness Centrality

In [7]:
# Closeness centrality per node, highest first, for each network.
country_closeness, city_closeness, combined_closeness = (
    pd.Series(nx.closeness_centrality(net)).sort_values(ascending=False)
    for net in (country_net, city_net, combined_net)
)

In [8]:
# Same closeness computation, but on each network with its edge directions reversed.
country_closeness_rev, city_closeness_rev, combined_closeness_rev = (
    pd.Series(nx.closeness_centrality(net.reverse())).sort_values(ascending=False)
    for net in (country_net, city_net, combined_net)
)

###

#### Trophic Levels

In [12]:
# Trophic level of every node, stored as a dict ordered by node name.
country_trophics, city_trophics, combined_trophics = (
    dict(sorted(nx.trophic_levels(net).items()))
    for net in (country_net, city_net, combined_net)
)

In [14]:
# Rebind each trophic-level mapping as a pandas Series (same variable names).
country_trophics, city_trophics, combined_trophics = map(
    pd.Series, (country_trophics, city_trophics, combined_trophics)
)

In [15]:
# Write each trophic-level Series to its CSV file.
for _series, _csv_path in (
    (country_trophics, "trophiclvl/country_trophic.csv"),
    (city_trophics, "trophiclvl/city_trophic.csv"),
    (combined_trophics, "trophiclvl/combined_trophic.csv"),
):
    _series.to_csv(_csv_path)

In [11]:
# Writes the country trophic levels to CSV again; same call and path as the first
# line of the preceding export cell.
# NOTE(review): looks like a leftover duplicate cell — confirm it can be removed.
country_trophics.to_csv("trophiclvl/country_trophic.csv")

In [11]:
def plot_trophics(G, title, lvl, iters=100):
    """Draw ``G`` with node sizes scaled by trophic level, labelling the high-level nodes.

    Args:
        G: graph handed to ``nx.trophic_levels`` and the networkx drawing helpers.
        title: text placed at the start of the figure title.
        lvl: threshold; only nodes whose trophic level is strictly greater than
            this value get a text label.
        iters: iteration count passed to ``nx.spring_layout``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    trophics = nx.trophic_levels(G)
    # Fix one node order and use it for both `nodelist` and `node_size`, so that
    # every size is paired with its own node (sorting levels by name broke this).
    nodes = list(G.nodes())
    node_sizes = [trophics[node] * 50 for node in nodes]
    labels = {node: node for node in nodes if trophics[node] > lvl}

    pos = nx.spring_layout(G, k=1.5, iterations=iters)
    fig, ax = plt.subplots(figsize=(12, 12))
    nx.draw_networkx_nodes(G, pos, nodelist=nodes, ax=ax, node_size=node_sizes)
    nx.draw_networkx_edges(G, pos, ax=ax, alpha=0.5, edge_color='gray')
    nx.draw_networkx_labels(G, pos=pos, labels=labels, ax=ax)
    # The threshold belongs inside the title text; as a second positional argument
    # it would be interpreted as the title's fontdict.
    ax.set_title(f"{title} with Trophic levels > {lvl}")
    # plt.show() displays the open figures; it does not take a figure argument.
    plt.show()

### HITS

In [50]:
# HITS hub and authority scores for each network.
(
    (country_hubs, country_auths),
    (city_hubs, city_auths),
    (combined_hubs, combined_auths),
) = (nx.hits(net) for net in (country_net, city_net, combined_net))

In [51]:
# Rebind every hub/authority mapping as a pandas Series (same variable names).
country_hubs, country_auths = map(pd.Series, (country_hubs, country_auths))
city_hubs, city_auths = map(pd.Series, (city_hubs, city_auths))
combined_hubs, combined_auths = map(pd.Series, (combined_hubs, combined_auths))

In [52]:
# One table per network: authority score, hub score, and their difference ("Adv"),
# ordered by that difference from largest to smallest.
country_hits, city_hits, combined_hits = (
    pd.DataFrame({"Auths": auths, "Hubs": hubs, "Adv": auths - hubs}).sort_values(
        by="Adv", ascending=False
    )
    for auths, hubs in (
        (country_auths, country_hubs),
        (city_auths, city_hubs),
        (combined_auths, combined_hubs),
    )
)

In [53]:
# Write each HITS table to its CSV file, including the header row.
for _frame, _csv_path in (
    (country_hits, "hits/country_hits.csv"),
    (city_hits, "hits/city_hits.csv"),
    (combined_hits, "hits/combined_hits.csv"),
):
    _frame.to_csv(_csv_path, header=True)

In [36]:
# Spot check: the "Adv" (authority minus hub) value for one country; .get falls
# back to 0 when the label is not in the index.
country_hits['Adv'].get("Antigua and Barbuda", 0)

np.float64(0.07810166433549995)

### Effective Sizes

In [16]:
# Effective size of every node, for each network.
country_effective_size, city_effective_size, combined_effective_size = (
    pd.Series(nx.effective_size(net))
    for net in (country_net, city_net, combined_net)
)

### Clustering

In [17]:
# Clustering coefficient per node, sorted with pandas' default ordering.
country_cluster, city_cluster, combined_cluster = (
    pd.Series(nx.clustering(net)).sort_values()
    for net in (country_net, city_net, combined_net)
)

### Average Neighbour Degree

In [60]:
# Average neighbour degree per node as a named Series, sorted with the default ordering.
country_avg_neighbour, city_avg_neighbour, combined_avg_neighbour = (
    pd.Series(nx.average_neighbor_degree(net), name=label).sort_values()
    for net, label in (
        (country_net, "CountryAvgNeighbour"),
        (city_net, "CityAvgNeighbour"),
        (combined_net, "CombinedAvgNeighbour"),
    )
)

In [None]:
# Write each average-neighbour-degree Series to its CSV file.
for _series, _csv_path in (
    (country_avg_neighbour, "avg_neighbour/country_avg_neighbour.csv"),
    (city_avg_neighbour, "avg_neighbour/city_avg_neighbour.csv"),
    (combined_avg_neighbour, "avg_neighbour/combined_avg_neighbour.csv"),
):
    _series.to_csv(_csv_path)


### Betweenness centrality

In [62]:
# Betweenness centrality per node as a named Series, highest first.
country_betweenness, city_betweenness, combined_betweenness = (
    pd.Series(nx.betweenness_centrality(net), name=label).sort_values(ascending=False)
    for net, label in (
        (country_net, "CountryBetween"),
        (city_net, "CityBetween"),
        (combined_net, "CombinedBetween"),
    )
)

In [65]:
# Write each betweenness Series to its CSV file.
for _series, _csv_path in (
    (country_betweenness, "between/country_betweenness.csv"),
    (city_betweenness, "between/city_betweenness.csv"),
    (combined_betweenness, "between/combined_betweenness.csv"),
):
    _series.to_csv(_csv_path)

### Cut sets

In [None]:
# Collect the node cut sets reported by nx.all_node_cuts for the undirected
# version of each network.
# NOTE(review): unlike the city and combined graphs below, country_net is not
# stripped of zero-degree nodes first — confirm that is intentional.
country_cutsets = list(nx.all_node_cuts(country_net.to_undirected()))
# Work on a copy so that city_net itself keeps its zero-degree nodes.
city_net_copy = city_net.copy()
city_net_copy.remove_nodes_from([n for n,d in city_net.degree() if d == 0])
city_cutsets = list(nx.all_node_cuts(city_net_copy.to_undirected()))
# Same copy-and-prune treatment for the combined network.
combined_net_copy = combined_net.copy()
combined_net_copy.remove_nodes_from([n for n,d in combined_net.degree() if d == 0])
combined_cutsets = list(nx.all_node_cuts(combined_net_copy.to_undirected()))

### All sinks in the network

In [39]:
# Sinks are the nodes with no outgoing edges.
country_sinks, city_sinks, combined_sinks = (
    [node for node in net.nodes() if net.out_degree(node) == 0]
    for net in (country_net, city_net, combined_net)
)

In [None]:
def get_winning_paths(net, source="Oman", cutoff=3):
    """List the short paths that lead from ``source`` into a dead end.

    A "dead end" is a node with no outgoing edges once ``source`` has been
    removed from a copy of ``net``. For every successor of ``source`` the
    function collects all simple paths (at most ``cutoff`` edges) from that
    successor to any such dead end, mirroring the ad-hoc cells that follow this
    definition in the notebook.

    Args:
        net: directed networkx graph; it is not modified.
        source: starting node. Defaults to ``"Oman"``, the node the original
            body hard-coded.
        cutoff: maximum path length passed to ``nx.all_simple_paths``.

    Returns:
        A list of paths, shortest first. Each path is a list of nodes that
        begins with ``source``.

    Raises:
        networkx.NetworkXError: if ``source`` is not a node of ``net``.
    """
    # Work on a copy: removing the source must not alter the caller's graph.
    net2 = net.copy()
    net2.remove_node(source)
    # The original body read the notebook global `country_net2` here, ignoring `net`.
    sinks = [node for node in net2.nodes() if net2.out_degree(node) == 0]

    paths = []
    for successor in net.successors(source):
        for sink in sinks:
            # NOTE(review): paths are searched in the full graph, as in the notebook
            # cells, so a path may pass back through `source` — confirm that is wanted.
            paths.extend(nx.all_simple_paths(net, source=successor, target=sink, cutoff=cutoff))
    paths.sort(key=len)
    return [[source, *path] for path in paths]

In [34]:
# Remove Oman from a copy of the country network, then list the nodes that are
# left without any outgoing edge.
country_net2 = country_net.copy()
country_net2.remove_node("Oman")
country_sinks2 = [node for node, out_deg in country_net2.out_degree() if out_deg == 0]
country_sinks2

['Burkina Faso',
 'Democratic Republic of the Congo',
 'Lesotho',
 'Mexico',
 'Monaco',
 'Montenegro',
 'Morocco',
 'Republic of the Congo',
 'San Marino',
 'Togo',
 'Trinidad and Tobago']

In [40]:
# Gather every simple path (cutoff 3) from a successor of Oman to one of the
# sinks of the Oman-less graph, shortest paths first.
useful_paths = []
for sucessor in country_net.successors("Oman"):
    for sink in country_sinks2:
        useful_paths.extend(
            nx.all_simple_paths(country_net, source=sucessor, target=sink, cutoff=3)
        )
useful_paths.sort(key=len)

# One line per path, each prefixed with the starting country.
winning_paths = "".join(
    "Oman -> " + " -> ".join(path) + "\n" for path in useful_paths
)
# The file is opened in append mode, so re-running this cell adds the lines again.
with open("winning_paths.txt", "a") as f:
    f.write(winning_paths)

Anyone who says Norway after Yemen wins.

In [17]:
# Remove Yemen from a copy of the country network, then list the nodes that are
# left without any outgoing edge.
country_net3 = country_net.copy()
country_net3.remove_node("Yemen")
country_sinks3 = [node for node, out_deg in country_net3.out_degree() if out_deg == 0]
country_sinks3

['Germany',
 'Hungary',
 'Italy',
 'Norway',
 'Paraguay',
 'Turkey',
 'Uruguay',
 'Vatican City']

In [37]:
# Gather every simple path (cutoff 3) from a successor of Yemen to one of the
# sinks of the Yemen-less graph, shortest paths first.
useful_paths = []
for sucessor in country_net.successors("Yemen"):
    for sink in country_sinks3:
        useful_paths.extend(
            nx.all_simple_paths(country_net, source=sucessor, target=sink, cutoff=3)
        )
useful_paths.sort(key=len)

# One line per path, each prefixed with the starting country.
winning_paths = "".join(
    "Yemen -> " + " -> ".join(path) + "\n" for path in useful_paths
)
winning_paths

'Yemen -> Norway\nYemen -> Nauru -> Uruguay\nYemen -> Namibia -> Afghanistan -> Norway\nYemen -> Namibia -> Azerbaijan -> Norway\nYemen -> Nauru -> Uzbekistan -> Norway\nYemen -> Nepal -> Luxembourg -> Germany\nYemen -> Nepal -> Lebanon -> Norway\nYemen -> Nepal -> Liechtenstein -> Norway\nYemen -> Netherlands -> South Sudan -> Norway\nYemen -> Netherlands -> Spain -> Norway\nYemen -> Netherlands -> Sudan -> Norway\nYemen -> Netherlands -> Sweden -> Norway\nYemen -> New Zealand -> Djibouti -> Italy\nYemen -> Nicaragua -> Afghanistan -> Norway\nYemen -> Nicaragua -> Azerbaijan -> Norway\nYemen -> Nigeria -> Afghanistan -> Norway\nYemen -> Nigeria -> Azerbaijan -> Norway\nYemen -> North Korea -> Afghanistan -> Norway\nYemen -> North Korea -> Azerbaijan -> Norway\nYemen -> North Macedonia -> Afghanistan -> Norway\nYemen -> North Macedonia -> Azerbaijan -> Norway\nYemen -> Namibia -> Albania -> Afghanistan -> Norway\nYemen -> Namibia -> Albania -> Azerbaijan -> Norway\nYemen -> Namibia -> 

### Approximate Maximal Path Length Parity Analysis

In [54]:
def sample_maximal_paths(G, source, num_samples=1000, seed=None):
    """Sample maximal paths from ``source`` by random walks that never revisit a node.

    Each sample starts at ``source`` and repeatedly steps to a uniformly chosen
    successor that is not yet on the path. The walk stops, without backtracking,
    as soon as the current node has no unvisited successor, so every returned
    path is maximal in the sense that it cannot be extended at its end.

    Args:
        G: any object exposing ``successors(node)`` (e.g. a networkx DiGraph).
        source: node every sampled path starts from.
        num_samples: number of paths to sample.
        seed: optional seed for a private ``random.Random`` generator, making
            the samples reproducible. When ``None`` (the default) the shared
            module-level ``random`` generator is used, as before.

    Returns:
        A list with ``num_samples`` paths, each a list of nodes beginning with
        ``source``.
    """
    rng = random if seed is None else random.Random(seed)
    sampled_paths = []

    for _ in range(num_samples):
        path = [source]
        visited = {source}

        while True:
            # Only successors that are not already on this path are eligible.
            candidates = [n for n in G.successors(path[-1]) if n not in visited]
            if not candidates:
                break
            next_node = rng.choice(candidates)
            path.append(next_node)
            visited.add(next_node)

        sampled_paths.append(path)

    return sampled_paths

In [56]:
def source_parity_adv(G, source, **kwargs):
    """Score ``source`` by the parity of its sampled maximal path lengths.

    Paths are drawn with ``sample_maximal_paths(G, source, **kwargs)``. A path
    with an odd number of nodes counts +1 and one with an even number counts -1.

    Args:
        G: graph forwarded to ``sample_maximal_paths``.
        source: start node forwarded to ``sample_maximal_paths``.
        **kwargs: extra keyword arguments forwarded to ``sample_maximal_paths``
            (for example ``num_samples``).

    Returns:
        The integer sum of the +1/-1 scores over all sampled paths.
    """
    sample_paths = sample_maximal_paths(G, source, **kwargs)
    # A parity check is far cheaper than dispatching it to a thread pool, so the
    # scores are summed directly.
    return sum(1 if len(path) % 2 == 1 else -1 for path in sample_paths)
    

In [57]:
def parity_adv(G, **kwargs):
    """Compute the parity advantage of every node in ``G``.

    Args:
        G: graph whose ``nodes()`` are each scored.
        **kwargs: forwarded unchanged to ``source_parity_adv``.

    Returns:
        A dict mapping each node to its ``source_parity_adv`` score.
    """
    scores = {}
    for node in G.nodes():
        scores[node] = source_parity_adv(G, node, **kwargs)
    return scores

In [58]:
# Parity advantage per node as a named Series, highest first. The city and
# combined networks are run with fewer samples per node than the default.
country_parity_adv, city_parity_adv, combined_parity_adv = (
    pd.Series(parity_adv(net, **options), name=label).sort_values(ascending=False)
    for net, options, label in (
        (country_net, {}, "CountryParity"),
        (city_net, {"num_samples": 500}, "CityParity"),
        (combined_net, {"num_samples": 250}, "CombinedParity"),
    )
)

In [59]:
# Write each parity-advantage Series to its CSV file.
for _series, _csv_path in (
    (country_parity_adv, "parity/country_parity_adv.csv"),
    (city_parity_adv, "parity/city_parity_adv.csv"),
    (combined_parity_adv, "parity/combined_parity_adv.csv"),
):
    _series.to_csv(_csv_path)

In [8]:
# Writes the country parity advantages to CSV again; same call and path as the
# first line of the preceding export cell.
# NOTE(review): looks like a leftover duplicate cell — confirm it can be removed.
country_parity_adv.to_csv("parity/country_parity_adv.csv")

In [22]:
# Spot check: read the exported CSV back (first column as index, first row as
# header) and take the first value of the "Antigua and Barbuda" row.
pd.read_csv("parity/country_parity_adv.csv", index_col=0, header=0).loc["Antigua and Barbuda"].values[0]

np.int64(78)

## Self Loops

In [None]:
# Report how many self-loop edges nx.number_of_selfloops counts in the country network.
print("Number of Self Loops in Country Net:", nx.number_of_selfloops(country_net))