In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
import scipy.linalg as la
from scipy.stats import linregress

import matplotlib
matplotlib.rc('xtick',labelsize=16)
matplotlib.rc('ytick',labelsize=16)
matplotlib.rc('font',size = 24)
matplotlib.rc('legend',fontsize = 17)
matplotlib.rc('figure',titlesize = 25)

In [2]:
# import and clean the data

df = pd.read_csv('allies_and_enemies_1816_2014_iso.csv')


df = df[(df['alliance'] != 0) | (df['conflict']!=0)]   #filter entries with no link

df['weight'] = df['alliance']+df['conflict']+df['alliance']*df['conflict']

df = df.drop(columns = ['alliance','conflict'])

In [3]:
# Descriptive statistics of the whole dataset:

print('Alliances: ', len(df[df['weight'] == 1]))
print('Conflicts:',len(df[df['weight'] == -1]))
print('Total:',len(df))


countries = np.unique(np.concatenate((np.array(df['statea']),np.array(df['stateb']))))
years = np.unique(df['year'])

print('Time span:',len(years))
print('No. countries:', len(countries))

#df.groupby()

Alliances:  221364
Conflicts: 19930
Total: 241294
Time span: 199
No. countries: 210


In [4]:
# Stats per year

years_v = np.arange(1816,2015)
nodes_year = np.zeros(len(years_v))
links_year = np.zeros(len(years_v))
pos_links_year = np.zeros(len(years_v))
neg_links_year = np.zeros(len(years_v))
no_components = np.zeros(len(years_v))
giant_component = np.zeros(len(years_v))

for idx, year in enumerate(years_v):
    
    df_year = df[df['year'] == year]
    countries = np.unique(np.concatenate((np.array(df_year['statea']),np.array(df_year['stateb']))))
    
    nodes_year[idx] = len(countries)
    links_year[idx] = len(df_year)
    pos_links_year[idx] = len(df_year[df_year['weight'] == 1])
    neg_links_year[idx] = len(df_year[df_year['weight'] == -1])
    
    G = nx.from_pandas_edgelist(df_year, source = 'statea', target = 'stateb')
    no_components[idx] = nx.number_connected_components(G)
    giant_component[idx] = len(max(nx.connected_components(G), key = len))
    
    
print('Avg. no. of nodes:',nodes_year.mean())
print('Avg. no. of links:',links_year.mean())

Avg. no. of nodes: 65.75879396984925
Avg. no. of links: 1212.532663316583


In [16]:
# plots


fig, ax = plt.subplots(figsize = (5,5))
ax.plot(years_v, nodes_year, color = 'k')
ax.set_title('Number of nodes', fontsize = 25)
ax.set_xlabel('Year')
ax.set_ylabel('Number of nodes')
fig.tight_layout()
plt.savefig('Network_statistics_nodes.eps')
plt.close()

fig, ax = plt.subplots(figsize = (5,5))
ax.plot(years_v, no_components, color = 'k')
ax.set_xlabel('Year')
ax.set_ylabel('# components')
ax.set_title('Connected components', fontsize = 25)
ax.set_yticks(list(np.arange(0,10)))
ax.set_ylim([0,10])
ax2 = ax.twinx()
ax2.plot(years_v, giant_component/nodes_year, color = 'k', ls = '--' )
ax2.set_ylabel('GC size/network size', fontsize = 20)
ax2.set_ylim([0,1.1])
fig.tight_layout()
plt.savefig('Network_statistics_components.eps')
plt.close()

fig, ax = plt.subplots(figsize = (5,5))
ax.plot(years_v, links_year, label = 'Total', color = 'k')
ax.plot(years_v, pos_links_year, label = 'Positive', color = 'green')
ax.plot(years_v, neg_links_year, label = 'Negative', color = 'red')
ax.set_title('Number of edges', fontsize = 25)
ax.set_yscale('log')
ax.legend()
ax.set_xlabel('Year')
ax.set_ylabel('Number of edges')
fig.tight_layout()
plt.savefig('Network_statistics_edges.eps')
plt.close()

fig, ax = plt.subplots(figsize = (5,5))
ax.plot(years_v, pos_links_year/links_year, label = '+ links', color = 'green')
ax.plot(years_v, neg_links_year/links_year, label = '- links', color = 'red')
ax.set_title('Fraction of +/- edges', fontsize = 25)
ax.legend(loc = 'right')
ax.set_xlabel('Year')
fig.tight_layout()
plt.savefig('Network_statistics_edge_fraction.eps')
plt.close()




The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
