#### Imports
See `time_series_inital_EDA.ipynb` for the preprocessing that produces `unhcr_refugees_08-16.csv`.

In [1]:
import pandas as pd
from pandas import DataFrame
import numpy as np

from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
import networkx as nx
import powerlaw



# Raise pandas' display limits so long sequences and wide/long frames are
# printed with less truncation in the cell outputs below.
pd.set_option('display.max_seq_items', None)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including ones raised by pandas or the fitting code -- confirm this is intended.
warnings.filterwarnings('ignore')

In [None]:
# Read the preprocessed UNHCR refugee table and preview its first rows.
refugees_csv = "../data/unhcr_refugees_08-16.csv"
df = pd.read_csv(refugees_csv)
df.head()

Unnamed: 0,year,destination,origin,type,value,country_code_origin,name_en_origin,region_code_origin,region_code_en_origin,country_code_destination,name_en_destination,region_code_destination,region_code_en_destination
0,2008,Afghanistan,Iran (Islamic Republic of),Refugees (incl. refugee-like situations),30,IRN,Iran (Islamic Republic of),142.0,Asia,AFG,Afghanistan,142.0,Asia
1,2008,Afghanistan,Iraq,Refugees (incl. refugee-like situations),7,IRQ,Iraq,142.0,Asia,AFG,Afghanistan,142.0,Asia
2,2008,Albania,Algeria,Refugees (incl. refugee-like situations),1,ALG,Algeria,2.0,Africa,ALB,Albania,150.0,Europe
3,2008,Albania,China,Refugees (incl. refugee-like situations),4,CHI,China,142.0,Asia,ALB,Albania,150.0,Europe
4,2008,Albania,Czech Republic,Refugees (incl. refugee-like situations),2,CZE,Czech Republic,150.0,Europe,ALB,Albania,150.0,Europe


#### Building a directed NetworkX graph of refugee flows (origin → destination)

In [None]:
# Sum refugee counts over all years for every (origin, destination) pair and
# order the pairs from the largest flow to the smallest.
from_to_values = (
    df.groupby(["origin", 'destination'])['value']
    .sum()
    .reset_index()
    .sort_values("value", ascending=False)
)

# One (origin, destination, value) tuple per pair; the value becomes the edge weight.
edge_list = from_to_values[['origin', 'destination', 'value']]
edges = [tuple(row) for row in edge_list.values]

# Directed graph: an edge points from the origin country to the destination country.
DG = nx.DiGraph()
DG.add_weighted_edges_from(edges)

nx.draw(DG, with_labels=True)
plt.show()

#### Power-law fit of all-time refugee totals per origin country

In [None]:
# Total refugees per origin country over the whole table, largest total first.
origin = (
    df.groupby(["origin"])["value"]
    .agg(['sum'])
    .sort_values(['sum'], ascending=False)
)
print(origin)

# Plain list of the per-country totals; the plotting cell that follows fits these.
degree_values = origin['sum'].tolist()
print(degree_values)

In [None]:
# Fit a continuous power law to the per-origin totals in `degree_values` and
# compare the fitted complementary CDF (blue, dashed) with the empirical one (red).
fig, ax = plt.subplots(figsize=(18, 8))

# Totals are shifted by +1 and the fit uses a fixed xmin=1
# (presumably so that zero totals stay inside the fitted range -- TODO confirm).
fit = powerlaw.Fit(np.array(degree_values) + 1, xmin=1, discrete=False)

fit.power_law.plot_ccdf(ax=ax, color='b', linestyle='--', label='fit ccdf')
fit.plot_ccdf(ax=ax, color='r', label='empirical ccdf')

ax.set_title('Origin of Refugees')
ax.set_xlabel('Out k = Number of refugees')
ax.set_ylabel('Complementary CDF ' + r'$P(X\geq k)$')
# Both curves carry labels; without a legend they are never shown and the
# fitted and empirical curves cannot be told apart.
ax.legend(loc='best', fontsize='small')

plt.show()
print('alpha= ',fit.power_law.alpha,'  sigma= ',fit.power_law.sigma)

#### Narrowing down to recent years to compare refugee totals per origin country

In [None]:
# Restrict the table to recent years.
# The intended cutoff is 2017, but the loaded file is named
# "unhcr_refugees_08-16.csv"; if its years really stop at 2016, a plain
# `year >= 2017` filter selects no rows and the cells that use `df_17` would
# work on empty data. Clamp the cutoff to the latest year present so the
# subset is never empty; when the data does reach 2017 nothing changes.
RECENT_YEAR_CUTOFF = 2017
latest_year = int(df['year'].max())
recent_from_year = min(RECENT_YEAR_CUTOFF, latest_year)
if recent_from_year != RECENT_YEAR_CUTOFF:
    print(
        f"No rows with year >= {RECENT_YEAR_CUTOFF}; "
        f"using year >= {recent_from_year} (latest year in the data) instead."
    )

# Name kept as `df_17` because a later cell reads it.
df_17 = df[df['year'] >= recent_from_year]

# Total refugees per origin country within the recent subset, largest first.
origin = df_17.groupby(["origin"])["value"].agg(['sum'])
origin = origin.sort_values(['sum'], ascending=False)

print(origin)

# Plain list of the per-country totals; the plotting cell that follows fits these.
degree_values = origin['sum'].tolist()
print(degree_values)

In [None]:
# Fit a continuous power law to the per-origin totals currently held in
# `degree_values` and compare the fitted complementary CDF (blue, dashed)
# with the empirical one (red).
fig, ax = plt.subplots(figsize=(18, 8))

# Totals are shifted by +1 and the fit uses a fixed xmin=1
# (presumably so that zero totals stay inside the fitted range -- TODO confirm).
fit = powerlaw.Fit(np.array(degree_values) + 1, xmin=1, discrete=False)

fit.power_law.plot_ccdf(ax=ax, color='b', linestyle='--', label='fit ccdf')
fit.plot_ccdf(ax=ax, color='r', label='empirical ccdf')

ax.set_title('Origin of Refugees')
ax.set_xlabel('Out k = Number of refugees')
ax.set_ylabel('Complementary CDF ' + r'$P(X\geq k)$')
# Both curves carry labels; without a legend they are never shown and the
# fitted and empirical curves cannot be told apart.
ax.legend(loc='best', fontsize='small')

plt.show()
print('alpha= ',fit.power_law.alpha,'  sigma= ',fit.power_law.sigma)

#### Power-law fit of all-time refugee totals per destination country

In [None]:
# Total refugees per destination country over the whole table, largest total first.
destination = (
    df.groupby(["destination"])["value"]
    .agg(['sum'])
    .sort_values(['sum'], ascending=False)
)
print(destination)

# Plain list of the per-country totals; the plotting cell that follows fits these.
degree_values = destination['sum'].tolist()
print(degree_values)

In [None]:
# Fit a continuous power law to the per-destination totals in `degree_values`
# and compare the fitted complementary CDF (blue, dashed) with the empirical
# one (red).
fig, ax = plt.subplots(figsize=(18, 8))

# Totals are shifted by +1 and the fit uses a fixed xmin=1
# (presumably so that zero totals stay inside the fitted range -- TODO confirm).
fit = powerlaw.Fit(np.array(degree_values) + 1, xmin=1, discrete=False)

fit.power_law.plot_ccdf(ax=ax, color='b', linestyle='--', label='fit ccdf')
fit.plot_ccdf(ax=ax, color='r', label='empirical ccdf')

ax.set_title('Destination of Refugees')
ax.set_xlabel('In k = Number of refugees')
ax.set_ylabel('Complementary CDF ' + r'$P(X\geq k)$')
# Both curves carry labels; without a legend they are never shown and the
# fitted and empirical curves cannot be told apart.
ax.legend(loc='best', fontsize='small')

plt.show()
print('alpha= ',fit.power_law.alpha,'  sigma= ',fit.power_law.sigma)

#### Narrowing down to recent years: refugee totals per destination country

In [None]:
# Total refugees per destination country within the recent-years subset
# (`df_17`, built in an earlier cell), largest total first.
destination = (
    df_17.groupby(["destination"])["value"]
    .agg(['sum'])
    .sort_values(['sum'], ascending=False)
)
print(destination)

# Plain list of the per-country totals; the plotting cell that follows fits these.
degree_values = destination['sum'].tolist()
print(degree_values)

In [None]:
# Fit a continuous power law to the per-destination totals currently held in
# `degree_values` and compare the fitted complementary CDF (blue, dashed)
# with the empirical one (red).
fig, ax = plt.subplots(figsize=(18, 8))

# Totals are shifted by +1 and the fit uses a fixed xmin=1
# (presumably so that zero totals stay inside the fitted range -- TODO confirm).
fit = powerlaw.Fit(np.array(degree_values) + 1, xmin=1, discrete=False)

fit.power_law.plot_ccdf(ax=ax, color='b', linestyle='--', label='fit ccdf')
fit.plot_ccdf(ax=ax, color='r', label='empirical ccdf')

ax.set_title('Destination of Refugees')
ax.set_xlabel('In k = Number of refugees')
ax.set_ylabel('Complementary CDF ' + r'$P(X\geq k)$')
# Both curves carry labels; without a legend they are never shown and the
# fitted and empirical curves cannot be told apart.
ax.legend(loc='best', fontsize='small')

plt.show()
print('alpha= ',fit.power_law.alpha,'  sigma= ',fit.power_law.sigma)