In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as geopandas
import matplotlib as mpl
import sys, os

# raw conflict data table
# The data directory defaults to the original location but can be overridden
# with the REF_CON_PATH environment variable. os.path.join(..., '') guarantees
# a trailing path separator, because file names are appended to `path` by
# plain string concatenation below and in later cells.
path = os.path.join(os.environ.get('REF_CON_PATH', '/Users/BAMAC/GitStuff/VOL/ref_con/'), '')
data = pd.read_csv(path+'main_conflict_table.csv')

# geopandas data
# NOTE(review): newer geopandas releases no longer ship `geopandas.datasets`;
# confirm this call against the installed version.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
# Antarctica is dropped from the base map.
world = world[world.name!='Antarctica']
# Names known to the map; used as the matching target for dataset names.
country_list = list(world.name.values)

# refugee data
# resettlement data
# The file's own header row is skipped and replaced by the short names below.
asylum = pd.read_csv(path+'refugee-data/time_series.csv',
                     names=['year','dest','origin','pop','value'],skiprows=1)


In [4]:
not_found = []
# matches the name of the country from the geomap to the dataset for plotting purposes
def name_matcher(x_list,country_list):
    """Translate dataset country names into the names used by the map.

    Every entry of ``x_list`` is resolved independently, in this order:

    1. empty strings are skipped;
    2. ``'Various/Unknown'`` becomes the placeholder ``'V/U'``;
    3. a name that already appears in ``country_list`` is kept unchanged;
    4. a name listed in the correction table is replaced by its map spelling;
    5. otherwise every map name that occurs as a substring of the entry is
       added, so an entry may contribute zero or several names.

    The correction table is consulted before the substring fallback so that
    e.g. 'South Sudan' resolves to 'S. Sudan' instead of the substring 'Sudan'.

    Parameters
    ----------
    x_list : iterable of str
        Country names as written in the dataset.
    country_list : collection of str
        Country names known to the map.

    Returns
    -------
    list of str
        Matched map names, in input order.
    """
    # Dataset spelling -> map spelling.
    # NOTE(review): 'Dem. Rep. of the Congo' maps to 'Congo'; if the map lists
    # the two Congos separately this points at the wrong country - confirm
    # against country_list.
    corr_list = {'Hyderabad':'India','Dominican Republic':'Dominican Rep.','Surinam':'Suriname',
                 'Rumania':'Romania','Bosnia and Herzegovina':'Bosnia and Herz.',
                 'Central African Republic':'Central African Rep.','Cote D’Ivoire':'Côte d\'Ivoire',
                 'Tibetian':'Tibet','Viet Nam':'Vietnam','Equatorial Guinea':'Eq. Guinea',
                 'Lao People\'s Dem. Rep.':'Laos','The former Yugoslav Republic of Macedonia':'Macedonia',
                 'Czech Rep.':'Czechia','Iran (Islamic Rep. of)':'Iran',
                 'Brunei Darussalam':'Brunei','Dem. Rep. of the Congo':'Congo','China, Macao SAR':'China',
                 'Syrian Arab Rep.':'Syria','China, Hong Kong SAR':'China',"Dem. People's Rep. of Korea":'North Korea',
                 'Bolivia (Plurinational State of)':'Bolivia','Venezuela (Bolivarian Republic of)':'Venezuela',
                 # 'Rep. of Korea' is South Korea; the North is the "Dem. People's Rep." entry above
                 'Solomon Islands':'Solomon Is.','Rep. of Korea':'South Korea','South Sudan':'S. Sudan',
                 'Rep. of Moldova':'Moldova'}

    new_name = []
    for x in x_list:
        if x == '':
            continue

        if x == 'Various/Unknown':
            new_name.append('V/U')
        elif x in country_list:
            new_name.append(x)
        elif x in corr_list:
            new_name.append(corr_list[x])
        else:
            # last resort: the entry embeds a map name, e.g. a longer official name
            new_name.extend(i for i in country_list if i in x)

    return new_name


# simple splitter for country entries with a ',' in them
def name_splitter(x):
    """Split a multi-country entry into its individual names.

    The separator is a comma followed by a single space, so ``'A, B'``
    yields ``['A', 'B']`` and an entry without it yields a one-element list.
    """
    separator = ', '
    pieces = x.split(separator)
    return pieces


# add x,y coordinates of the country (centroid)
def add_country_cent(x_list,world_data):
    """Return ``[x, y]`` centroid coordinates for each matched country name.

    Parameters
    ----------
    x_list : iterable of str
        Map country names; the placeholder ``'V/U'`` is skipped entirely.
    world_data : GeoDataFrame-like
        Must expose a ``name`` column and a ``geometry`` with ``centroid``.

    Returns
    -------
    list of list
        One entry per name other than ``'V/U'``: ``[x, y]`` for names found
        in ``world_data``, an empty list for names that are not found.
    """
    known_names = set(world_data.name.values)
    coords_list = []

    for x in x_list:
        if x == 'V/U':
            continue

        if x not in known_names:
            coords_list.append([])
        elif x == 'France':
            # France uses a fixed point instead of the computed centroid
            # (presumably because overseas territories skew it - TODO confirm).
            # Stored x first, y second, like the computed centroids.
            coords_list.append([1.6, 46.5])
        else:
            # The filtered frame keeps its original index labels, so the single
            # remaining row has to be taken by position, not by label 0.
            buff_coords = world_data[world_data.name == x].geometry.centroid.iloc[0]
            coords_list.append([buff_coords.x, buff_coords.y])

    return coords_list


def fix_date (x):
    """Expand the two-digit year at the end of a date string to four digits.

    Missing values yield the sentinel ``-99``. Two-digit years from 30 to 99
    are placed in the 1900s, every other value in the 2000s. Everything
    before the last two characters is kept as is.
    """
    if pd.isnull(x):
        return -99

    head, yy = x[:-2], x[-2:]
    century = '19' if 30 <= int(yy) <= 99 else '20'
    return head + century + yy


def get_year (x):
    """Return the four-digit year (as a string) of a date ending in a two-digit year.

    Missing values yield the sentinel ``-99``. Two-digit years from 30 to 99
    are read as 19xx, every other value as 20xx.
    """
    if pd.isnull(x):
        return -99

    yy = x[-2:]
    prefix = '19' if 30 <= int(yy) <= 99 else '20'
    return prefix + yy


def conflict_duration(x):
    """Return the length of a conflict episode in years.

    ``x`` is a row-like object with ``EpEnd``, ``SY`` and ``EY`` attributes.
    Rows whose ``EpEnd`` flag is not 1 get a duration of 0; otherwise the
    duration is ``EY`` minus ``SY`` after converting both to integers.
    """
    if x.EpEnd != 1:
        return 0

    start_year = int(x.SY)
    end_year = int(x.EY)
    return end_year - start_year


def hanging_line(point1, point2):
    """Sample a cosh-shaped curve that passes through two points.

    The curve has the form ``y = a*cosh(x) + b`` with ``a`` and ``b`` chosen
    so that it goes through ``point1`` and ``point2`` (each indexable as
    ``[x, y]``).

    Returns a tuple ``(x, y)`` of two arrays with 200 samples each, with
    ``x`` evenly spaced from ``point1[0]`` to ``point2[0]``.
    """
    x_start, y_start = point1[0], point1[1]
    x_end, y_end = point2[0], point2[1]

    # Solve the two-point conditions for the scale and the vertical offset.
    scale = (y_end - y_start)/(np.cosh(x_end) - np.cosh(x_start))
    offset = y_start - scale*np.cosh(x_start)

    x = np.linspace(x_start, x_end, 200)
    y = scale*np.cosh(x) + offset

    return (x,y)



In [5]:
# Work on a copy restricted to the columns this notebook uses.
new_data = data.loc[:, ['Location', 'SideA', 'SideA2nd', 'SideB', 'SideB2nd',
                        'StartDate', 'StartPrec', 'EpEnd', 'EpEndDate', 'EpEndPrec']].copy()

# fix the country names: split multi-country locations, then translate each
# piece into the spelling used by the map
new_data['loc_split'] = new_data['Location'].map(name_splitter)
new_data['loc_split_map'] = new_data['loc_split'].apply(
    lambda names: name_matcher(names, country_list))
# new_data['coords'] = new_data['loc_split_map'].apply(add_country_cent,args=(world,))

# get the start year and the end year
new_data['SY'] = new_data['StartDate'].map(get_year)
new_data['EY'] = new_data['EpEndDate'].map(get_year)
# get the conflict duration in years (computed row by row from EpEnd, SY and EY)
new_data['CD'] = new_data.apply(conflict_duration, axis='columns')

In [6]:
# matched = np.unique(np.concatenate(new_asylum['origin_split_map'].values))
# orig = np.unique((new_asylum['origin'].values))
# 
# not_found = set(orig) -set(matched)
# print(not_found)
# print('-------')


In [8]:
# Work on a copy so the table read from disk stays untouched.
new_asylum = asylum.copy()

# match the destination countries with the map codes
new_asylum['dest_map'] = (
    new_asylum['dest']
    .apply(name_splitter)
    .apply(lambda names: name_matcher(names, country_list))
)
# NOTE(review): despite its name this column is produced by running
# name_matcher on the already matched names; it holds names, not coordinates.
new_asylum['dest_coords'] = new_asylum['dest_map'].apply(
    lambda names: name_matcher(names, country_list))

# match the origin countries with the map codes
new_asylum['origin_split'] = new_asylum['origin'].apply(name_splitter)
new_asylum['origin_split_map'] = new_asylum['origin_split'].apply(
    lambda names: name_matcher(names, country_list))
# NOTE(review): same as dest_coords - names re-matched, no coordinates computed.
new_asylum['origin_coords'] = new_asylum['origin_split_map'].apply(
    lambda names: name_matcher(names, country_list))

In [9]:
# Dataset spelling -> map spelling for country names.
# NOTE(review): 'Dem. Rep. of the Congo' maps to 'Congo'; if the map lists the
# two Congos separately this points at the wrong country - confirm.
match_dict_2 = {'Tibetian':'Tibet','Viet Nam':'Vietnam','Equatorial Guinea':'Eq. Guinea',
                'Lao People\'s Dem. Rep.':'Laos','The former Yugoslav Republic of Macedonia':'Macedonia',
                'Bosnia and Herzegovina':'Bosnia and Herz.','Czech Rep.':'Czechia','Iran (Islamic Rep. of)':'Iran',
                'Brunei Darussalam':'Brunei','Dem. Rep. of the Congo':'Congo','China, Macao SAR':'China',
                'Syrian Arab Rep.':'Syria','China, Hong Kong SAR':'China',"Dem. People's Rep. of Korea":'North Korea',
                'Bolivia (Plurinational State of)':'Bolivia','Venezuela (Bolivarian Republic of)':'Venezuela',
                # 'Rep. of Korea' is South Korea; the North is the "Dem. People's Rep." entry above
                'Solomon Islands':'Solomon Is.','Rep. of Korea':'South Korea','South Sudan':'S. Sudan',
                'Rep. of Moldova':'Moldova','':''}

In [10]:
sel_year = 1990
thresh_value = 10 # at least these many people should have been displaced

# SY is compared against the string form of the year, `year` against the number.
subset_conflicts = new_data.loc[new_data['SY'] == str(sel_year)]
subset_asylum = new_asylum.loc[new_asylum['year'] == sel_year]

# Flatten the per-row name lists and keep each map name once.
conflict_regs = np.unique(np.concatenate(subset_conflicts['loc_split_map'].values))
asylum_origin = np.unique(np.concatenate(subset_asylum['origin_split_map'].values))
asylum_dest = np.unique(np.concatenate(subset_asylum['dest_map'].values))

mappings = subset_asylum[['dest_map','origin_split_map','origin','value']].to_numpy()

# print('In ', str(sel_year))
# print('Conflicts were observed in:', np.sort(conflict_regs))
# print('Movement of displaces ind. was seen from:',[i for i in np.sort(asylum_origin) if i in conflict_regs])
# print('and from possibly other states to:', np.sort(asylum_dest))

In [11]:
# One colour per destination, sampled evenly across the 'Spectral' colormap
# and stored as hex strings keyed by destination name.
cmap = plt.get_cmap('Spectral')
all_colors = cmap(np.linspace(0, 1, len(asylum_dest)))
dest_clr = {dest_name: mpl.colors.to_hex(rgba[:3])
            for dest_name, rgba in zip(asylum_dest, all_colors)}


In [12]:
# test_as = subset_asylum[0:10]
# print(test_as.groupby('dest')['value'].sum())
# print(test_as.groupby('dest')['origin'].apply(list))

In [13]:
# Bin edges for the summed value per destination, in ascending order,
# with the legend text that belongs to each edge.
bins = [1, 1e4, 5e4, 1e5, 5e5, 1e6, 5e6, 1e7, 1e8]
bin_text = ['> 1', '10k', '50k', '100k', '500k', '1m', '5m', '10m', '100m']
labels = range(len(bins)-1)

# Colors option 1
cmap_bins = plt.get_cmap('terrain')
# # cmap_bins = plt.get_cmap('gist_earth')
# bin_col_list = cmap_bins(np.linspace(0,0.8,len(bins)))
# bin_cols = {i: mpl.colors.to_hex(bin_col_list[i]) for i in range(len(bins))}

# Colors option 2
my_cmap = ["#ca7b80", "#86775f", "#cb7723", "#b0dd16", "#40a368", "#04d9ff", "#448ee4", "#fac205", "#ff474c"]
bin_cols = dict(enumerate(my_cmap))

# One row per destination: values and origins collected into lists, the
# matched name lists concatenated by summing them.
final_test = (
    subset_asylum
    .groupby('dest')
    .agg({'value': lambda grp: list(grp),
          'origin_split_map': 'sum', 'dest_map': 'sum',
          'origin': lambda grp: list(grp)})
    .reset_index()
)
final_test['value_sum'] = final_test['value'].apply(np.sum)

# Largest destinations first, each assigned to a bin by its total.
final_test_sorted = final_test.sort_values('value_sum', ascending=False)
final_test_sorted['binned'] = pd.cut(final_test_sorted['value_sum'], bins=bins, labels=labels)

In [14]:
max_links = 3 #(how many countries to show for linkages)
cut_off = 1e5 # max can be 5e6
# Position of the cut-off among the bin edges; it doubles as the lowest bin
# label that is kept for plotting.
cut_off_bin = np.flatnonzero(np.asarray(bins) == cut_off)[0]

keep_for_plot = final_test_sorted['binned'] >= cut_off_bin
final_test_sorted_to_plot = final_test_sorted.loc[keep_for_plot]

In [16]:
# Draws one world map per destination row and saves each as a numbered PDF.
plot_counter = 0

# Legend layout (data coordinates): one marker per bin colour, spread evenly
# to the right of start_leg_x.
start_leg_x = -180
end_leg_x = -80
inc_x = (end_leg_x-start_leg_x)*1./len(bin_cols)
start_leg_y = -67
end_leg_y = -60

# Text block layout: origin shares stacked upwards from start_met_y, with the
# total and the destination name below them.
start_met_x = start_leg_x
start_met_y = -30
end_met_y = -10
total_met_y = -40
dest_met_y = -50
inc_met = (end_met_y-start_met_y)/max_links

for i,row_i in final_test_sorted_to_plot.iterrows():

    fig_main,ax = plt.subplots(1,1)
    plt.style.use('default')
    sns.set(style='white')
    
    map_lw = 0.3
        
    # base map, then the conflict regions on top of it
    world.plot(linewidth=map_lw,facecolor='gray',alpha=0.13,edgecolor='k',ax=ax)
    conf_world = world[world.name.isin(conflict_regs)]
    conf_world.plot(linewidth=map_lw,edgecolor='red',facecolor='k',ax=ax,alpha=0.99)

    
    dest_buff = np.unique(row_i.dest_map)
    
    if len(dest_buff) != 1:
        # Without exactly one matched destination there is no point to draw
        # the links to and no name to print. Skip this row instead of carrying
        # on with undefined coordinates or those of the previous destination.
        print('error in destinaion')
        plt.close(fig_main)
        continue

    world[world.name== dest_buff[0]].plot(facecolor = bin_cols[row_i.binned],linewidth= map_lw,edgecolor='k',ax=ax)

    if dest_buff[0]!= 'France':
        buff_coords = [world[world.name== dest_buff[0]].geometry.centroid.x.values[0],
                       world[world.name== dest_buff[0]].geometry.centroid.y.values[0]]
        dest_coords = buff_coords
    else:
        # France uses a fixed point instead of the computed centroid
        dest_coords = [1.5,47]
        
    if row_i.origin_split_map != []:
        # Largest contribution first.
        # NOTE(review): values and origin names are paired by position, which
        # presumes one matched name per origin entry - verify.
        sorted_val , sorted_origin = zip(*sorted(zip(row_i.value,row_i.origin_split_map))[::-1])
                
        # print(row_i.value,row_i.origin,row_i.origin_split_map)
        # print(sorted_val[:max_links] , sorted_origin[:max_links],row_i.binned,row_i.value_sum)

        # print('--')
                
        # share of each origin in the paired values, as whole-number percent strings
        per_vals = [str(int(np.round(i*100/np.sum(sorted_val)))) for i in sorted_val]
        
        for i_met_origin, origin_entry in enumerate(sorted_origin[:max_links]):
            if per_vals[i_met_origin] == '0':
                # origins that round to 0% are neither listed nor linked
                continue

            plt.annotate(str(origin_entry)+': ' + per_vals[i_met_origin] + '%',
                         xy = (start_met_x,start_met_y+(i_met_origin*inc_met)), size=6,c='silver')

            if origin_entry == 'V/U':
                # unknown origins are listed but have no location to link from
                continue

            origin_shape = world[world.name== origin_entry]
            if origin_shape.empty:
                # the matched name is not on the map, so there is no centroid
                # to link from; keep the text entry and skip the line
                continue

            origin_coords = [origin_shape.geometry.centroid.x.values[0],
                             origin_shape.geometry.centroid.y.values[0]]

            if origin_entry not in conflict_regs:
                origin_shape.plot(facecolor = 'grey',linewidth= map_lw,edgecolor='k',ax=ax)

            plt.plot([origin_coords[0],dest_coords[0]],[origin_coords[1],dest_coords[1]],
                     c='grey',lw=0.25)

            # arrow from the origin to the midpoint of the link
            arrow_x = (origin_coords[0]+dest_coords[0])/2.
            arrow_y = (origin_coords[1]+dest_coords[1])/2.
            plt.arrow(origin_coords[0], origin_coords[1],(arrow_x - origin_coords[0]), (arrow_y - origin_coords[1])
                      , shape='full', lw=0.125,length_includes_head=True, head_width=.75,color='grey')
    
        plt.annotate('Total: ' + str(row_i.value_sum),xy=(start_met_x,total_met_y),size=8,c=bin_cols[row_i.binned])
        plt.annotate('Destination: '+ dest_buff[0],xy = (start_met_x,dest_met_y), 
                     size=8,c=bin_cols[row_i.binned])
        
    ax.axis('off')
    ax.margins(0.05)
    ax.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)
    
    # colour legend: one marker and label per bin
    for bi in range(len(bin_text))[::-1]:
        plt.scatter([start_leg_x+((bi)*inc_x)],[start_leg_y],s=40,c=bin_cols[bi])
        plt.annotate(bin_text[bi],xy=(start_leg_x+(bi*inc_x),start_leg_y+5),size='5',ha='center')
    
    # ax_inset.axis('off')
    # ax_inset = fig_main.add_axes([0.15,0.35,0.125,0.125])
    # ax_inset.margins(0.05)
    # ax_inset.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)
    # ax_inset.pie(sorted_val[:max_links],autopct='%1.f%%',textprops={'fontsize': 7},labels=sorted_origin[:max_links])
    # ax_inset.legend(sorted_val[:max_links],labels=sorted_origin[:max_links],
    #                 prop={'size': 6},frameon=False,loc='lower center')
    # ax_inset.axis('equal')
    
    
    ########################
    plt.title(r'Conflicts$^\alpha$ and refugee movement$^\beta$ in year '+ str(sel_year),size=8)
    
    plt.annotate(r'Data:${\alpha}$- Gleditsch et al. 2002, ${\beta}$- UNHCR',xy=(120,-67)
                 ,xycoords='data',size=4,va='center')
    plt.scatter([-10],[-67],s=30,facecolor='k',edgecolor='red',marker=(6, 0, 90))
    plt.annotate('Conflict ongoing in region',xy = (-2,-67),xycoords='data',size=6,va='center')
    # NOTE(review): this re-enables the axes that were switched off above - confirm it is intended
    ax.axis('on')
        
    fig_main.tight_layout()
    plt.savefig(path+'split_by_dest/master_test_axes_'+ str('%3.3d'%plot_counter)+'.pdf'
                ,bbox_inches='tight',pad_inches=0.05)
    # close explicitly so figures do not accumulate across iterations
    plt.close(fig_main)
    plot_counter += 1