In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

In [7]:
def choropleth(df, column, title, cmap = 'viridis', ax=None):
    """
    Plot a choropleth of one GeoDataFrame column with a vertical color bar.

    Output: None. The map is drawn on ``ax`` (axis frame hidden) and a
    vertical color bar is added along the right edge of the same figure.

    Input:
    ---
      df : The GeoDataFrame that holds the values to be plotted
      column : The column of the dataframe whose values are plotted
      title : Title of the plot
      cmap : color map used
      ax : Axes to draw on; when None a new 9x9 inch figure is created
    """
    # Honour a caller-supplied Axes; only build a figure when none was given.
    if ax is None:
        _, ax = plt.subplots(figsize=(9, 9))

    # Title the target Axes directly rather than whatever pyplot considers
    # the "current" Axes.
    ax.set_title(title, size=20)

    # `cmap`/`ax` are the current GeoDataFrame.plot keywords (the older
    # `colormap`/`axes` spellings were deprecated), and the classification
    # scheme is spelled 'fisher_jenks'.
    ax = df.plot(column=column, cmap=cmap, alpha=1, linewidth=0.3,
                 scheme='fisher_jenks', ax=ax)
    ax.axis('off')

    # Series.min()/max() skip NaN; the builtin min()/max() over raw values
    # give order-dependent results as soon as one NaN is present.
    vmin, vmax = df[column].min(), df[column].max()

    # The color bar is a continuous vmin..vmax scale drawn in its own Axes,
    # positioned in figure coordinates as [left, bottom, width, height].
    fig = ax.get_figure()
    cax = fig.add_axes([0.9, 0.1, 0.03, 0.8])
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
    # Public equivalent of assigning the private `_A` attribute; gives the
    # mappable an (empty) data array before it is handed to colorbar().
    sm.set_array([])
    fig.colorbar(sm, cax=cax)

In [None]:
# title = "Business Density by Zipcodes, 2000"
# choropleth(zipcodeDF, 'density_2000', title, 'Blues')

In [50]:
# Load the refugee-population table (series SM.POP.REFG in the preview).
# 'utf-8-sig' strips the byte-order mark at the start of the file, which
# otherwise ends up inside the first column label ('\ufeffSeries Name').
refugee_data = pd.read_csv(
    './Data/ref_country_of_Asylum/18f4ebc1-6751-4d6f-b1c5-28a830b148f9_Data.csv',
    encoding='utf-8-sig',
)
refugee_data.head(3)

Unnamed: 0,﻿Series Name,Series Code,Country Name,Country Code,1990 [YR1990],2000 [YR2000],2007 [YR2007],2008 [YR2008],2009 [YR2009],2010 [YR2010],2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016]
0,Refugee population by country or territory of ...,SM.POP.REFG,Afghanistan,AFG,50,..,42,37,37,6434,3009,16187,16863,300423,257554,..
1,Refugee population by country or territory of ...,SM.POP.REFG,Albania,ALB,..,523,77,65,70,76,82,86,93,104,104,..
2,Refugee population by country or territory of ...,SM.POP.REFG,Algeria,DZA,169110,169656,94137,94093,94137,94144,94148,94133,94150,94128,94182,..


In [51]:
# Load the total-population table (series SP.POP.TOTL in the preview).
# 'utf-8-sig' strips the byte-order mark at the start of the file, which
# otherwise ends up inside the first column label ('\ufeffSeries Name').
population_data = pd.read_csv(
    './Data/world_population/28c06eca-bc68-48c5-b850-652a0893d6fe_Data.csv',
    encoding='utf-8-sig',
)
population_data.head(3)

Unnamed: 0,﻿Series Name,Series Code,Country Name,Country Code,1990 [YR1990],2000 [YR2000],2007 [YR2007],2008 [YR2008],2009 [YR2009],2010 [YR2010],2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016]
0,"Population, total",SP.POP.TOTL,Afghanistan,AFG,12067570,19701940.0,25877544.0,26528741.0,27207291.0,27962207.0,28809167.0,29726803,30682500,31627506,32526562,..
1,"Population, total",SP.POP.TOTL,Albania,ALB,3286542,3089027.0,2970017.0,2947314.0,2927519.0,2913021.0,2904780.0,2900247,2896652,2893654,2889167,..
2,"Population, total",SP.POP.TOTL,Algeria,DZA,25912364,31183658.0,34261971.0,34811059.0,35401790.0,36036159.0,36717132.0,37439427,38186135,38934334,39666519,..


In [52]:
# Keep only the country label and the 2015 value from the population table.
col_list_pop_2015 = ['Country Name', '2015 [YR2015]']
pop2015 = population_data.loc[:, col_list_pop_2015]

In [53]:
# Rename the 2015 value column so it cannot clash with the identically named
# refugee column when the two tables are merged. The rename is keyed by the
# current label and returns a new frame, so re-running this cell is harmless
# and a change in column order cannot mislabel the data.
pop2015 = pop2015.rename(columns={'2015 [YR2015]': '2015 population'})

In [54]:
# Keep only the country label and the 2015 value from the refugee table.
col_list_ref_2015 = ['Country Name', '2015 [YR2015]']
ref_2015 = refugee_data.loc[:, col_list_ref_2015]

In [55]:
# Left join: every refugee row is kept; rows whose country name has no match
# in the population table get NaN for the population column.
merged_2015 = ref_2015.merge(pop2015, how='left', on='Country Name')

In [56]:
# Convert both value columns to numbers. Entries that cannot be parsed (such
# as the '..' placeholder visible in the raw tables) become NaN.
for value_col in ('2015 [YR2015]', '2015 population'):
    merged_2015[value_col] = pd.to_numeric(merged_2015[value_col], errors='coerce')

In [57]:
# 2015 refugee count divided by 2015 total population.
# NOTE: despite the column name this is a plain fraction (not multiplied by 100).
merged_2015['percentage'] = merged_2015['2015 [YR2015]'].div(merged_2015['2015 population'])

In [58]:
# Preview the first five merged rows (refugees, population, ratio).
merged_2015.head(n=5)

Unnamed: 0,Country Name,2015 [YR2015],2015 population,percentage
0,Afghanistan,257554.0,32526562.0,0.007918
1,Albania,104.0,2889167.0,3.6e-05
2,Algeria,94182.0,39666519.0,0.002374
3,American Samoa,,55538.0,
4,Andorra,,70473.0,
