In [36]:
import numpy as np
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go
import warnings
# Install a blanket "ignore" filter for all Python warnings in this session.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning)
# that the cleaning cells below could otherwise surface.
warnings.filterwarnings('ignore')

In [37]:
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [39]:
# Enable plotly's offline notebook mode so that the iplot() calls below render inline.
init_notebook_mode(connected=True)

In [40]:
# Load the refugee data: one row per (asylum country, origin country, year)
# with the refugee count in 'Total Refugees'.
df = pd.read_csv('UN_refugees_data.csv')

In [41]:
# Preview the first ten rows of the raw data.
df.head(10)

Unnamed: 0,Country or territory of asylum or residence,Country or territory of origin,Year,Total Refugees
0,Afghanistan,Iraq,2016,1.0
1,Afghanistan,Islamic Rep. of Iran,2016,33.0
2,Afghanistan,Pakistan,2016,59737.0
3,Albania,China,2016,11.0
4,Albania,Dem. Rep. of the Congo,2016,3.0
5,Albania,Egypt,2016,3.0
6,Albania,Iraq,2016,23.0
7,Albania,Islamic Rep. of Iran,2016,17.0
8,Albania,Montenegro,2016,2.0
9,Albania,Peru,2016,1.0


In [42]:
# Drop rows with a missing refugee count. The explicit .copy() makes `df` an
# independent frame, so the later in-place cast of 'Total Refugees' does not
# write into a slice of the originally loaded data (chained assignment).
df = df[df['Total Refugees'].notnull()].copy()

In [43]:
# List the distinct years present in the data.
df['Year'].unique()

array([2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008, 2007, 2006,
       2005, 2004, 2003, 2002, 2001, 2000, 1999, 1998, 1997, 1996, 1995,
       1994, 1993, 1992, 1991, 1990, 1989, 1988, 1987, 1986, 1985, 1984,
       1983, 1982, 1981, 1980, 1979, 1978, 1977, 1976, 1975])

In [44]:
# Missing counts were removed above, so the column can be cast to integers.
df = df.astype({'Total Refugees': int})

In [45]:
# Confirm that 'Total Refugees' is now shown as integers.
df.head()

Unnamed: 0,Country or territory of asylum or residence,Country or territory of origin,Year,Total Refugees
0,Afghanistan,Iraq,2016,1
1,Afghanistan,Islamic Rep. of Iran,2016,33
2,Afghanistan,Pakistan,2016,59737
3,Albania,China,2016,11
4,Albania,Dem. Rep. of the Congo,2016,3


In [46]:
# Rows where the United States is the country of asylum, excluding the
# catch-all 'Various' origin.
usa = df.loc[
    df['Country or territory of asylum or residence'].eq('United States')
    & df['Country or territory of origin'].ne('Various')
]

In [47]:
# Order the US rows from the largest to the smallest count; reset_index()
# keeps the previous row labels in a new 'index' column.
usa = usa.sort_values(by='Total Refugees', ascending=False)
usa = usa.reset_index()
usa.head()

Unnamed: 0,index,Country or territory of asylum or residence,Country or territory of origin,Year,Total Refugees
0,88932,United States,Russian Federation,1994,245238
1,90373,United States,Russian Federation,1993,240641
2,91205,United States,Russian Federation,1992,211661
3,88948,United States,Viet Nam,1994,211376
4,90388,United States,Viet Nam,1993,205469


# Countries of origin with the largest peak number of refugees residing in the US (1975–2016)

In [48]:
# Highest single-year refugee count for each origin country, largest first.
max_per_country = (
    usa.groupby('Country or territory of origin')['Total Refugees']
    .max()
    .sort_values(ascending=False)
    .reset_index()
)

In [49]:
# Keep the 20 origin countries with the highest peaks (already sorted descending).
max_per_country = max_per_country.iloc[:20]

In [50]:
# Look up the year in which each country reached its peak. The join uses the
# origin country as well as the count: joining on the count alone would also
# pair a country with any other country's row that reports the same number,
# producing spurious rows with the wrong year.
df_map = pd.merge(
    max_per_country,
    usa,
    how='inner',
    on=['Country or territory of origin', 'Total Refugees'],
).rename(
    # Keep the '_x' column name that the following cells select and plot.
    columns={'Country or territory of origin': 'Country or territory of origin_x'}
)

In [51]:
# Keep only the origin country, its peak count and the year of that peak.
df_map = df_map.loc[:, ['Country or territory of origin_x', 'Total Refugees', 'Year']]

In [52]:
# Preview the frame that feeds the choropleth map below.
df_map.head()

Unnamed: 0,Country or territory of origin_x,Total Refugees,Year
0,Russian Federation,245238,1994
1,Viet Nam,211376,1994
2,Bosnia and Herzegovina,114615,2006
3,China,74020,2015
4,Somalia,72546,2006


In [61]:
# Five-stop colour scale running from blue (lowest z) to red (highest z).
colorscale = [
    [0, 'rgb(31,120,180)'],
    [0.25, 'rgb(178,223,138)'],
    [0.50, 'rgb(51,160,44)'],
    [0.85, 'rgb(251,154,153)'],
    [1, 'rgb(227,26,28)'],
]

# One choropleth trace: each origin country is coloured by its peak refugee
# count, and the hover text shows the year of that peak.
data1 = [
    go.Choropleth(
        locations=df_map['Country or territory of origin_x'],
        locationmode='country names',
        z=df_map['Total Refugees'],
        text="Year: " + df_map['Year'].astype(str),
        colorscale=colorscale,
        # White, 2px-wide country outlines.
        marker=dict(line=dict(color='rgb(255,255,255)', width=2)),
        colorbar=dict(title="Number of Refugees"),
    )
]

# Map styling: frameless equirectangular projection restricted to the given
# latitude/longitude window, with a longitude grid every 20 degrees.
layout = go.Layout(
    title=dict(
        text='Largest Number of Refugees in USA since 1975 by Country of Origin'
    ),
    geo=dict(
        showframe=False,
        center=dict(lon=38, lat=9),
        showcoastlines=True,
        showland=True,
        landcolor="rgb(229, 229, 229)",
        countrycolor="rgb(255, 255, 255)",
        coastlinecolor="rgb(255, 255, 255)",
        projection=dict(type='equirectangular'),
        lataxis=dict(range=[-40, 90]),
        lonaxis=dict(range=[-140, 135], showgrid=True, dtick=20),
    ),
)

fig = go.Figure(data=data1, layout=layout)
iplot(fig, filename='Refugees in USA')

# Countries of origin with the largest number of refugees residing in the US in 2016

In [54]:
# `usa` is sorted by count (descending), so the first 20 rows for 2016 are the
# 20 largest origin countries of that year.
df_map2016 = usa.loc[usa['Year'].eq(2016)].iloc[:20]

In [55]:
# Swap the long official country names for short ones.
df_map2016 = df_map2016.replace({
    'Islamic Rep. of Iran': 'Iran',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Syrian Arab Rep.': "Syria",
    'Russian Federation': 'Russia',
})


In [62]:
# Load population figures: one row per country, one column per year (see the
# preview below).
df_popul = pd.read_csv('population_by_country.csv')
df_popul.head()

Unnamed: 0,Country,1975,1976,1977,1978,1979,1980,1981,1982,1983,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,Aruba,60657.0,60586.0,60366.0,60103.0,59980.0,60096.0,60567.0,61345.0,62201.0,...,101220.0,101353.0,101453.0,101669.0,102053.0,102577.0,103187.0,103795.0,104341.0,104822.0
1,Afghanistan,12590286.0,12840299.0,13067538.0,13237734.0,13306695.0,13248370.0,13053954.0,12749645.0,12389269.0,...,26616792.0,27294031.0,28004331.0,28803167.0,29708599.0,30696958.0,31731688.0,32758020.0,33736494.0,34656032.0
2,Angola,7682479.0,7900997.0,8130988.0,8376147.0,8641521.0,8929900.0,9244507.0,9582156.0,9931562.0,...,20997687.0,21759420.0,22549547.0,23369131.0,24218565.0,25096150.0,25998340.0,26920466.0,27859305.0,28813463.0
3,Albania,2404831.0,2458526.0,2513546.0,2566266.0,2617832.0,2671997.0,2726056.0,2784278.0,2843960.0,...,2970017.0,2947314.0,2927519.0,2913021.0,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0
4,Andorra,30705.0,31777.0,32771.0,33737.0,34818.0,36067.0,37500.0,39114.0,40867.0,...,82683.0,83861.0,84462.0,84449.0,83751.0,82431.0,80788.0,79223.0,78014.0,77281.0


In [63]:
# Display the 2016 subset after the country names were shortened.
df_map2016

Unnamed: 0,index,Country or territory of asylum or residence,Country or territory of origin,Year,Total Refugees
30,5123,United States,China,2016,72507
136,5161,United States,Haiti,2016,18484
182,5141,United States,El Salvador,2016,14331
217,5157,United States,Guatemala,2016,11328
220,5140,United States,Egypt,2016,11220
243,5145,United States,Ethiopia,2016,10216
304,5162,United States,Honduras,2016,7128
316,5203,United States,Nepal,2016,6784
334,5249,United States,Syria,2016,6444
350,5195,United States,Mexico,2016,6161


In [57]:
# Treemap of the 2016 origin countries: one rectangle per country, with area
# proportional to its refugee count.
#
# squarify is only needed by this cell, so it is imported here; `go` and
# `iplot` are already imported by the notebook's first cells.
import squarify

# Canvas on which the rectangles are laid out (arbitrary 100 x 100 units).
x = 0.
y = 0.
width = 100.
height = 100.

values = df_map2016['Total Refugees'].tolist()
countries = df_map2016['Country or territory of origin'].tolist()

# Scale the counts to the canvas area, then compute the rectangle layout.
normed = squarify.normalize_sizes(values, width, height)
rects = squarify.squarify(normed, x, y, width, height)

# Dark-to-light purple palette; rectangles beyond the palette length reuse the
# last (lightest) colour.
color_brewer = ['rgb(117,107,177)', 'rgb(158,154,200)', 'rgb(188,189,220)', 'rgb(218,218,235)',
    'rgb(242,240,247)']
last_color = len(color_brewer) - 1

# Centre point of every rectangle, used for both the labels and the hover trace.
centers = [(r['x'] + r['dx'] / 2, r['y'] + r['dy'] / 2) for r in rects]

shapes = []
annotations = []
for i, (r, country) in enumerate(zip(rects, countries)):
    shapes.append(
        dict(
            type = 'rect',
            x0 = r['x'],
            y0 = r['y'],
            x1 = r['x'] + r['dx'],
            y1 = r['y'] + r['dy'],
            line = dict( width = 2 ),
            fillcolor = color_brewer[min(i, last_color)]
        )
    )
    annotations.append(
        dict(
            x = centers[i][0],
            y = centers[i][1],
            text = country,
            showarrow = False
        )
    )

# Hover trace. The trace previously carried `text` but no x/y coordinates, so
# it drew nothing and no hover label could appear. Invisible markers at the
# rectangle centres now carry "<country>: <count>" as hover text without
# drawing over the country-name annotations.
trace0 = go.Scatter(
    x = [c[0] for c in centers],
    y = [c[1] for c in centers],
    text = ['{}: {:,}'.format(c, v) for c, v in zip(countries, values)],
    mode = 'markers',
    marker = dict(opacity = 0),
    hoverinfo = 'text',
)

layout = go.Layout(
    title = go.layout.Title(
        text = '<b>Largest Number of Refugees in USA in 2016 by Country of Origin</b>'
    ),
    height = 730,
    width = 730,
    xaxis = dict(visible=False),
    yaxis = dict(visible=False),
    shapes = shapes,
    annotations = annotations,
    hovermode = 'closest'
)

figure = dict(data=[trace0], layout=layout)
iplot(figure, filename='treemap')

In [None]:
# Load the European-countries file: semicolon-separated with no header row, so
# the columns get integer labels (0, 1, ...).
df_europe = pd.read_csv('europe_countries.csv', header=None, sep=';')

In [None]:
# The column labelled 0 of the headerless file, as a plain Python list.
# NOTE(review): presumably this column holds the country names — confirm against the file.
country_list = df_europe.loc[:, 0].tolist()

In [None]:
# Re-display the 2016 subset for reference.
df_map2016