Installing the dependencies of the EDA code, mainly geopandas and folium, which are used to visualise the data on a map. Run the following cell if the packages are not already installed in your environment.

In [None]:
!pip install geopandas
!pip install matplotlib

In [7]:
import matplotlib.pyplot as plt
from matplotlib import patches
import numpy as np
import pandas as pd
import geopandas
import folium
import folium.plugins as plugins
import branca.colormap
from collections import defaultdict, OrderedDict

## Correlation of the features
Load the combined dataset into a pandas DataFrame and compute the pairwise correlation of its numeric features.

In [8]:
# Load the combined dataset; the first row of the CSV holds the column names.
df = pd.read_csv('drive/MyDrive/CMPS 276/Milestone 2/Dataset.csv', header=0)
# Pairwise Pearson correlation of the numeric features. The numeric columns are
# selected explicitly because pandas >= 2.0 no longer drops string columns
# silently in DataFrame.corr() and raises instead.
df.select_dtypes(include='number').corr()

Unnamed: 0,Year,Net Migration Rate,DALYs,HDI,GDP,Life Expectancy,Inflation,Mortality,Healthcare expenditure
Year,1.0,0.002241,-0.223946,0.231989,-0.115377,0.465698,-0.014606,-0.17684,-0.097195
Net Migration Rate,0.002241,1.0,-0.155774,0.23621,0.059809,0.115006,-0.005486,-0.247303,-0.138234
DALYs,-0.223946,-0.155774,1.0,-0.807246,-0.012704,-0.905231,0.048106,0.67075,0.228633
HDI,0.231989,0.23621,-0.807246,1.0,-0.082606,0.916174,-0.046846,-0.56112,-0.425699
GDP,-0.115377,0.059809,-0.012704,-0.082606,1.0,-0.056903,-0.057767,0.122612,0.144635
Life Expectancy,0.465698,0.115006,-0.905231,0.916174,-0.056903,1.0,-0.03978,-0.620443,-0.344102
Inflation,-0.014606,-0.005486,0.048106,-0.046846,-0.057767,-0.03978,1.0,0.069681,0.048965
Mortality,-0.17684,-0.247303,0.67075,-0.56112,0.122612,-0.620443,0.069681,1.0,0.130536
Healthcare expenditure,-0.097195,-0.138234,0.228633,-0.425699,0.144635,-0.344102,0.048965,0.130536,1.0


## Animated Heatmap
First, we define a helper function that prepares the data for the animated heat map.

In [9]:
def _capAndScale(values):
    """Cap 3-sigma outliers of one year's values and min-max scale them.

    Values whose z-score (population std, ddof=0) is >= 3 are replaced by the
    maximum of `values`; after that, values whose recomputed z-score is <= -3
    are replaced by the minimum. The result is scaled to [0, 1] and rounded to
    3 decimals. Missing values stay missing. If all non-missing values are
    equal the division is 0/0 and the result is NaN.
    """
    # NOTE(review): the cap uses the extreme of the series itself (outliers
    # included), as the original code did -- confirm this is the intended cap.
    zscore = (values - values.mean()) / values.std(ddof=0)
    # `~(z >= 3)` instead of `z < 3`: a NaN z-score compares False either way,
    # and the negated form keeps missing values instead of overwriting them.
    values = values.where(~(zscore >= 3), values.max())

    zscore = (values - values.mean()) / values.std(ddof=0)
    values = values.where(~(zscore <= -3), values.min())

    low, high = values.min(), values.max()
    return ((values - low) / (high - low)).round(3)


def prepForHeatMap(df, years=range(1955, 2020)):
    """Join the indicator table onto country shapes and rescale it per year.

    Steps:
      1. Drop rows of `df` with fewer than 10 non-missing values and cast the
         indicator columns to numeric dtypes.
      2. Load the 'naturalearth_lowres' layer bundled with geopandas, patch a
         few country codes / names, and inner-join it with `df` on
         'Country Code'.
      3. Keep a copy of the migration rate rounded to 3 decimals in
         'Migration Rate' (inserted at position 8).
      4. For every column from position 9 onward and every year in `years`,
         cap 3-sigma outliers and min-max scale the year's values to [0, 1].
      5. Add each country's centroid ('Center_point') and its coordinates
         ('long', 'lat') at positions 1-3.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Country Code', 'Country Name', 'Year' and the indicator
        columns named in the `astype` call below.
    years : iterable of int, optional
        Years whose values are capped and scaled. Rows of other years keep
        their raw values. Defaults to 1955-2019.
        NOTE(review): getHTML's default range also renders 2020, which the
        default here leaves unscaled -- confirm whether that is intended.

    Returns
    -------
    The merged frame (one row per country and year) with the columns
    described above.
    """
    df = df.dropna(thresh=10)

    df = df.astype(dtype={"HDI": "float64", "DALYs": "float64",
                          "GDP": "float64", "Year": "int32", "Net Migration Rate": "float64",
                          "Life Expectancy": "float64", 'Mortality': "float64",
                          "Inflation": "float64", "Healthcare expenditure": "float64"})

    # geopandas.datasets only exists in geopandas releases before 1.0.
    world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

    world.columns = ['Pop_est', 'Continent', 'name', 'Country Code', 'gdp_md_est', 'geometry']

    # Patch codes / names so they line up with the indicator table
    # (presumably the bundled layer lacks usable codes for Norway and France
    # and spells some names differently -- verify against the dataset).
    world.loc[world["name"] == "Norway", "Country Code"] = 'NOR'
    world.loc[world["name"] == "France", "Country Code"] = 'FRA'
    world.loc[world["name"] == "United States of America", "name"] = 'United States'
    world.loc[world["Country Code"] == 'COD', "name"] = 'Democratic Republic of Congo'
    world.loc[world["Country Code"] == 'CAF', "name"] = "Central African Republic"

    merge = pd.merge(world, df, on='Country Code')
    merge = merge.rename(columns={'Net Migration Rate': 'migRate',
                                  'Life Expectancy': 'lifeExp', 'Mortality': 'mortality'})
    merge = merge.drop(columns='Country Name')
    merge.insert(8, 'Migration Rate', merge['migRate'].round(3))

    # The slice is positional: it covers every column after the inserted
    # 'Migration Rate' column, so it depends on the column order of `df`.
    for col in merge.columns[9:]:
        for year in years:
            in_year = merge['Year'] == year
            if not in_year.any():
                continue
            merge.loc[in_year, col] = _capAndScale(merge.loc[in_year, col])

    # Centroids are computed in an equal-area projection and converted back to
    # the layer's own CRS.
    centroids = merge['geometry'].to_crs('+proj=cea').centroid.to_crs(merge['geometry'].crs)
    merge.insert(1, 'Center_point', centroids)
    merge.insert(2, 'long', centroids.map(lambda p: p.x))
    merge.insert(3, 'lat', centroids.map(lambda p: p.y))
    return merge

Next, we define the function that generates the animated heat map and call it.

In [10]:
def heatMap(df, col):
    """Build an animated (one frame per year) folium heat map weighted by `col`.

    The data is prepared with prepForHeatMap; every country contributes one
    point at its centroid ('lat', 'long') per year, weighted by its value in
    `col`. The map is written to 'heatMap.html' and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw indicator table, passed unchanged to prepForHeatMap.
    col : str
        Name of the column (after prepForHeatMap's renaming) used as weight.

    Returns
    -------
    folium.Map
    """
    m = folium.Map([0, 0], zoom_start=2)
    merge = prepForHeatMap(df)

    # Year -> list of [lat, long, weight]. `.tolist()` yields plain Python
    # numbers, which is what gets serialised into the generated HTML.
    frames = defaultdict(list)
    rows = zip(merge['Year'].tolist(), merge['lat'].tolist(),
               merge['long'].tolist(), merge[col].tolist())
    for year, lat, lon, weight in rows:
        points = frames[year]  # registers the year even if all its rows are skipped
        if pd.isna(weight):
            # A point without a value carries no information for the heat map.
            continue
        if weight == 0:
            # Zero weights are nudged to a small positive value
            # (presumably so the point is still drawn -- TODO confirm).
            weight = 0.001
        points.append([lat, lon, weight])
    frames = OrderedDict(sorted(frames.items(), key=lambda t: t[0]))

    steps = 10
    colormap = branca.colormap.LinearColormap(['purple', 'red', 'orange', 'yellow', 'green'],
                                              index=[0, 0.25, 0.5, 0.7, 0.9]).to_step(steps)
    # Gradient stops "0.0", "0.1", ... mapped to the step colour at that stop.
    gradient = {}
    for i in range(steps):
        stop = 1 / steps * i
        gradient[str(round(stop, 3))] = colormap.rgb_hex_str(stop)
    colormap.add_to(m)

    hm = plugins.HeatMapWithTime(data=list(frames.values()),
                                 index=list(frames.keys()),
                                 index_steps=1,
                                 radius=30,
                                 auto_play=True,
                                 max_opacity=0.5,
                                 min_opacity=0.2,
                                 gradient=gradient)

    hm.add_to(m)
    m.save('heatMap.html')
    return m

# Build the animated heat map weighted by the 'migRate' column; the returned
# map is the cell's last expression, so it is displayed inline.
heatMap(df, 'migRate')

## Map with feature stacking


In [11]:
# One entry per stacked choropleth layer, in the order the layers are added:
# (data column, colour palette, legend caption, label in the layer control).
_STACKED_LAYERS = (
    ('migRate', 'YlOrRd', 'Net Migration Rate', 'Net Migration Rate'),
    ('DALYs', 'BuPu', 'DALYs', 'DALYs'),
    ('GDP', 'BuGn', 'GDP', 'GDP'),
    ('Inflation', 'PuRd', 'Inflation', 'Inflation'),
    ('HDI', 'YlGnBu', 'HDI', 'HDI'),
    ('mortality', 'BuPu', 'Mortality', 'mortality'),
    ('lifeExp', 'YlGnBu', 'Life Expectancy', 'Life Expectancy'),
    ('Healthcare expenditure', 'YlOrBr', 'Healthcare expenditure', 'Healthcare expenditure'),
)


def getHTML(df, year=None, out_dir='drive/MyDrive/CMPS 276/Milestone 2/StackedMaps'):
    """Render one stacked choropleth map per year and save each as HTML.

    Every map holds one toggleable choropleth layer per entry of
    _STACKED_LAYERS, plus a transparent overlay kept in front that highlights
    the hovered country and shows its name and 'Migration Rate' in a tooltip.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw indicator table, passed unchanged to prepForHeatMap.
    year : int, optional
        Render only this year. When omitted, the years 1990-2020 are rendered.
    out_dir : str, optional
        Directory that receives '<year>.html'; created if it does not exist.

    Returns
    -------
    folium.Map
        The map of the last rendered year.
    """
    import os  # local import: only needed to build the output path

    merge = prepForHeatMap(df)
    merge = merge.replace('Unknown', np.nan)
    merge = merge.drop(columns=['Center_point', 'long', 'lat', 'Pop_est', 'Continent',
                                'Country Code', 'gdp_md_est'], errors='ignore')

    if year is not None:
        startYear, endYear = year, year + 1
    else:
        startYear, endYear = 1990, 2021

    os.makedirs(out_dir, exist_ok=True)

    # Styles of the transparent hover overlay.
    style_function = lambda x: {'fillColor': '#ffffff',
                                'color': '#000000',
                                'fillOpacity': 0.1,
                                'weight': 0.1}
    highlight_function = lambda x: {'fillColor': '#000000',
                                    'color': '#000000',
                                    'fillOpacity': 0.50,
                                    'weight': 0.1}

    print("Generating Stacked Maps...")
    for current in range(startYear, endYear):
        # Progress line: years separated by ' - ', newline after the last one.
        print(current, end=' - ' if current != endYear - 1 else '\n')

        my_map = folium.Map([0, 0], zoom_start=2)
        data = merge[merge['Year'] == current]

        for column, palette, legend, label in _STACKED_LAYERS:
            folium.Choropleth(
                geo_data=data,
                data=data,
                columns=['name', column],
                key_on="feature.properties.name",
                fill_color=palette,
                fill_opacity=0.8,
                line_opacity=0.1,
                legend_name=legend,
                smooth_factor=0,
                # The keyword is lowercase; the former `Highlight=True` was
                # swallowed by **kwargs and had no effect.
                highlight=True,
                line_color="#0000",
                name=label,
                show=True,
                overlay=True,
                nan_fill_color="White"
            ).add_to(my_map)

        NIL = folium.features.GeoJson(
            data=data,
            style_function=style_function,
            control=False,
            highlight_function=highlight_function,
            tooltip=folium.features.GeoJsonTooltip(
                fields=['name', 'Migration Rate'],
                aliases=['Country', 'Migration Rate'],
                style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
            )
        )
        my_map.add_child(NIL)
        my_map.keep_in_front(NIL)

        folium.LayerControl(collapsed=False, autoZIndex=False).add_to(my_map)
        my_map.save(os.path.join(out_dir, f'{current}.html'))
    return my_map


In [12]:
# No year is given, so getHTML renders and saves one stacked map for every
# year in its default range and returns the map of the last year.
getHTML(df)

Generating Stacked Maps...
1990 - 1991 - 1992 - 1993 - 1994 - 1995 - 1996 - 1997 - 1998 - 1999 - 2000 - 2001 - 2002 - 2003 - 2004 - 2005 - 2006 - 2007 - 2008 - 2009 - 2010 - 2011 - 2012 - 2013 - 2014 - 2015 - 2016 - 2017 - 2018 - 2019 - 2020
