In [20]:
import pandas as pd
import numpy as np

child_mortality = pd.read_csv('child-mortality.csv')
child_mortality.head(100)

Unnamed: 0,Entity,Code,Year,"Child mortality (Select Gapminder, v10) (2017)"
0,Afghanistan,AFG,1957,381.194
1,Afghanistan,AFG,1958,375.190
2,Afghanistan,AFG,1959,369.361
3,Afghanistan,AFG,1960,363.700
4,Afghanistan,AFG,1961,357.500
...,...,...,...,...
95,Albania,ALB,2013,14.900
96,Albania,ALB,2014,14.400
97,Albania,ALB,2015,14.000
98,Albania,ALB,2016,13.500


In [21]:
def embed_map(m, file_name):
    from IPython.display import IFrame
    m.save(file_name)
    return IFrame(file_name, width='100%', height='500px')

In [22]:
child_mortality.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13512 entries, 0 to 13511
Data columns (total 4 columns):
 #   Column                                          Non-Null Count  Dtype  
---  ------                                          --------------  -----  
 0   Entity                                          13512 non-null  object 
 1   Code                                            13512 non-null  object 
 2   Year                                            13512 non-null  int64  
 3   Child mortality (Select Gapminder, v10) (2017)  13512 non-null  float64
dtypes: float64(1), int64(1), object(2)
memory usage: 422.4+ KB


### TASK 1 : Minimum values of child mortality in different countries

In [23]:
child_mortality.rename(columns={"Child mortality (Select Gapminder, v10) (2017)": "Child_mortality"}, inplace=True)
df = child_mortality[["Entity", "Code", "Child_mortality"]]
df.groupby("Code").min().sort_values("Child_mortality")

Unnamed: 0_level_0,Entity,Child_mortality
Code,Unnamed: 1_level_1,Unnamed: 2_level_1
ISL,Iceland,2.100
HKG,Hong Kong,2.282
FIN,Finland,2.300
SVN,Slovenia,2.300
LUX,Luxembourg,2.400
...,...,...
MLI,Mali,110.600
SLE,Sierra Leone,113.500
CAF,Central African Republic,123.600
TCD,Chad,127.300


### TASK 2 : Least value of CM in countries in 2016

In [24]:
# Check if 2017 is available for all countries
df = child_mortality[child_mortality["Year"] == 2016]
df = df[["Entity", "Code", "Child_mortality"]]  # Use only relevant columns
df.sort_values("Child_mortality", inplace=True)

df.head()

Unnamed: 0,Entity,Code,Child_mortality
5528,Iceland,ISL,2.1
5217,Hong Kong,HKG,2.282
10883,Slovenia,SVN,2.3
4087,Finland,FIN,2.3
7229,Luxembourg,LUX,2.4


### TASK 3 : Plot this on the map. Create a Choropleth

In [25]:
import folium
from folium import Choropleth
import geopandas as gpd

In [26]:
worldfilepath = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(worldfilepath)
print(len(world))
world.head()

177


Unnamed: 0,pop_est,continent,name,iso_a3,gdp_md_est,geometry
0,920938,Oceania,Fiji,FJI,8374.0,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,53950935,Africa,Tanzania,TZA,150600.0,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,603253,Africa,W. Sahara,ESH,906.5,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,35623680,North America,Canada,CAN,1674000.0,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,326625791,North America,United States of America,USA,18560000.0,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."


In [27]:
child_mortality.rename(columns={"Code": "iso_a3"}, inplace=True)
child_mortality.head()

Unnamed: 0,Entity,iso_a3,Year,Child_mortality
0,Afghanistan,AFG,1957,381.194
1,Afghanistan,AFG,1958,375.19
2,Afghanistan,AFG,1959,369.361
3,Afghanistan,AFG,1960,363.7
4,Afghanistan,AFG,1961,357.5


In [28]:
df = child_mortality[child_mortality["Year"] == 2016]
df = df.merge(world, on="iso_a3")

c_m = df[["iso_a3", "Child_mortality"]].set_index("iso_a3")
country_geometry = df[["iso_a3", "geometry"]].set_index("iso_a3")

country_geom_gdf = gpd.GeoDataFrame(country_geometry, crs="EPSG:4326")
country_geom_gdf.head()

Unnamed: 0_level_0,geometry
iso_a3,Unnamed: 1_level_1
AFG,"POLYGON ((66.51861 37.36278, 67.07578 37.35614..."
ALB,"POLYGON ((21.02004 40.84273, 20.99999 40.58000..."
DZA,"POLYGON ((-8.68440 27.39574, -8.66512 27.58948..."
AGO,"MULTIPOLYGON (((12.99552 -4.78110, 12.63161 -4..."
ARG,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000..."


In [29]:
# Finally, lets create the map
m = folium.Map(location=[0.0, 0.0], tiles='openstreetmap', zoom_start=2)

Choropleth(geo_data=country_geom_gdf.__geo_interface__,
          data=c_m["Child_mortality"], key_on='feature.id', fill_color='YlGnBu', legend_name='Child Mortality(2016)').add_to(m)

embed_map(m, 'Child_mortality-2016.html')

### TASK 4 : Compare this to how it was back in 1990

In [30]:
# Get data for 1990
data_1990 = child_mortality[child_mortality["Year"] == 1990].copy()

# Merge data to make sure only countries for which data is available are plotted
merged_df = data_1990.merge(world, on="iso_a3")

# Convert to GeoDataFrame
geom_1990 = gpd.GeoDataFrame(merged_df[["iso_a3", "geometry"]].set_index("iso_a3"), crs="EPSG:4326")

# 1990's data for child_mortality
data = merged_df[["iso_a3", "Child_mortality"]].set_index("iso_a3")

In [31]:
m = folium.Map(location=[0.0, 0.0], tiles='openstreetmap', zoom_start=2)
Choropleth(geo_data=geom_1990.__geo_interface__, data=data["Child_mortality"],
          key_on='feature.id', fill_color='YlGnBu', legend_name="Child Mortality(1990)").add_to(m)

embed_map(m, 'Child_mortality-1990.html')

#### Overall, the two maps look almost the same. Child mortality rate had reduced drastically since the 1990's(as you can see in the legend)

### TASK 5 : Make a similar map with the plot corrsponding to the percentage of reduction in the child mortality rate. Use the first occurence and last occurence of data for each country 

In [32]:
# First merge the two dataframes
merged = child_mortality.merge(world, on="iso_a3")

In [33]:
# child_mortality.groupby("iso_a3").apply()
reduction = merged.groupby('iso_a3')[['Child_mortality', 'Year']].agg(['first', 'last'])

# Find the reduction in child mortality
reduction["Percentage"] = (reduction['Child_mortality']["last"]
                           - reduction['Child_mortality']['first'])* -100.0 / reduction['Child_mortality']['first']
reduction["Year_diff"] = reduction["Year"]["last"] - reduction['Year']['first']

reduction['Percentage_per_year'] = reduction['Percentage'] / reduction['Year_diff']

print(len(reduction))
reduction.head()

167


Unnamed: 0_level_0,Child_mortality,Child_mortality,Year,Year,Percentage,Year_diff,Percentage_per_year
Unnamed: 0_level_1,first,last,first,last,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
iso_a3,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
AFG,381.194,70.4,1957,2016,81.531714,59,1.381893
AGO,236.4,82.5,1980,2016,65.101523,36,1.808376
ALB,90.8,13.5,1978,2016,85.132159,38,2.24032
ARE,204.2,7.7,1960,2016,96.229187,56,1.718378
ARG,238.82,11.1,1911,2016,95.352148,105,0.908116


#### Now plot the percentage on the choropleth. This would tell us how the rates of child mortality have dropped across different regions.
As data is available from different start points, some of the countries might not be representative of the actual drop they have experienced. Eg. Russia - dat available from 2012, where the child mortality is already quite small. So, we will plot the percentage reduction per year for which data is available

In [34]:
# Create the GeoDataFrame
geometry_data = merged[merged["Year"] == 2016]
geometry_data = geometry_data[["iso_a3", "geometry"]].set_index("iso_a3")

geometry_data_gdf = gpd.GeoDataFrame(geometry_data, crs="EPSG:4326")

print(len(geometry_data_gdf))
geometry_data_gdf.head()

167


Unnamed: 0_level_0,geometry
iso_a3,Unnamed: 1_level_1
AFG,"POLYGON ((66.51861 37.36278, 67.07578 37.35614..."
ALB,"POLYGON ((21.02004 40.84273, 20.99999 40.58000..."
DZA,"POLYGON ((-8.68440 27.39574, -8.66512 27.58948..."
AGO,"MULTIPOLYGON (((12.99552 -4.78110, 12.63161 -4..."
ARG,"MULTIPOLYGON (((-68.63401 -52.63637, -68.25000..."


In [35]:
# Finally, create the base map and the Choropleth
m = folium.Map(location=[0.0, 0.0], tiles='openstreetmap', zoom_start=2)

# Choropleth
Choropleth(geo_data=geometry_data_gdf.__geo_interface__, data=reduction['Percentage'],
          key_on='feature.id', fill_color='YlGnBu', legend_name="Percentage reduction in Child Mortality").add_to(m)

m_1 = folium.Map(location=[0.0, 0.0], tiles='openstreetmap', zoom_start=2)

# Choropleth
Choropleth(geo_data=geometry_data_gdf.__geo_interface__, data=reduction['Percentage_per_year'],
          key_on='feature.id', fill_color='YlGnBu', legend_name="Percentage reduction in Child Mortality").add_to(m_1)

embed_map(m, "Child_mortality_percentage_reduction.html")

In [36]:
embed_map(m_1, 'Child_mortality_percentage_reduction_per_year.html')

### Lets create a time slider choropleth which plots the transition of child mortality over the years

In [37]:
# creating 11 bins to check values of child_mortality
bins = np.linspace(min(child_mortality['Child_mortality']), max(child_mortality['Child_mortality']), 11)
bins

array([  2.1  ,  70.098, 138.096, 206.094, 274.092, 342.09 , 410.088,
       478.086, 546.084, 614.082, 682.08 ])

In [41]:
child_mortality['color'] = pd.cut(
    child_mortality['Child_mortality'], bins,
    labels=['#FFEBEB','#F8D2D4','#F2B9BE','#EBA1A8','#E58892','#DE6F7C','#D85766','#D13E50','#CB253A','#C50D24'],
    include_lowest=False)
child_mortality['color'].replace(np.nan, '#32CD32', inplace=True)
child_mortality.color.value_counts()

#FFEBEB    6480
#F8D2D4    2619
#F2B9BE    1766
#EBA1A8    1371
#E58892     827
#DE6F7C     303
#D85766      97
#D13E50      38
#C50D24       5
#CB253A       5
Name: color, dtype: int64

In [60]:
plot_data = child_mortality[["iso_a3", "Year", "color"]]
plot_data.head()

Unnamed: 0,iso_a3,Year,color
0,AFG,1957,#DE6F7C
1,AFG,1958,#DE6F7C
2,AFG,1959,#DE6F7C
3,AFG,1960,#DE6F7C
4,AFG,1961,#DE6F7C


In [61]:
# Now add rows for countries which do not have recordings for a particular year
for year in plot_data['Year'].unique():
    diff = set(plot_data['iso_a3'].unique()) - set(plot_data[plot_data['Year'] == year]['iso_a3'])
    for country in diff:
        plot_data = pd.concat([plot_data, pd.DataFrame([[country, year, '#0073CF']], columns=['iso_a3', 'Year', 'color'])],
                              ignore_index=True)
plot_data.sort_values('Year', inplace=True)
plot_data.head()

Unnamed: 0,iso_a3,Year,color
11491,SWE,1800,#DE6F7C
44661,SDN,1800,#0073CF
44662,DMA,1800,#0073CF
44663,MLI,1800,#0073CF
44664,TUR,1800,#0073CF


In [68]:
p_data = plot_data.copy()
# p_data['Year'] = (p_data['Year'].astype(int) * 10**6).astype('U10')
p_data.head()

plot_dict = {}
for i in p_data['iso_a3'].unique():
    plot_dict[i] = {}
    for j in p_data[p_data['iso_a3'] == i].set_index(['iso_a3']).values:   
        plot_dict[i][j[0]] = {'color': j[1], 'opacity': 0.7 }
list(plot_dict.items())[10]


('MKD',
 {1800: {'color': '#0073CF', 'opacity': 0.7},
  1801: {'color': '#0073CF', 'opacity': 0.7},
  1802: {'color': '#0073CF', 'opacity': 0.7},
  1803: {'color': '#0073CF', 'opacity': 0.7},
  1804: {'color': '#0073CF', 'opacity': 0.7},
  1805: {'color': '#0073CF', 'opacity': 0.7},
  1806: {'color': '#0073CF', 'opacity': 0.7},
  1807: {'color': '#0073CF', 'opacity': 0.7},
  1808: {'color': '#0073CF', 'opacity': 0.7},
  1809: {'color': '#0073CF', 'opacity': 0.7},
  1810: {'color': '#0073CF', 'opacity': 0.7},
  1811: {'color': '#0073CF', 'opacity': 0.7},
  1812: {'color': '#0073CF', 'opacity': 0.7},
  1813: {'color': '#0073CF', 'opacity': 0.7},
  1814: {'color': '#0073CF', 'opacity': 0.7},
  1815: {'color': '#0073CF', 'opacity': 0.7},
  1816: {'color': '#0073CF', 'opacity': 0.7},
  1817: {'color': '#0073CF', 'opacity': 0.7},
  1818: {'color': '#0073CF', 'opacity': 0.7},
  1819: {'color': '#0073CF', 'opacity': 0.7},
  1820: {'color': '#0073CF', 'opacity': 0.7},
  1821: {'color': '#0073CF

In [82]:
geometry_data = geometry_data.merge(plot_data, on='iso_a3')
geometry_data = geometry_data[["iso_a3", "geometry"]].set_index("iso_a3")
geometry_data.head()

MemoryError: Unable to allocate 12.7 GiB for an array with shape (1706458271,) and data type int64

In [78]:
from folium.plugins import TimeSliderChoropleth

m = folium.Map(location=[0.0, 0.0], tiles='openstreetmap', zoom_start=2)
time_slider = TimeSliderChoropleth(geometry_data.to_json(default_handler=str),
                                  styledict=plot_dict).add_to(m)
embed_map(m, 'time_slider_choropleth.html')