In [10]:
#!/usr/bin/env python3

import pandas as pd

# In Anaconda navigator I needed to install geopandas and then update it to fix fiona / gdal error
# https://stackoverflow.com/questions/42749254/error-in-importing-geopandas#43762549
# https://github.com/GenericMappingTools/gmt-python/issues/104
import geopandas as gpd

import matplotlib.pyplot as plt

# https://stackoverflow.com/questions/39215202/matplotlib-geopandas-plot-multiple-layers-control-figsize
# https://stackoverflow.com/questions/36367986/how-to-make-inline-plots-in-jupyter-notebook-larger
# https://erdatadoc.com/2016/07/14/jupyter-notebook-matplotlib-figure-display-options-and-pandas-set_option-optimization-tips/
# plt.rcParams['figure.figsize'] = [10, 20]

# display plots inside jupyter
%matplotlib inline

import malaria_scraper


In [11]:
# Create country malaria dataframe

In [12]:
# Build the per-country malaria dataframe using the project-local malaria_scraper module.
df_malaria = malaria_scraper.get_dataframe_all_countries()
# Preview the first rows as this cell's displayed output.
df_malaria.head()

Unnamed: 0,country,areas_with_malaria,estimated_risk
0,Afghanistan,April–December in all areas at altitudes bel...,moderate
1,Albania,,none
2,Algeria,,none
3,American Samoa,,none
4,Andorra,,none


# Create map dataframe

In [13]:
def get_map_df():
    """
    Load the low-resolution Natural Earth world map that ships with geopandas.

    All column names are lower-cased, and the ``name`` column is then renamed
    to ``country``.

    Alternative map sources that could be used instead:
    http://ramiro.org/notebook/geopandas-choropleth/
    http://www.naturalearthdata.com/downloads/10m-cultural-vectors/

    :return: geopandas dataframe of countries with lower-cased column names,
        including ``country`` and ``geometry``
    """
    # http://geopandas.org/mapping.html
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

    # Normalise the labels first, then give the country-name column its new label.
    return (
        world
        .rename(columns=lambda label: label.lower())
        .rename(columns={'name': 'country'})
    )

In [14]:
# Build the world map frame (lower-cased column names, with a 'country' column).
df_map = get_map_df()
# Print the column dtypes, then preview the first rows as the cell's displayed output.
print(df_map.dtypes)
df_map.head()

pop_est       float64
continent      object
country        object
iso_a3         object
gdp_md_est    float64
geometry       object
dtype: object


Unnamed: 0,pop_est,continent,country,iso_a3,gdp_md_est,geometry
0,28400000.0,Asia,Afghanistan,AFG,22270.0,"POLYGON ((61.21081709172574 35.65007233330923,..."
1,12799293.0,Africa,Angola,AGO,110300.0,(POLYGON ((16.32652835456705 -5.87747039146621...
2,3639453.0,Europe,Albania,ALB,21810.0,"POLYGON ((20.59024743010491 41.85540416113361,..."
3,4798491.0,Asia,United Arab Emirates,ARE,184300.0,"POLYGON ((51.57951867046327 24.24549713795111,..."
4,40913584.0,South America,Argentina,ARG,573900.0,(POLYGON ((-65.50000000000003 -55.199999999999...


# Merge malaria into GeoDataFrame

In [15]:
# Attach the malaria columns to the map frame, matching rows on the country name.
# No `how` is passed, so this is pandas' default inner join: a country is kept
# only when its name appears in both frames. The result is sorted by the join key.
df_map = df_map.merge(right=df_malaria, on='country', sort=True)
# Show a handful of merged rows as a spot check.
df_map.iloc[35:40]

Unnamed: 0,pop_est,continent,country,iso_a3,gdp_md_est,geometry,areas_with_malaria,estimated_risk
35,531640.0,Asia,Cyprus,CYP,22700.0,"POLYGON ((33.97361657078346 35.058506374648, 3...",,none
36,5500510.0,Europe,Denmark,DNK,203600.0,(POLYGON ((12.69000613775563 55.60999095318078...,,none
37,516055.0,Africa,Djibouti,DJI,1885.0,"POLYGON ((43.08122602720016 12.69963857670712,...",All,no data
38,14573101.0,South America,Ecuador,ECU,107700.0,POLYGON ((-80.30256059438722 -3.40485645916471...,"Areas at altitudes <1,500 m (4,921 ft) in the ...",very low
39,83082869.0,Africa,Egypt,EGY,443700.0,"POLYGON ((34.9226 29.50133, 34.64174 29.09942,...",,none


# Add estimated_risk_int for use in choropleth map

In [16]:
# estimated_risk_int exists only to drive the plot colour scale.
# Apart from 'no data', the risk labels are ordinal (ranked) but neither interval nor ratio data:
# https://www.mymarketresearchmethods.com/types-of-data-nominal-ordinal-interval-ratio/
df_map['estimated_risk_int'] = df_map['estimated_risk'].map(
    {
        'no data': -1,
        'none': 0,
        'very low': 1,
        'low': 2,
        'moderate': 3,
        'high': 4,
    }
)

# Plot choropleth map of estimated malaria risk for US travelers
http://geopandas.org/mapping.html

In [17]:
# Leave Antarctica off the map.
# .cx is geopandas' coordinate-based indexer, sliced as [longitude, latitude]:
# all longitudes are kept, latitudes are limited to -60..90.
df_map = df_map.cx[:, -60:90]

# Colour settings; vmin/vmax span the estimated_risk_int codes (-1 for 'no data' up to 4 for 'high').
cmap = 'Greens'
num_colors = 6
vmin = -1
vmax = 4

# https://stackoverflow.com/questions/39215202/matplotlib-geopandas-plot-multiple-layers-control-figsize
figsize = (20, 20)
fig, ax = plt.subplots(figsize=figsize)  # a single Axes is the default grid
ax.set_title('Estimated Malaria Risk for US Travelers')

df_map.plot(
    column='estimated_risk_int',
    ax=ax,
    cmap=cmap,
    k=num_colors,
    vmin=vmin,
    vmax=vmax,
    legend=False,
)

# TODO: Several African countries appear white. Consider whether this can be fixed
# in the data, the merge, the assumptions, or the map's appearance.

NameError: name 'figsize' is not defined