# Taking a look at some malaria data using Altair
> A first look at WHO malaria estimates for African countries, with an Altair chart for Kenya.

- toc: true 
- badges: false
- comments: true
- categories: [jupyter, malaria]
- image: images/chart-preview.png

In [1]:
import altair as alt
import pandas as pd
import requests     
import json         
import matplotlib.pyplot as plt
import math

In [2]:
#hide
# Look up the 'who-malaria' package on the Africa Data Hub (ADH) CKAN instance
# and extract the id of the resource whose records are downloaded afterwards.

# Seconds to wait for CKAN before giving up, so a stalled connection cannot
# hang the notebook indefinitely (requests has no timeout by default).
request_timeout = 30

# Package list of ADH CKAN
packages = 'https://ckan.africadatahub.org/api/3/action/package_list'

# Make the HTTP request; fail loudly on HTTP errors (4xx/5xx) instead of
# trying to parse an error page as JSON.
response = requests.get(packages, timeout=request_timeout)
response.raise_for_status()

# Decode CKAN's JSON response into a dictionary
response_dict = response.json()

# CKAN reports API-level failures through the 'success' flag
assert response_dict['success'] is True, 'CKAN package_list call did not succeed'

datasets = response_dict['result']         # all package names known to this CKAN instance

# Specify the package you are interested in:
package = 'who-malaria'

# Use the package list to catch a renamed/removed package with a clear message
assert package in datasets, "package '%s' not found in the CKAN package list" % package

# Base url for package information. This is always the same.
base_url = 'https://ckan.africadatahub.org/api/3/action/package_show?id='

# Construct the url for the package of interest
package_information_url = base_url + package

# Make the HTTP request
package_information = requests.get(package_information_url, timeout=request_timeout)
package_information.raise_for_status()

# Decode CKAN's JSON response into a dictionary
package_dict = package_information.json()

# Check the contents of the response.
assert package_dict['success'] is True, 'CKAN package_show call did not succeed'
package_dict = package_dict['result']   # we only need the 'result' part from the dictionary

# The data is taken from the second resource of the package.
# NOTE(review): this relies on the order of resources on CKAN staying the same — confirm,
# or select the resource by name/id instead.
resources = package_dict['resources']
assert len(resources) > 1, "expected at least 2 resources in package '%s', found %d" % (package, len(resources))
data_id = resources[1]['id']

## Get the data
The data is hosted on the Africa Data Hub CKAN instance and can be found [here](https://ckan.africadatahub.org/dataset/who-malaria/resource/3fb5a88a-c48b-432d-a9b4-d76b26363705).

In [3]:
#hide_input
# Download the records of the selected resource from the CKAN DataStore and
# load them into a DataFrame.
r = requests.get(
    'https://ckan.africadatahub.org/api/3/action/datastore_search',
    # let requests build and encode the query string
    params={'resource_id': data_id, 'limit': 5000},
    timeout=30,  # seconds; requests waits forever without an explicit timeout
)
r.raise_for_status()  # surface HTTP errors before attempting to parse the body
c = r.json()
assert c['success'] is True, 'CKAN datastore_search call did not succeed'

records = c['result']['records']

# datastore_search returns at most `limit` rows; warn when the resource holds
# more rows than were fetched so a truncated dataset does not go unnoticed.
total = c['result'].get('total')
if total is not None and total > len(records):
    print('WARNING: fetched %d of %d records; increase the limit or paginate' % (len(records), total))

df = pd.json_normalize(records)
print(df.head(10))


   _id       IndicatorCode                           Indicator ValueType  \
0    1  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   
1    2  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   
2    3  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   
3    4  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   
4    5  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   
5    6  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   
6    7  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   
7    8  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   
8    9  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   
9   10  MALARIA_EST_DEATHS  Estimated number of malaria deaths      text   

  ParentLocationCode   ParentLocation Location type SpatialDimValueCode  \
0                AMR         Americas       Country                 BLZ   
1            

As we can see, the dataset has a lot of columns. We are only interested in African countries and in a handful of those columns, so we filter the rows and select the columns as follows.

In [4]:
# Columns kept for the analysis; every other column of the raw records is dropped.
cols = [
    'SpatialDimValueCode',
    'Location',
    'Indicator',
    'Period',
    'FactValueNumeric',
    'FactValueNumericLow',
    'FactValueNumericHigh',
    'DateModified',
]
# Keep only the rows whose ParentLocation is 'Africa', restricted to the chosen columns.
df = df.loc[df['ParentLocation'] == 'Africa', cols]
print(df.head(10))


   SpatialDimValueCode               Location  \
2                  CPV             Cabo Verde   
5                  SWZ               Eswatini   
12                 STP  Sao Tome and Principe   
18                 BEN                  Benin   
19                 GIN                 Guinea   
20                 COM                Comoros   
22                 GHA                  Ghana   
23                 TCD                   Chad   
24                 KEN                  Kenya   
27                 CMR               Cameroon   

                             Indicator  Period  FactValueNumeric  \
2   Estimated number of malaria deaths    2020               0.0   
5   Estimated number of malaria deaths    2020               0.0   
12  Estimated number of malaria deaths    2020               0.0   
18  Estimated number of malaria deaths    2020           10123.0   
19  Estimated number of malaria deaths    2020           10215.0   
20  Estimated number of malaria deaths    2020      

In [5]:
# Rows of the filtered frame whose Location is 'Kenya'.
df_kenya = df.loc[df['Location'] == 'Kenya']

In [6]:
# Chart title, anchored to the middle so it is centred
title = alt.TitleParams('Estimated number of deaths due to Malaria in Kenya', anchor='middle')

# Line of FactValueNumeric per Period; ':O' tells altair that Period is ordinal
line = (
    alt.Chart(df_kenya, title=title)
    .mark_line()
    .encode(
        x=alt.X('Period:O', title='Year'),
        y=alt.Y('FactValueNumeric', title='Number of Deaths'),
    )
)

# Semi-transparent band between FactValueNumericLow and FactValueNumericHigh,
# with a tooltip listing the period and the three values
point = (
    alt.Chart(df_kenya)
    .mark_area(opacity=0.3)
    .encode(
        x=alt.X('Period:O'),
        y=alt.Y('FactValueNumericLow'),
        y2=alt.Y2('FactValueNumericHigh'),
        tooltip=['Period', 'FactValueNumericLow', 'FactValueNumeric', 'FactValueNumericHigh'],
    )
)

In [13]:
#hide
# Layer the line and the band into a single chart, then write the chart and
# the Kenya rows to disk.
x = alt.layer(line, point)
x.save('chart.html')

# Export the Kenya rows as an HTML table and as CSV (without the index column)
df_kenya.to_html('kenya.html')
df_kenya.to_csv('kenya.csv', index=False)

In [15]:
#hide_input
# Show the layered chart (line + band) as this cell's output.
x