## Import libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
## Load the malaria death-rate CSV into a DataFrame
world = pd.read_csv("data/malaria-death-rates.csv")

In [4]:
## Preview the first five rows of the raw data
world.head(n=5)

Unnamed: 0,Entity,Code,Year,"Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate) (per 100,000 people)"
0,Afghanistan,AFG,1990,6.80293
1,Afghanistan,AFG,1991,6.973494
2,Afghanistan,AFG,1992,6.989882
3,Afghanistan,AFG,1993,7.088983
4,Afghanistan,AFG,1994,7.392472


In [5]:
## Shorten the long rate column name to "Deaths"
world = world.rename(
    columns={
        "Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate) (per 100,000 people)": "Deaths"
    }
)

In [6]:
## Confirm the column was renamed
world.head(n=5)

Unnamed: 0,Entity,Code,Year,Deaths
0,Afghanistan,AFG,1990,6.80293
1,Afghanistan,AFG,1991,6.973494
2,Afghanistan,AFG,1992,6.989882
3,Afghanistan,AFG,1993,7.088983
4,Afghanistan,AFG,1994,7.392472


In [7]:
## Row count before dropping rows without a Code
world.shape[0]

6156

In [8]:
## Keep only rows that carry a value in Code: rows without one are regions
## (e.g. 'Eastern Sub-Saharan Africa'), not individual countries

world = world.dropna(subset=["Code"])

In [9]:
## Row count after dropping rows without a Code
world.shape[0]

5292

In [10]:
## Reshape from long to wide: one row per Code, one column per Year,
## with the Deaths rate as the cell value
world_pivot = (
    world
    .pivot(index="Code", columns="Year", values="Deaths")
    .reset_index()
)

In [11]:
## Preview the wide table
world_pivot.head(n=5)

Year,Code,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,AFG,6.80293,6.973494,6.989882,7.088983,7.392472,7.405511,7.626773,7.592172,7.796014,...,8.266378,8.0779,7.817882,7.271249,6.826629,6.611425,7.144628,7.137835,7.192283,7.139429
1,AGO,18.691207,19.143211,19.323137,20.144111,21.05879,22.047514,22.810312,24.178357,27.507236,...,48.670414,42.326105,35.912245,31.917778,28.634204,27.546573,28.686822,28.347497,28.380121,27.148632
2,ALB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,AND,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ARE,0.026188,0.026522,0.026914,0.027378,0.02779,0.028186,0.028247,0.028223,0.028186,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
## Number of rows (one per Code) in the wide table
world_pivot.shape[0]

196

In [23]:
## Set the Code column to be the index.
## Guarded so the cell can safely be re-run: once Code has become the index it
## is no longer a column, and an unguarded set_index("Code") raises a KeyError.
if "Code" in world_pivot.columns:
    world_pivot = world_pivot.set_index("Code")

In [24]:
## Preview the table now that Code is the index
world_pivot.head(n=5)

Year,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AFG,6.80293,6.973494,6.989882,7.088983,7.392472,7.405511,7.626773,7.592172,7.796014,7.800592,...,8.266378,8.0779,7.817882,7.271249,6.826629,6.611425,7.144628,7.137835,7.192283,7.139429
AGO,18.691207,19.143211,19.323137,20.144111,21.05879,22.047514,22.810312,24.178357,27.507236,29.926538,...,48.670414,42.326105,35.912245,31.917778,28.634204,27.546573,28.686822,28.347497,28.380121,27.148632
ALB,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AND,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ARE,0.026188,0.026522,0.026914,0.027378,0.02779,0.028186,0.028247,0.028223,0.028186,0.028208,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
## Keep only the rows with at least one non-zero value in any year

has_nonzero_year = world_pivot.ne(0).any(axis=1)
world_zero = world_pivot[has_nonzero_year]

In [26]:
## Preview the filtered table
world_zero.head(n=5)

Year,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AFG,6.80293,6.973494,6.989882,7.088983,7.392472,7.405511,7.626773,7.592172,7.796014,7.800592,...,8.266378,8.0779,7.817882,7.271249,6.826629,6.611425,7.144628,7.137835,7.192283,7.139429
AGO,18.691207,19.143211,19.323137,20.144111,21.05879,22.047514,22.810312,24.178357,27.507236,29.926538,...,48.670414,42.326105,35.912245,31.917778,28.634204,27.546573,28.686822,28.347497,28.380121,27.148632
ARE,0.026188,0.026522,0.026914,0.027378,0.02779,0.028186,0.028247,0.028223,0.028186,0.028208,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ARG,0.030806,0.02878,0.027253,0.02535,0.023625,0.022181,0.020987,0.019631,0.018529,0.017479,...,0.010093,0.009374,0.008776,0.008083,0.007555,0.007127,0.006674,0.006138,0.005686,0.005287
ARM,0.035989,0.035382,0.037053,0.036795,0.034154,0.030295,0.027778,0.024902,0.022568,0.020964,...,0.0134,0.012915,0.012022,0.010943,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
## Reset the index so Code becomes a regular column again.
## Guarded so the cell can safely be re-run: calling reset_index() a second time
## would insert a spurious "index" column, which would then be written out by
## the CSV export. Assigning the result also avoids mutating the filtered
## frame in place.
if world_zero.index.name == "Code":
    world_zero = world_zero.reset_index()

In [28]:
## Preview the table with Code back as a column
world_zero.head(n=5)

Year,Code,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,AFG,6.80293,6.973494,6.989882,7.088983,7.392472,7.405511,7.626773,7.592172,7.796014,...,8.266378,8.0779,7.817882,7.271249,6.826629,6.611425,7.144628,7.137835,7.192283,7.139429
1,AGO,18.691207,19.143211,19.323137,20.144111,21.05879,22.047514,22.810312,24.178357,27.507236,...,48.670414,42.326105,35.912245,31.917778,28.634204,27.546573,28.686822,28.347497,28.380121,27.148632
2,ARE,0.026188,0.026522,0.026914,0.027378,0.02779,0.028186,0.028247,0.028223,0.028186,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,ARG,0.030806,0.02878,0.027253,0.02535,0.023625,0.022181,0.020987,0.019631,0.018529,...,0.010093,0.009374,0.008776,0.008083,0.007555,0.007127,0.006674,0.006138,0.005686,0.005287
4,ARM,0.035989,0.035382,0.037053,0.036795,0.034154,0.030295,0.027778,0.024902,0.022568,...,0.0134,0.012915,0.012022,0.010943,0.0,0.0,0.0,0.0,0.0,0.0


## Export to CSV

In [31]:
## Write the filtered table to disk; index=False leaves the row index out of the file
world_zero.to_csv("data/malaria-death-rates-by-year.csv", index=False)

In [32]:
## Count the lines in the exported file as a quick sanity check
!wc -l 'data/malaria-death-rates-by-year.csv'

     115 data/malaria-death-rates-by-year.csv
