## Data source
WHO Global Health Observatory (GHO) data repository, cholera indicators: https://apps.who.int/gho/data/node.main.174?lang=en

## Libraries

In [1]:
import pandas as pd

## Read data

In [2]:
# Reported cholera case counts; the preview below shows Country, Year and the count column.
cases = pd.read_csv(filepath_or_buffer='cases.csv')
cases.head(n=5)

Unnamed: 0,Country,Year,Number of reported cases of cholera
0,Afghanistan,2016,677
1,Afghanistan,2015,58064
2,Afghanistan,2014,45481
3,Afghanistan,2013,3957
4,Afghanistan,2012,12


In [3]:
# Reported cholera death counts; the preview below shows Country, Year and the count column.
deaths = pd.read_csv(filepath_or_buffer='deaths.csv')
deaths.head(n=5)

Unnamed: 0,Country,Year,Number of reported deaths from cholera
0,Afghanistan,2016,5
1,Afghanistan,2015,8
2,Afghanistan,2014,4
3,Afghanistan,2013,14
4,Afghanistan,2012,0


In [4]:
# Cholera case fatality rate; the preview below shows Country, Year and the rate column.
cfr = pd.read_csv(filepath_or_buffer='cfr.csv')
cfr.head(n=5)

Unnamed: 0,Country,Year,Cholera case fatality rate
0,Afghanistan,2016,0.7
1,Afghanistan,2015,0.01
2,Afghanistan,2014,0.0
3,Afghanistan,2013,0.35
4,Afghanistan,2012,0.1


## Merge data

In [5]:
# Combine the three indicator tables into one frame keyed by country and year.
# The join keys are spelled out so the merge cannot silently pick up any other
# column that the input files might happen to share.
# how='outer' keeps country/year pairs that are missing from one or more of
# the tables; the absent indicators are left as NaN (counted in the next cell).
merge_keys = ['Country', 'Year']
complete = (
    cases
    .merge(deaths, how='outer', on=merge_keys)
    .merge(cfr, how='outer', on=merge_keys)
)
complete.head()

Unnamed: 0,Country,Year,Number of reported cases of cholera,Number of reported deaths from cholera,Cholera case fatality rate
0,Afghanistan,2016,677,5,0.7
1,Afghanistan,2015,58064,8,0.01
2,Afghanistan,2014,45481,4,0.0
3,Afghanistan,2013,3957,14,0.35
4,Afghanistan,2012,12,0,0.1


In [6]:
# Number of missing values per column after the outer merge.
complete.isnull().sum()

Country                                     0
Year                                        0
Number of reported cases of cholera        22
Number of reported deaths from cholera    117
Cholera case fatality rate                127
dtype: int64

## WHO Region

In [7]:
def _split_names(names):
    """Split a comma-separated string of country names into a list of stripped names."""
    return [name.strip() for name in names.split(',')]


# Lookup table: country name (including alternative spellings) -> WHO region label.
who_region = {}

# African Region AFRO
# 'Congo' belongs here (it was previously listed under the Americas).
# 'Somalia' is intentionally NOT listed: it is assigned to the Eastern Mediterranean below.
afro = _split_names("Algeria, Angola, Cabo Verde, Eswatini, Sao Tome and Principe, Benin, South Sudan, Western Sahara, Congo, Congo (Brazzaville), Congo (Kinshasa), Cote d'Ivoire, Botswana, Burkina Faso, Burundi, Cameroon, Cape Verde, Central African Republic, Chad, Comoros, Ivory Coast, Côte d'Ivoire, Democratic Republic of the Congo, Equatorial Guinea, Eritrea, Ethiopia, Gabon, Gambia, Ghana, Guinea, Guinea-Bissau, Kenya, Lesotho, Liberia, Madagascar, Malawi, Mali, Mauritania, Mauritius, Mozambique, Namibia, Niger, Nigeria, Republic of the Congo, Rwanda, São Tomé and Príncipe, Senegal, Seychelles, Sierra Leone, South Africa, Swaziland, Togo, Uganda, Tanzania, United Republic of Tanzania, Zambia, Zimbabwe")

# Region of the Americas PAHO
paho = _split_names('Antigua and Barbuda, Argentina, Bahamas, Barbados, Belize, Bolivia, Bolivia (Plurinational State of), Brazil, Canada, Chile, Colombia, Costa Rica, Cuba, Dominica, Dominican Republic, Ecuador, El Salvador, Grenada, Guatemala, Guyana, Haiti, Honduras, Jamaica, Mexico, Nicaragua, Panama, Paraguay, Peru, Saint Kitts and Nevis, Saint Lucia, Saint Vincent and the Grenadines, Suriname, Trinidad and Tobago, United States, US, United States of America, Uruguay, Venezuela, Venezuela (Bolivarian Republic of)')

# South-East Asia Region SEARO
# DPR Korea is a SEARO member state (it was previously listed under the Western Pacific).
searo = _split_names("Bangladesh, Bhutan, India, Indonesia, Maldives, Myanmar, Burma, Nepal, Sri Lanka, Thailand, Timor-Leste, North Korea, Democratic People's Republic of Korea")

# European Region EURO
euro = _split_names('Albania, Andorra, Greenland, Kosovo, Holy See, Liechtenstein, Armenia, Czechia, Austria, Azerbaijan, Belarus, Belgium, Bosnia and Herzegovina, Bulgaria, Croatia, Cyprus, Czech Republic, Denmark, Estonia, Finland, France, Georgia, Germany, Greece, Hungary, Iceland, Ireland, Israel, Italy, Kazakhstan, Kyrgyzstan, Latvia, Lithuania, Luxembourg, Malta, Monaco, Montenegro, Netherlands, North Macedonia, Republic of North Macedonia, Norway, Poland, Portugal, Moldova, Republic of Moldova, Romania, Russia, Russian Federation, San Marino, Serbia, Slovakia, Slovenia, Spain, Sweden, Switzerland, Tajikistan, Turkey, Turkmenistan, Ukraine, United Kingdom, United Kingdom of Great Britain and Northern Ireland, Uzbekistan, The former state union Serbia and Montenegro')

# Eastern Mediterranean Region EMRO
emro = _split_names('Afghanistan, Bahrain, Djibouti, Egypt, Iran, Iran (Islamic Republic of), Iraq, Jordan, Kuwait, Lebanon, Libya, Morocco, Oman, Pakistan, Palestine, West Bank and Gaza, Qatar, Saudi Arabia, Somalia, Sudan, Syria, Syrian Arab Republic, Tunisia, United Arab Emirates, Yemen')

# Western Pacific Region WPRO
wpro = _split_names("Australia, Brunei, Brunei Darussalam, Republic of Korea, Cambodia, China, Cook Islands, Fiji, Japan, Kiribati, Laos, Lao People's Democratic Republic, Malaysia, Marshall Islands, Micronesia, Micronesia (Federated States of), Mongolia, Nauru, New Zealand, Niue, Palau, Papua New Guinea, Philippines, South Korea, Samoa, Singapore, Solomon Islands, Taiwan, Taiwan*, Tonga, Tuvalu, Vanuatu, Vietnam, Viet Nam")

# Register every name under its region label. No name appears in more than
# one list, so the order of these updates does not affect the result.
for region_label, countries in (
    ('Africa', afro),
    ('Americas', paho),
    ('South-East Asia', searo),
    ('Europe', euro),
    ('Eastern Mediterranean', emro),
    ('Western Pacific', wpro),
):
    who_region.update(dict.fromkeys(countries, region_label))

In [8]:
# Look up each row's country in the who_region table; names absent from the
# table come back as NaN and are listed by the check in the next cell.
region_by_row = complete['Country'].map(who_region)
complete['WHO Region'] = region_by_row
complete.head()

Unnamed: 0,Country,Year,Number of reported cases of cholera,Number of reported deaths from cholera,Cholera case fatality rate,WHO Region
0,Afghanistan,2016,677,5,0.7,Eastern Mediterranean
1,Afghanistan,2015,58064,8,0.01,Eastern Mediterranean
2,Afghanistan,2014,45481,4,0.0,Eastern Mediterranean
3,Afghanistan,2013,3957,14,0.35,Eastern Mediterranean
4,Afghanistan,2012,12,0,0.1,Eastern Mediterranean


In [9]:
# Sanity check: distinct country names that received no WHO region (expected: none).
region_missing = complete['WHO Region'].isna()
complete.loc[region_missing, 'Country'].unique()

array([], dtype=object)

## Save data

In [10]:
# Write the merged table to disk without the row index.
complete.to_csv(path_or_buf='data.csv', index=False)