## Data source
https://apps.who.int/gho/data/node.main.A1363?lang=en

## Libraries

In [1]:
import pandas as pd

## Estimated data

In [2]:
# Read the estimated case counts (the file's first line is skipped) and reshape
# them so that every non-'Country' column becomes a (Year, value) row.
est_cases = (
    pd.read_csv('estimated_cases.csv', skiprows=1)
    .melt(id_vars='Country', var_name='Year', value_name='No. of cases')
)
est_cases.head()

Unnamed: 0,Country,Year,No. of cases
0,Afghanistan,2017,630 308 [495 000 - 801 000]
1,Algeria,2017,0
2,Angola,2017,4 615 605 [3 106 000 - 6 661 000]
3,Argentina,2017,0
4,Armenia,2017,0


In [3]:
# Read the estimated death counts (the file's first line is skipped) and reshape
# them so that every non-'Country' column becomes a (Year, value) row.
est_deaths = (
    pd.read_csv('estimated_deaths.csv', skiprows=1)
    .melt(id_vars='Country', var_name='Year', value_name='No. of deaths')
)
est_deaths.head()

Unnamed: 0,Country,Year,No. of deaths
0,Afghanistan,2017,298 [110 - 510]
1,Algeria,2017,0
2,Angola,2017,13 316 [9970 - 16600]
3,Argentina,2017,0
4,Armenia,2017,0


In [4]:
# Combine the two long tables on their shared key columns; an inner join keeps
# only the Country/Year pairs that are present in both.
est_no = est_cases.merge(est_deaths, how='inner', on=['Country', 'Year'])
est_no.head()

Unnamed: 0,Country,Year,No. of cases,No. of deaths
0,Afghanistan,2017,630 308 [495 000 - 801 000],298 [110 - 510]
1,Algeria,2017,0,0
2,Angola,2017,4 615 605 [3 106 000 - 6 661 000],13 316 [9970 - 16600]
3,Argentina,2017,0,0
4,Armenia,2017,0,0


In [5]:
# Each estimate is stored as text such as "630 308 [495 000 - 801 000]": a
# leading value followed by an optional bracketed low-high range. Cells holding
# only a bare value (e.g. "0") have no range, so their *_min / *_max stay NaN.
# The extracted pieces are kept as strings, exactly as captured.
for col in ['No. of cases', 'No. of deaths']:
    # Remove the spaces used as digit-group separators (literal match, no regex).
    est_no[col] = est_no[col].str.replace(' ', '', regex=False)
    # Raw-string patterns: '\[' and '\-' are invalid escape sequences in plain
    # string literals. expand=False yields a Series for the single capture group.
    est_no[col+'_median'] = est_no[col].str.extract(r'([0-9.]+)', expand=False)
    est_no[col+'_min'] = est_no[col].str.extract(r'[0-9.]+\[([0-9.]+)', expand=False)
    est_no[col+'_max'] = est_no[col].str.extract(r'[0-9.]+\[[0-9.]+\-([0-9.]+)', expand=False)

est_no.head()

Unnamed: 0,Country,Year,No. of cases,No. of deaths,No. of cases_median,No. of cases_min,No. of cases_max,No. of deaths_median,No. of deaths_min,No. of deaths_max
0,Afghanistan,2017,630308[495000-801000],298[110-510],630308,495000.0,801000.0,298,110.0,510.0
1,Algeria,2017,0,0,0,,,0,,
2,Angola,2017,4615605[3106000-6661000],13316[9970-16600],4615605,3106000.0,6661000.0,13316,9970.0,16600.0
3,Argentina,2017,0,0,0,,,0,,
4,Armenia,2017,0,0,0,,,0,,


In [6]:
# Lookup table: country name -> WHO region label. Several countries appear
# under more than one spelling (e.g. "Viet Nam" / "Vietnam") so that different
# naming conventions all resolve to a region.
who_region = {}


def _register_region(names, region):
    """Map every country in a comma-separated string to `region`.

    Names are split on ',' and stripped of surrounding whitespace, so a country
    name that itself contains a comma cannot be expressed here. Returns the
    list of stripped names so the per-region lists remain available.
    """
    members = [name.strip() for name in names.split(',')]
    who_region.update(dict.fromkeys(members, region))
    return members


# African Region AFRO
# "Congo" belongs here (it was previously listed under the Americas).
# Somalia is listed only under the Eastern Mediterranean Region below, which is
# the region it already resolved to.
afro = _register_region(
    "Algeria, Angola, Cabo Verde, Eswatini, Sao Tome and Principe, Benin, South Sudan, Western Sahara, Congo (Brazzaville), Congo (Kinshasa), Congo, Cote d'Ivoire, Botswana, Burkina Faso, Burundi, Cameroon, Cape Verde, Central African Republic, Chad, Comoros, Ivory Coast, Côte d'Ivoire, Democratic Republic of the Congo, Equatorial Guinea, Eritrea, Ethiopia, Gabon, Gambia, Ghana, Guinea, Guinea-Bissau, Kenya, Lesotho, Liberia, Madagascar, Malawi, Mali, Mauritania, Mauritius, Mozambique, Namibia, Niger, Nigeria, Republic of the Congo, Rwanda, São Tomé and Príncipe, Senegal, Seychelles, Sierra Leone, South Africa, Swaziland, Togo, Uganda, Tanzania, United Republic of Tanzania, Zambia, Zimbabwe",
    'Africa',
)

# Region of the Americas PAHO
paho = _register_region(
    'Antigua and Barbuda, Argentina, Bahamas, Barbados, Belize, Bolivia, Bolivia (Plurinational State of), Brazil, Canada, Chile, Colombia, Costa Rica, Cuba, Dominica, Dominican Republic, Ecuador, El Salvador, Grenada, Guatemala, Guyana, Haiti, Honduras, Jamaica, Mexico, Nicaragua, Panama, Paraguay, Peru, Saint Kitts and Nevis, Saint Lucia, Saint Vincent and the Grenadines, Suriname, Trinidad and Tobago, United States, US, United States of America, Uruguay, Venezuela, Venezuela (Bolivarian Republic of)',
    'Americas',
)

# South-East Asia Region SEARO
searo = _register_region(
    'Bangladesh, Bhutan, India, Indonesia, Maldives, Myanmar, Burma, Nepal, Sri Lanka, Thailand, Timor-Leste',
    'South-East Asia',
)

# European Region EURO
euro = _register_region(
    'Albania, Andorra, Greenland, Kosovo, Holy See, Liechtenstein, Armenia, Czechia, Austria, Azerbaijan, Belarus, Belgium, Bosnia and Herzegovina, Bulgaria, Croatia, Cyprus, Czech Republic, Denmark, Estonia, Finland, France, Georgia, Germany, Greece, Hungary, Iceland, Ireland, Israel, Italy, Kazakhstan, Kyrgyzstan, Latvia, Lithuania, Luxembourg, Malta, Monaco, Montenegro, Netherlands, North Macedonia, Republic of North Macedonia, Norway, Poland, Portugal, Moldova, Republic of Moldova, Romania, Russia, Russian Federation, San Marino, Serbia, Slovakia, Slovenia, Spain, Sweden, Switzerland, Tajikistan, Turkey, Turkmenistan, Ukraine, United Kingdom, United Kingdom of Great Britain and Northern Ireland, Uzbekistan',
    'Europe',
)

# Eastern Mediterranean Region EMRO
emro = _register_region(
    'Afghanistan, Bahrain, Djibouti, Egypt, Iran, Iran (Islamic Republic of), Iraq, Jordan, Kuwait, Lebanon, Libya, Morocco, Oman, Pakistan, Palestine, West Bank and Gaza, Qatar, Saudi Arabia, Somalia, Sudan, Syria, Syrian Arab Republic, Tunisia, United Arab Emirates, Yemen',
    'Eastern Mediterranean',
)

# Western Pacific Region WPRO
wpro = _register_region(
    "Australia, Brunei, Brunei Darussalam, Republic of Korea, Cambodia, China, Cook Islands, Fiji, Japan, Kiribati, Laos, Lao People's Democratic Republic, Malaysia, Marshall Islands, Micronesia, Mongolia, Nauru, North Korea, New Zealand, Niue, Palau, Papua New Guinea, Philippines, South Korea, Democratic People's Republic of Korea, Samoa, Singapore, Solomon Islands, Taiwan, Taiwan*, Tonga, Tuvalu, Vanuatu, Vietnam, Viet Nam",
    'Western Pacific',
)

In [7]:
# Attach each row's WHO region by looking its country up in `who_region`;
# countries missing from the lookup end up as NaN.
est_no = est_no.assign(**{'WHO Region': est_no['Country'].map(who_region)})
est_no.head()

Unnamed: 0,Country,Year,No. of cases,No. of deaths,No. of cases_median,No. of cases_min,No. of cases_max,No. of deaths_median,No. of deaths_min,No. of deaths_max,WHO Region
0,Afghanistan,2017,630308[495000-801000],298[110-510],630308,495000.0,801000.0,298,110.0,510.0,Eastern Mediterranean
1,Algeria,2017,0,0,0,,,0,,,Africa
2,Angola,2017,4615605[3106000-6661000],13316[9970-16600],4615605,3106000.0,6661000.0,13316,9970.0,16600.0,Africa
3,Argentina,2017,0,0,0,,,0,,,Americas
4,Armenia,2017,0,0,0,,,0,,,Europe


In [8]:
# Sanity check: list the countries that did not get a region from the lookup.
missing_region = est_no['WHO Region'].isna()
est_no.loc[missing_region, 'Country'].unique()

array([], dtype=object)

In [9]:
# Persist the combined estimates; the row index is not written to the file.
est_no.to_csv(path_or_buf='estimated_numbers.csv', index=False)

## Reported data

In [10]:
# Read the reported confirmed indigenous cases (the file's first line is
# skipped) and reshape so every non-'Country' column becomes a (Year, value) row.
rep_cases = (
    pd.read_csv('reported_indigenous_confirmed_cases.csv', skiprows=1)
    .melt(id_vars='Country', var_name='Year', value_name='No. of cases')
)
rep_cases.head()

Unnamed: 0,Country,Year,No. of cases
0,Afghanistan,2017,161778.0
1,Algeria,2017,0.0
2,Angola,2017,3874892.0
3,Argentina,2017,0.0
4,Armenia,2017,0.0


In [11]:
# Read the reported deaths (the file's first line is skipped) and reshape so
# every non-'Country' column becomes a (Year, value) row.
rep_deaths = (
    pd.read_csv('reported_deaths.csv', skiprows=1)
    .melt(id_vars='Country', var_name='Year', value_name='No. of deaths')
)
rep_deaths.head()

Unnamed: 0,Country,Year,No. of deaths
0,Afghanistan,2017,10.0
1,Algeria,2017,0.0
2,Angola,2017,13967.0
3,Argentina,2017,1.0
4,Armenia,2017,


In [12]:
# Outer join on the shared key columns keeps Country/Year pairs that appear in
# only one of the two tables; the region is then looked up per country.
rep_no = (
    rep_cases
    .merge(rep_deaths, how='outer', on=['Country', 'Year'])
    .assign(**{'WHO Region': lambda frame: frame['Country'].map(who_region)})
)
rep_no.head()

Unnamed: 0,Country,Year,No. of cases,No. of deaths,WHO Region
0,Afghanistan,2017,161778.0,10.0,Eastern Mediterranean
1,Algeria,2017,0.0,0.0,Africa
2,Angola,2017,3874892.0,13967.0,Africa
3,Argentina,2017,0.0,1.0,Americas
4,Armenia,2017,0.0,,Europe


In [13]:
# Sanity check: list the countries that did not get a region from the lookup.
missing_region = rep_no['WHO Region'].isna()
rep_no.loc[missing_region, 'Country'].unique()

array([], dtype=object)

In [14]:
# Persist the combined reported figures; the row index is not written.
rep_no.to_csv(path_or_buf='reported_numbers.csv', index=False)

## Incidence per 1,000 population at risk

In [15]:
# Read the incidence table (the file's first line is skipped), reshape it so
# every non-'Country' column becomes a (Year, value) row, and add the WHO region.
# NOTE(review): the value column is named 'No. of cases' although the file name
# suggests these are incidence rates per 1000 population at risk - confirm
# before renaming, since the exported CSV carries this column name.
inc = (
    pd.read_csv('incidence_per_1000_population_at_risk.csv', skiprows=1)
    .melt(id_vars='Country', var_name='Year', value_name='No. of cases')
    .assign(**{'WHO Region': lambda frame: frame['Country'].map(who_region)})
)
inc.head()

Unnamed: 0,Country,Year,No. of cases,WHO Region
0,Afghanistan,2018,29.01,Eastern Mediterranean
1,Algeria,2018,0.0,Africa
2,Angola,2018,228.91,Africa
3,Argentina,2018,0.0,Americas
4,Armenia,2018,0.0,Europe


In [16]:
# Sanity check: list the countries that did not get a region from the lookup.
missing_region = inc['WHO Region'].isna()
inc.loc[missing_region, 'Country'].unique()

array([], dtype=object)

In [17]:
# Persist the long-format incidence table; the row index is not written.
inc.to_csv(path_or_buf='incidence_per_1000_pop_at_risk.csv', index=False)