In [1]:
import pandas as pd
from pandas import Series, DataFrame
import country_converter as coco

In [2]:
# https://data.unicef.org/topic/child-health/immunization/
# Immunization coverage by antigen
# https://data.unicef.org/wp-content/uploads/2016/07/wuenic2021rev_web-update.xlsx

In [3]:
# Open the workbook only long enough to read its sheet names; the context
# manager closes the underlying file handle instead of leaving it open for the
# rest of the kernel session.
with pd.ExcelFile('raw data/wuenic2021rev_web-update.xlsx') as file:
    unicef_sheets = file.sheet_names
unicef_sheets

['BCG',
 'DTP1',
 'DTP3',
 'HEPB3',
 'HEPBB',
 'HIB3',
 'IPV1',
 'MCV1',
 'MCV2',
 'PCV3',
 'POL3',
 'RCV1',
 'ROTAC',
 'YFV',
 'regional_global']

In [4]:
def read_unicef(sheet, path='raw data/wuenic2021rev_web-update.xlsx'):
    """Load one sheet of the UNICEF workbook and reshape it to the cleaned layout.

    The sheet's 'country' and 'vaccine' columns are renamed to 'Country Name'
    and 'Technology Name', constant descriptor columns are added, the 'iso3'
    codes are converted to ISO2 in 'Country Code', and the frame is indexed by
    an 'ID' string built from technology, metric, unit and country code.

    Parameters
    ----------
    sheet : str or int
        Sheet to read, forwarded to ``pd.read_excel(sheet_name=...)``.
    path : str, optional
        Location of the workbook. Defaults to the WUENIC 2021 revision file
        used throughout this notebook.

    Returns
    -------
    pandas.DataFrame
        The sheet's remaining columns plus 'Unit', 'Metric', 'Data Source',
        'Spatial Scale' and 'Country Code', indexed by 'ID'. The 'iso3' and
        'unicef_region' columns are removed.

    Raises
    ------
    KeyError
        If the sheet lacks the 'vaccine', 'iso3' or 'unicef_region' column.
    """
    unicef = pd.read_excel(path, sheet_name=sheet).rename(
        columns={'country': 'Country Name', 'vaccine': 'Technology Name'}
    )
    unicef['Technology Name'] = unicef['Technology Name'] + ' Vaccine'
    unicef['Unit'] = 'Percentage'
    unicef['Metric'] = 'Immunization Coverage'
    unicef['Data Source'] = 'UNICEF'
    unicef['Spatial Scale'] = 'National'

    # Build the converter once and reuse it for every row: the module-level
    # coco.convert() sets up a new converter on each call, which is wasteful
    # when invoked per country. Codes are still converted one at a time so each
    # row gets exactly the value a single-name conversion would return.
    # NOTE(review): unmatched codes come back as a placeholder string rather
    # than raising, which would yield colliding IDs -- confirm none occur.
    converter = coco.CountryConverter()
    unicef['Country Code'] = [
        converter.convert(names=iso, to='iso2') for iso in unicef['iso3']
    ]

    unicef = unicef.drop(columns=['unicef_region', 'iso3'])
    unicef['ID'] = (
        unicef['Technology Name'] + '_'
        + unicef['Metric'] + '_'
        + unicef['Unit'] + '_'
        + unicef['Country Code']
    )
    return unicef.set_index('ID')

In [5]:
# Clean the first antigen sheet on its own; it seeds the list that the
# remaining sheets are collected into.
unicef_1 = read_unicef(sheet=unicef_sheets[0])
unicef_list = [unicef_1]

In [6]:
# Year columns are the ones labelled with a four-digit number. Matching on the
# digits (rather than on label length with a special case for 'Unit') keeps
# working if labels are ints or another four-character column is added.
check_col = [
    col for col in unicef_1.columns
    if len(str(col)) == 4 and str(col).isdigit()
]

# Coverage is a percentage, so every reported value must lie in [0, 100].
# Missing values compare False on both sides and are therefore accepted.
for year in check_col:
    check = unicef_1[year]
    out_of_range = check[(check > 100) | (check < 0)]
    if not out_of_range.empty:
        raise ValueError(
            "percentages can't be greater than 100 or less than 0 "
            f"(unicef_1, column {year!r}): {out_of_range.head().to_dict()}"
        )

In [7]:
# Clean and validate every remaining antigen sheet. The slice skips the first
# sheet (already loaded as unicef_1) and the last one ('regional_global').
remaining_sheets = []
for x in unicef_sheets[1:-1]:
    df = read_unicef(x)

    # Year columns are the ones labelled with a four-digit number.
    check_col = [
        col for col in df.columns
        if len(str(col)) == 4 and str(col).isdigit()
    ]

    # Coverage is a percentage, so every reported value must lie in [0, 100].
    # Missing values compare False on both sides and are therefore accepted.
    for year in check_col:
        check = df[year]
        out_of_range = check[(check > 100) | (check < 0)]
        if not out_of_range.empty:
            raise ValueError(
                "percentages can't be greater than 100 or less than 0 "
                f"(sheet {x!r}, column {year!r}): {out_of_range.head().to_dict()}"
            )
    remaining_sheets.append(df)

# Rebuild the list from scratch instead of appending to it, so re-running this
# cell does not add every sheet a second time.
unicef_list = [unicef_1] + remaining_sheets

In [8]:
# Stack the per-antigen frames row-wise and write the combined table, with its
# 'ID' index, to the cleaned-data folder.
un = pd.concat(objs=unicef_list)
un.to_csv(path_or_buf='cleaned data/unicef.csv')