In [1]:
import pandas as pd
from pandas import Series, DataFrame
import country_converter as coco
import os

In [2]:
# Data source: UNICEF, "Immunization coverage by antigen"
# Topic page: https://data.unicef.org/topic/child-health/immunization/
# Direct download: https://data.unicef.org/wp-content/uploads/2016/07/wuenic2021rev_web-update.xlsx

In [3]:
# Working directory that the relative 'raw data/...' and 'cleaned data/...'
# paths used in this notebook are resolved against.  The original
# machine-specific location stays as the default; set the HISTORICAL_TECH_DIR
# environment variable to run the notebook from a different checkout.
path = os.environ.get('HISTORICAL_TECH_DIR', '/Users/ariana/desktop/historical_tech')
os.chdir(path)

In [4]:
# Only the sheet names are needed from this handle, so open the workbook in a
# context manager: the underlying file is closed as soon as the names are read.
with pd.ExcelFile('raw data/wuenic2021rev_web-update.xlsx') as file:
    unicef_sheets = file.sheet_names

In [5]:
def read_unicef(sheet, source='raw data/wuenic2021rev_web-update.xlsx'):
    """Read one sheet of the UNICEF coverage workbook and reshape it for export.

    The sheet is expected to contain 'country', 'vaccine', 'iso3' and
    'unicef_region' columns; every other column is passed through unchanged.

    Parameters
    ----------
    sheet : str or int
        Sheet to load; forwarded to ``pd.read_excel`` as ``sheet_name``.
    source : str, path-like or pandas.ExcelFile, optional
        Workbook to read from.  Defaults to the workbook path this notebook
        has always used, so existing ``read_unicef(sheet)`` calls are unchanged.

    Returns
    -------
    pandas.DataFrame
        The sheet with 'country'/'vaccine' renamed to 'Country Name'/
        'Technology Name' (the latter suffixed with ' Vaccine'), constant
        'Unit', 'Metric', 'Data Source' and 'Spatial Scale' columns, a
        'Country Code' column holding the ISO2 conversion of 'iso3', the
        'unicef_region' and 'iso3' columns removed, and indexed by 'ID'.
    """
    unicef = pd.read_excel(source, sheet_name=sheet)
    unicef = unicef.rename(columns={'country': 'Country Name', 'vaccine': 'Technology Name'})
    unicef['Technology Name'] = unicef['Technology Name'] + ' Vaccine'

    # Descriptive constants shared by every row of this data source.
    unicef['Unit'] = 'Percentage'
    unicef['Metric'] = 'Immunization Coverage'
    unicef['Data Source'] = 'UNICEF'
    unicef['Spatial Scale'] = 'National'

    # Build the converter once and reuse it for every row.  The module-level
    # coco.convert() helper constructs a new CountryConverter on each call,
    # which is needlessly slow when invoked once per country.
    converter = coco.CountryConverter()
    unicef['Country Code'] = [
        converter.convert(names=iso, to='iso2') for iso in unicef['iso3']
    ]

    unicef = unicef.drop(columns=['unicef_region', 'iso3'])

    # ID layout: <Technology Name>_<Metric>__<Country Code>; the double
    # underscore before the country code is kept exactly as before.
    unicef['ID'] = (
        unicef['Technology Name'] + '_' + unicef['Metric'] + '__' + unicef['Country Code']
    )
    return unicef.set_index('ID')


In [6]:
# Parse the first sheet on its own; it seeds the list of per-sheet frames
# that the remaining sheets are added to.
first_sheet_name = unicef_sheets[0]
unicef_1 = read_unicef(first_sheet_name)
unicef_list = [unicef_1]

In [7]:
# Year columns are the four-character labels other than 'Unit'.
check_col = [label for label in unicef_1.columns if len(label) == 4 and label != 'Unit']

# Coverage is a percentage, so any value outside 0-100 is rejected.  The
# comparison is written so that missing values (NaN) never trigger the error.
for year in check_col:
    if any(value < 0 or value > 100 for value in unicef_1[year]):
        raise Exception("percentages can't be greater than 100 or less than 0")

In [8]:
# Rebuild the list from the first sheet so this cell is idempotent: re-running
# it no longer appends the remaining sheets a second time.
unicef_list = [unicef_1]

# The [1:-1] slice skips the first sheet (already loaded as unicef_1) and the
# last sheet.  NOTE(review): presumably the last sheet is not a per-vaccine
# table -- confirm against the workbook.
for x in unicef_sheets[1:-1]:
    df = read_unicef(x)

    # Year columns are the four-character labels other than 'Unit'.
    check_col = [col for col in df.columns if len(col) == 4 and col != 'Unit']

    # Reject out-of-range percentages; NaN compares False and is let through.
    for year in check_col:
        if any(pct > 100 or pct < 0 for pct in df[year]):
            raise Exception("percentages can't be greater than 100 or less than 0")

    unicef_list.append(df)

In [9]:
# Stack the per-sheet frames row-wise into one table and display it.
un = pd.concat(unicef_list, axis=0)
un

Unnamed: 0_level_0,Country Name,Technology Name,2021,2020,2019,2018,2017,2016,2015,2014,...,1984,1983,1982,1981,1980,Unit,Metric,Data Source,Spatial Scale,Country Code
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BCG Vaccine_Immunization Coverage__AF,Afghanistan,BCG Vaccine,84.0,87.0,86.0,89.0,84.0,78.0,76.0,74.0,...,11.0,10.0,10.0,,,Percentage,Immunization Coverage,UNICEF,National,AF
BCG Vaccine_Immunization Coverage__AL,Albania,BCG Vaccine,99.0,98.0,99.0,99.0,99.0,99.0,99.0,99.0,...,90.0,90.0,92.0,93.0,93.0,Percentage,Immunization Coverage,UNICEF,National,AL
BCG Vaccine_Immunization Coverage__DZ,Algeria,BCG Vaccine,99.0,99.0,99.0,99.0,99.0,99.0,99.0,99.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,DZ
BCG Vaccine_Immunization Coverage__AO,Angola,BCG Vaccine,56.0,58.0,69.0,72.0,69.0,40.0,64.0,72.0,...,28.0,25.0,,,,Percentage,Immunization Coverage,UNICEF,National,AO
BCG Vaccine_Immunization Coverage__AR,Argentina,BCG Vaccine,81.0,75.0,85.0,93.0,95.0,92.0,96.0,99.0,...,78.0,70.0,77.0,70.0,62.0,Percentage,Immunization Coverage,UNICEF,National,AR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YFV Vaccine_Immunization Coverage__SL,Sierra Leone,YFV Vaccine,85.0,86.0,90.0,85.0,80.0,80.0,78.0,80.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,SL
YFV Vaccine_Immunization Coverage__SR,Suriname,YFV Vaccine,61.0,44.0,57.0,63.0,67.0,61.0,68.0,61.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,SR
YFV Vaccine_Immunization Coverage__TG,Togo,YFV Vaccine,66.0,66.0,71.0,68.0,73.0,68.0,71.0,75.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,TG
YFV Vaccine_Immunization Coverage__TT,Trinidad and Tobago,YFV Vaccine,91.0,89.0,98.0,88.0,95.0,85.0,91.0,96.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,TT


In [16]:
# Year columns are the four-character labels other than 'Unit'.
year_cols = [col for col in un.columns if len(col) == 4 and col != 'Unit']

# Remove rows that have no coverage value in any year.  dropna works row by
# row, so it stays correct even if two rows share the same ID label; the
# previous label-based lookup could keep an empty row or drop a populated one
# in that situation.
un = un.dropna(subset=year_cols, how='all')
un

Unnamed: 0_level_0,Country Name,Technology Name,2021,2020,2019,2018,2017,2016,2015,2014,...,1984,1983,1982,1981,1980,Unit,Metric,Data Source,Spatial Scale,Country Code
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BCG Vaccine_Immunization Coverage__AF,Afghanistan,BCG Vaccine,84.0,87.0,86.0,89.0,84.0,78.0,76.0,74.0,...,11.0,10.0,10.0,,,Percentage,Immunization Coverage,UNICEF,National,AF
BCG Vaccine_Immunization Coverage__AL,Albania,BCG Vaccine,99.0,98.0,99.0,99.0,99.0,99.0,99.0,99.0,...,90.0,90.0,92.0,93.0,93.0,Percentage,Immunization Coverage,UNICEF,National,AL
BCG Vaccine_Immunization Coverage__DZ,Algeria,BCG Vaccine,99.0,99.0,99.0,99.0,99.0,99.0,99.0,99.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,DZ
BCG Vaccine_Immunization Coverage__AO,Angola,BCG Vaccine,56.0,58.0,69.0,72.0,69.0,40.0,64.0,72.0,...,28.0,25.0,,,,Percentage,Immunization Coverage,UNICEF,National,AO
BCG Vaccine_Immunization Coverage__AR,Argentina,BCG Vaccine,81.0,75.0,85.0,93.0,95.0,92.0,96.0,99.0,...,78.0,70.0,77.0,70.0,62.0,Percentage,Immunization Coverage,UNICEF,National,AR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YFV Vaccine_Immunization Coverage__SL,Sierra Leone,YFV Vaccine,85.0,86.0,90.0,85.0,80.0,80.0,78.0,80.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,SL
YFV Vaccine_Immunization Coverage__SR,Suriname,YFV Vaccine,61.0,44.0,57.0,63.0,67.0,61.0,68.0,61.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,SR
YFV Vaccine_Immunization Coverage__TG,Togo,YFV Vaccine,66.0,66.0,71.0,68.0,73.0,68.0,71.0,75.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,TG
YFV Vaccine_Immunization Coverage__TT,Trinidad and Tobago,YFV Vaccine,91.0,89.0,98.0,88.0,95.0,85.0,91.0,96.0,...,,,,,,Percentage,Immunization Coverage,UNICEF,National,TT


In [17]:
# Create the output folder if it is missing so the export does not fail on a
# fresh checkout, then write the cleaned table (the ID index is included).
os.makedirs('cleaned data', exist_ok=True)
un.to_csv('cleaned data/unicef.csv')