In [1]:
import pandas as pd
import re

In [2]:
input_df = pd.read_csv('../data/raw/WDICSV.csv')

#Filtering data by country
arg_indicators_df = input_df[input_df['Country Code'] == 'ARG']

In [3]:
#I select these Indicators (by name) from the raw data to analyze them in this project 
#It would have been easier to save the ID of these indicators.
columns = [
    "Pupil-teacher ratio, preprimary",
    "Pupil-teacher ratio, primary",
    "Pupil-teacher ratio, secondary",
    "Pupil-teacher ratio, tertiary",
    "Pupil-teacher ratio, upper secondary",
    "Rural population",
    "School enrollment, preprimary (% gross)",
    "School enrollment, primary (% gross)",
    "School enrollment, secondary (% gross)",
    "School enrollment, tertiary (% gross)",
    "Secondary education, duration (years)",
    "Account ownership at a financial institution or with a mobile-money-service provider, primary education or less (% of population ages 15+)",
    "Adjusted net enrollment rate, primary (% of primary school age children)",
    "Children out of school, primary",
    "Current education expenditure, primary (% of total expenditure in primary public institutions)",
    "Educational attainment, at least completed primary, population 25+ years, total (%) (cumulative)",
    "Expenditure on primary education (% of government expenditure on education)",
    "Over-age students, primary (% of enrollment)",
    "Primary completion rate, total (% of relevant age group)",
    "Primary school age children out-of-school (%)",
    "Primary school starting age (years)",
    "Pupils below minimum reading proficiency at end of primary (%). Low GAML threshold",
    "Educational attainment, at least Bachelor's or equivalent, population 25+, total (%) (cumulative)",
    "Educational attainment, at least completed lower secondary, population 25+, total (%) (cumulative)",
    "Educational attainment, at least completed post-secondary, population 25+, total (%) (cumulative)",
    "Educational attainment, at least completed short-cycle tertiary, population 25+, total (%) (cumulative)",
    "Educational attainment, at least completed upper secondary, population 25+, total (%) (cumulative)",
    "Educational attainment, at least Master's or equivalent, population 25+, total (%) (cumulative)",
    "Educational attainment, Doctoral or equivalent, population 25+, total (%) (cumulative)",
    "Current education expenditure, secondary (% of total expenditure in secondary public institutions)",
    "Current education expenditure, tertiary (% of total expenditure in tertiary public institutions)",
    "Current education expenditure, total (% of total expenditure in public institutions)",
    "Average working hours of children, study and work, ages 7-14 (hours per week)",
    "Average working hours of children, working only, ages 7-14 (hours per week)",
    "Children in employment, total (% of children ages 7-14)",
    "Employers, total (% of total employment) (modeled ILO estimate)",
    "Labor force with advanced education (% of total working-age population with advanced education)",
    "Labor force with basic education (% of total working-age population with basic education)",
    "Labor force with intermediate education (% of total working-age population with intermediate education)",
    "Labor force, total",
    "Life expectancy at birth, total (years)",
    "Literacy rate, adult total (% of people ages 15 and above)",
    "Literacy rate, youth total (% of people ages 15-24)",
    "Mobile cellular subscriptions",
    "Number of deaths ages 10-14 years",
    "Number of deaths ages 15-19 years",
    "Number of deaths ages 20-24 years",
    "Number of deaths ages 5-9 years",
    "Number of infant deaths",
    "Number of under-five deaths",
    "Part time employment, total (% of total employment)",
    "People using safely managed sanitation services (% of population)",
    "Population ages 0-14 (% of total population)",
    "Population ages 0-14, total",
    "Population ages 15-64 (% of total population)",
    "Population ages 65 and above (% of total population)",
    "Population, total",
    "Gini index",
    "Individuals using the Internet (% of population)",
    "Fixed broadband subscriptions",
    "Fixed telephone subscriptions",
    "GDP (constant 2015 US$)",
    "GDP growth (annual %)",
    "GDP per capita (constant 2015 US$)",
    "GDP per capita growth (annual %)",
    "Access to electricity (% of population)",
    "Current health expenditure (% of GDP)",
    "Expenditure on secondary education (% of government expenditure on education)",
    "Expenditure on tertiary education (% of government expenditure on education)",
    "Coverage of social insurance programs (% of population)",
    "Coverage of social protection and labor programs (% of population)",
    "Coverage of social safety net programs (% of population)",
    "Coverage of unemployment benefits and ALMP (% of population)",
    "Urban population (% of total population)",
    "Share of youth not in education, employment or training, total (% of youth population)  (modeled ILO estimate)",
    "Refugee population by country or territory of asylum",
    "Refugee population by country or territory of origin",
    "Self-employed, total (% of total employment) (modeled ILO estimate)",
    "Unemployment with advanced education (% of total labor force with advanced education)",
    "Unemployment with basic education (% of total labor force with basic education)",
    "Unemployment with intermediate education (% of total labor force with intermediate education)",
    "Unemployment, total (% of total labor force) (modeled ILO estimate)",
    "Compulsory education, duration (years)",
    "Account ownership at a financial institution or with a mobile-money-service provider (% of population ages 15+)",
    "Account ownership at a financial institution or with a mobile-money-service provider, older adults (% of population ages 25+)",
    "Account ownership at a financial institution or with a mobile-money-service provider, poorest 40% (% of population ages 15+)",
    "Account ownership at a financial institution or with a mobile-money-service provider, richest 60% (% of population ages 15+)",
    "Account ownership at a financial institution or with a mobile-money-service provider, secondary education or more (% of population ages 15+)",
    "Account ownership at a financial institution or with a mobile-money-service provider, young adults (% of population ages 15-24)",
    "Adequacy of social insurance programs (% of total welfare of beneficiary households)",
    "Adequacy of social protection and labor programs (% of total welfare of beneficiary households)",
    "Adequacy of social safety net programs (% of total welfare of beneficiary households)",
    "Adequacy of unemployment benefits and ALMP (% of total welfare of beneficiary households)",
    "Benefit incidence of social insurance programs to poorest quintile (% of total social insurance benefits)",
    "Benefit incidence of social protection and labor programs to poorest quintile (% of total SPL benefits)",
    "Benefit incidence of social safety net programs to poorest quintile (% of total safety net benefits)",
    "Benefit incidence of unemployment benefits and ALMP to poorest quintile (% of total U/ALMP benefits)"
]

len(columns)

97

In [4]:
#This code cell it's necessary to filter the raw data by Indicator Name 
escaped_strings = [re.escape(column) for column in columns]
pattern = '|'.join(escaped_strings)

indicators_df = arg_indicators_df[arg_indicators_df['Indicator Name'].str.contains(pattern, case=False, na=False)]

different_values = list(set(indicators_df['Indicator Name'].unique().tolist()) - set(columns))

indicators_df = indicators_df[~(indicators_df['Indicator Name'].isin(different_values))]
indicators_df.shape[0]


97

In [None]:
#Indexing data by year
transposed_indicators_df = indicators_df.drop(["Country Code","Indicator Code","Country Name"], axis=1, inplace=False).set_index("Indicator Name").T
transposed_indicators_df

Indicator Name,Access to electricity (% of population),Account ownership at a financial institution or with a mobile-money-service provider (% of population ages 15+),"Account ownership at a financial institution or with a mobile-money-service provider, older adults (% of population ages 25+)","Account ownership at a financial institution or with a mobile-money-service provider, poorest 40% (% of population ages 15+)","Account ownership at a financial institution or with a mobile-money-service provider, primary education or less (% of population ages 15+)","Account ownership at a financial institution or with a mobile-money-service provider, richest 60% (% of population ages 15+)","Account ownership at a financial institution or with a mobile-money-service provider, secondary education or more (% of population ages 15+)","Account ownership at a financial institution or with a mobile-money-service provider, young adults (% of population ages 15-24)",Adequacy of social insurance programs (% of total welfare of beneficiary households),Adequacy of social protection and labor programs (% of total welfare of beneficiary households),...,"School enrollment, secondary (% gross)","School enrollment, tertiary (% gross)","Secondary education, duration (years)","Self-employed, total (% of total employment) (modeled ILO estimate)","Share of youth not in education, employment or training, total (% of youth population) (modeled ILO estimate)",Unemployment with advanced education (% of total labor force with advanced education),Unemployment with basic education (% of total labor force with basic education),Unemployment with intermediate education (% of total labor force with intermediate education),"Unemployment, total (% of total labor force) (modeled ILO estimate)",Urban population (% of total population)
1960,,,,,,,,,,,...,,,,,,,,,,73.611
1961,,,,,,,,,,,...,,,,,,,,,,74.217
1962,,,,,,,,,,,...,,,,,,,,,,74.767
1963,,,,,,,,,,,...,,,,,,,,,,75.309
1964,,,,,,,,,,,...,,,,,,,,,,75.844
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019,100.0,,,,,,,,43.719119,39.044186,...,110.498528,95.077553,6.0,26.507172,19.228,3.874,12.303,11.232,9.843,91.991
2020,100.0,,,,,,,,45.776217,39.301338,...,112.416679,99.266228,6.0,26.999983,22.328,4.718,14.823,12.566,11.461,92.111
2021,100.0,71.63,70.62,65.23,57.1,75.82,79.69,75.37,41.644065,36.560519,...,116.559448,107.130661,6.0,27.588936,16.118,3.426,10.395,10.645,8.736,92.229
2022,100.0,,,,,,,,,,...,,,6.0,27.605387,15.872,2.898,7.777,8.295,6.805,92.347


In [6]:
transposed_indicators_df.info

<bound method DataFrame.info of Indicator Name  Access to electricity (% of population)  \
1960                                                NaN   
1961                                                NaN   
1962                                                NaN   
1963                                                NaN   
1964                                                NaN   
...                                                 ...   
2019                                              100.0   
2020                                              100.0   
2021                                              100.0   
2022                                              100.0   
2023                                                NaN   

Indicator Name  Account ownership at a financial institution or with a mobile-money-service provider (% of population ages 15+)  \
1960                                                          NaN                                                               

In [8]:
#Saving the interim data
output_indicators_file = transposed_indicators_df.to_csv('../data/interim/WDICSV_INTERIM.csv', index_label='Year')