# Import libraries

In [1]:
import csv
import re

import pandas as pd
import session_info

# Filter data by country and indicator code 

In [2]:
# Read the raw CSV, then keep only the rows whose country code is "ARG".
input_df = pd.read_csv("../data/raw/WDICSV.csv")

is_arg_row = input_df["Country Code"].eq("ARG")
arg_indicators_df = input_df.loc[is_arg_row]

I selected the following indicator codes from the raw data to analyze in this project.

In [3]:
# Indicator codes to keep; later cells match these against the "Indicator Code"
# column of the country-filtered data.
selected_indicators = [
    "EG.ELC.ACCS.ZS",
    "FX.OWN.TOTL.ZS",
    "FX.OWN.TOTL.OL.ZS",
    "FX.OWN.TOTL.40.ZS",
    "FX.OWN.TOTL.PL.ZS",
    "FX.OWN.TOTL.60.ZS",
    "FX.OWN.TOTL.SO.ZS",
    "FX.OWN.TOTL.YG.ZS",
    "per_si_allsi.adq_pop_tot",
    "per_allsp.adq_pop_tot",
    "per_sa_allsa.adq_pop_tot",
    "per_lm_alllm.adq_pop_tot",
    "SE.PRM.TENR",
    "SL.TLF.0714.SW.TM",
    "SL.TLF.0714.WK.TM",
    "per_si_allsi.ben_q1_tot",
    "per_allsp.ben_q1_tot",
    "per_sa_allsa.ben_q1_tot",
    "per_lm_alllm.ben_q1_tot",
    "SL.TLF.0714.ZS",
    "SE.PRM.UNER",
    "SE.COM.DURS",
    "per_si_allsi.cov_pop_tot",
    "per_allsp.cov_pop_tot",
    "per_sa_allsa.cov_pop_tot",
    "per_lm_alllm.cov_pop_tot",
    "SE.XPD.CPRM.ZS",
    "SE.XPD.CSEC.ZS",
    "SE.XPD.CTER.ZS",
    "SE.XPD.CTOT.ZS",
    "SH.XPD.CHEX.GD.ZS",
    "SE.TER.CUAT.BA.ZS",
    "SE.SEC.CUAT.LO.ZS",
    "SE.SEC.CUAT.PO.ZS",
    "SE.PRM.CUAT.ZS",
    "SE.TER.CUAT.ST.ZS",
    "SE.SEC.CUAT.UP.ZS",
    "SE.TER.CUAT.MS.ZS",
    "SE.TER.CUAT.DO.ZS",
    "SL.EMP.MPYR.ZS",
    "SE.XPD.PRIM.ZS",
    "SE.XPD.SECO.ZS",
    "SE.XPD.TERT.ZS",
    "IT.NET.BBND",
    "IT.MLT.MAIN",
    "NY.GDP.MKTP.KD",
    "NY.GDP.MKTP.KD.ZG",
    "NY.GDP.PCAP.KD",
    "NY.GDP.PCAP.KD.ZG",
    "SI.POV.GINI",
    "IT.NET.USER.ZS",
    "SL.TLF.ADVN.ZS",
    "SL.TLF.BASC.ZS",
    "SL.TLF.INTM.ZS",
    "SL.TLF.TOTL.IN",
    "SP.DYN.LE00.IN",
    "SE.ADT.LITR.ZS",
    "SE.ADT.1524.LT.ZS",
    "IT.CEL.SETS",
    "SH.DTH.1014",
    "SH.DTH.1519",
    "SH.DTH.2024",
    "SH.DTH.0509",
    "SH.DTH.IMRT",
    "SH.DTH.MORT",
    "SE.PRM.OENR.ZS",
    "SL.TLF.PART.ZS",
    "SH.STA.SMSS.ZS",
    "SP.POP.0014.TO.ZS",
    "SP.POP.1564.TO.ZS",
    "SP.POP.65UP.TO.ZS",
    "SP.POP.TOTL",
    "SE.PRM.CMPT.ZS",
    "SE.LPV.PRIM.SD",
    "SE.PRM.AGES",
    "SE.LPV.PRIM.LD",
    "SE.PRE.ENRL.TC.ZS",
    "SE.PRM.ENRL.TC.ZS",
    "SE.SEC.ENRL.TC.ZS",
    "SE.TER.ENRL.TC.ZS",
    "SE.SEC.ENRL.UP.TC.ZS",
    "SM.POP.REFG",
    "SM.POP.REFG.OR",
    "SP.RUR.TOTL.ZS",
    "SE.PRE.ENRR",
    "SE.PRM.ENRR",
    "SE.SEC.ENRR",
    "SE.TER.ENRR",
    "SE.SEC.DURS",
    "SL.EMP.SELF.ZS",
    "SL.UEM.NEET.ME.ZS",
    "SL.UEM.ADVN.ZS",
    "SL.UEM.BASC.ZS",
    "SL.UEM.INTM.ZS",
    "SL.UEM.TOTL.ZS",
    "SP.URB.TOTL.IN.ZS"
]

# Display how many codes were selected.
len(selected_indicators)

96

## Code cell for filtering by Indicator Name

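This code cell is useful for filtering the raw data by Indicator Name. It is not necessary if you are filtering by Indicator Code or ID. Because `selected_indicators` currently holds indicator codes rather than names, no name matches and the cell returns 0 rows.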

In [4]:
# Keep only the rows whose "Indicator Name" exactly equals one of the entries in
# `selected_indicators`.
#
# An exact-membership test is used rather than a regex `str.contains`, because
# substring matches would also pull in names that merely contain a selected string
# and would then have to be filtered out again. Rows with a missing name never match.
indicators_df = arg_indicators_df[arg_indicators_df["Indicator Name"].isin(selected_indicators)]

# Number of rows that matched by name.
indicators_df.shape[0]


0

## Code cell for filtering by Indicator Code

In [5]:
# Keep the country-filtered rows whose "Indicator Code" is one of the selected codes,
# then display how many rows remain.
indicators_df = arg_indicators_df.loc[
    arg_indicators_df["Indicator Code"].isin(selected_indicators)
]
indicators_df.shape[0]

96

# Create a DataFrame dictionary and index data by indicator code

In [18]:
def get_snake_case(x):
    """Return ``x`` converted to snake_case.

    Steps, in order:
      1. every run of whitespace, hyphens, or dots becomes a single underscore;
      2. an underscore is inserted before an uppercase letter that directly
         follows a lowercase letter or a digit;
      3. the text is lowercased and leading/trailing underscores are removed.

    Example: ``"FX.OWN.TOTL.40.ZS"`` -> ``"fx_own_totl_40_zs"``.
    """
    with_separators = re.sub(r"[\s\-.]+", "_", x)
    with_word_breaks = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", with_separators)
    return with_word_breaks.lower().strip("_")


# Lookup table: one row per selected indicator, holding its English name and code,
# plus a snake_case version of the code.
df_dictionary = indicators_df.loc[:, ["Indicator Name", "Indicator Code"]].copy()
df_dictionary["Indicator Code Snake Case"] = df_dictionary["Indicator Code"].map(get_snake_case)

# Spanish names keyed by indicator code. File generated with ChatGPT.
wdi_translations = pd.read_csv("../data/utils/WDI_TRANSLATIONS.csv", index_col="Indicator Code")
spanish_name_by_code = wdi_translations["Indicator Name in Spanish"]

# Codes with no translation get an empty string instead of NaN.
df_dictionary["Indicator Name in Spanish"] = (
    df_dictionary["Indicator Code"].map(spanish_name_by_code).fillna("")
)

# Append one extra dictionary row describing the "year" field.
year_entry = pd.DataFrame(
    {
        "Indicator Code": ["year"],
        "Indicator Code Snake Case": ["year"],
        "Indicator Name": ["Year"],
        "Indicator Name in Spanish": ["Año"],
    }
)
df_dictionary = pd.concat([df_dictionary, year_entry], ignore_index=True)

# Drop the descriptive text columns, index by indicator code, and transpose so
# each indicator code becomes a column.
transposed_indicators_df = (
    indicators_df
    .drop(columns=["Country Code", "Country Name", "Indicator Name"])
    .set_index("Indicator Code")
    .transpose()
)

df_dictionary.head()

Unnamed: 0,Indicator Name,Indicator Code,Indicator Code Snake Case,Indicator Name in Spanish
0,Access to electricity (% of population),EG.ELC.ACCS.ZS,eg_elc_accs_zs,Acceso a la electricidad (% de la población)
1,Account ownership at a financial institution o...,FX.OWN.TOTL.ZS,fx_own_totl_zs,Titularidad de cuenta en una institución finan...
2,Account ownership at a financial institution o...,FX.OWN.TOTL.OL.ZS,fx_own_totl_ol_zs,Titularidad de cuenta en una institución finan...
3,Account ownership at a financial institution o...,FX.OWN.TOTL.40.ZS,fx_own_totl_40_zs,Titularidad de cuenta en una institución finan...
4,Account ownership at a financial institution o...,FX.OWN.TOTL.PL.ZS,fx_own_totl_pl_zs,Titularidad de cuenta en una institución finan...


In [12]:
# (rows, columns) of the transposed frame, whose columns are the indicator codes.
transposed_indicators_df.shape

(64, 96)

# Save the interim data

I'll save the interim data and a DataFrame dictionary that stores the relationship between indicator codes and names.

In [16]:
# Interim data: the index of the transposed frame is written out as a column labelled "Year".
transposed_indicators_df.to_csv("../data/interim/WDICSV_INTERIM.csv", index_label="Year")

# Dictionary: written without the row index. csv.QUOTE_ALL (numeric value 1) wraps
# every field in quotes; the named constant replaces the bare magic number.
df_dictionary.to_csv("../data/utils/DF_DICTIONARY.csv", index=False, quoting=csv.QUOTE_ALL)

# Session Info

In [16]:
# Show session details for reproducibility (presumably the loaded package versions — confirm in the session_info docs).
session_info.show()