In [1]:
import pandas as pd

def select_columns(df, columns):
    """Return the key columns plus the requested indicators, under short names.

    Args:
        df: DataFrame holding "Country Name", "Country Code" and "Year"
            columns together with the indicator columns named by the keys
            of ``columns``.
        columns: Mapping of original indicator column name -> output name.

    Returns:
        A new DataFrame with the columns "country", "code", "year" followed
        by the renamed indicators, in the iteration order of ``columns``.

    Raises:
        KeyError: If, after renaming, any of the expected output columns is
            absent from ``df``.
    """
    key_names = {"Country Name": "country", "Country Code": "code", "Year": "year"}
    # rename() without inplace=True works on a copy, so the frame passed in by
    # the caller keeps its original column names.
    renamed = df.rename(columns=columns).rename(columns=key_names)
    return renamed[list(key_names.values()) + list(columns.values())]

# Column maps: original World Bank indicator column -> short name used in the
# output. One map per topic file; commented-out entries are currently disabled.
agriculture_columns = {
    # "average_value_Access to electricity, rural (% of rural population)" : "porcentagem da populacao rural com acesso a eletricidade",
    "average_value_Agricultural raw materials exports (% of merchandise exports)": "exportacao_de_materias_primas_agricolas",
    "average_value_Agricultural raw materials imports (% of merchandise imports)": "importacao_de_materias_primas_agricolas",
    "average_value_Rural population (% of total population)": "porcentagem_da_populacao_rural",
    # "average_value_Agricultural methane emissions (% of total)" : "emissoes de metano agricolas",
}

enviroment_columns = {
    "average_value_Adjusted savings: carbon dioxide damage (% of GNI)": "dano_de_CO2_ajustado",
    "average_value_Adjusted savings: education expenditure (% of GNI)": "gastos_com_educacao_ajustados",
    "average_value_People using at least basic drinking water services (% of population)": "porcentagem_da_populacao_com_acesso_a_agua_potavel",
    "average_value_People using at least basic sanitation services (% of population)": "porcentagem_da_populacao_com_acesso_a_saneamento_basico",
    "average_value_Total natural resources rents (% of GDP)": "renda_total_de_recursos_naturais",
}

health_columns = {
    "average_value_Population growth (annual %)": "crescimento_populacional",
    "average_value_Population, female (% of total population)": "porcentagem_da_populacao_feminina",
    "average_value_Population, total": "populacao_total",
    "average_value_Population ages 15-64 (% of total population)": "porcentagem_da_populacao_entre_15_e_64_anos",
}

poverty_columns = {
    # "average_value_Gini index (World Bank estimate)" : "indice de gini",
    "average_value_Proportion of people living below 50 percent of median income (%)": "porcentagem da populacao vivendo com menos de 50% da renda mediana",
}

# Raw topic files, one CSV per World Bank dataset folder.
df_agriculture = pd.read_csv('./WB-datasets/agriculture-and-rural-development/agriculture-and-rural-development.csv')
df_enviroment = pd.read_csv('./WB-datasets/environment/environment.csv')
df_health = pd.read_csv('./WB-datasets/health/health.csv')
df_poverty = pd.read_csv('./WB-datasets/poverty/poverty.csv')

# Trim every topic frame down to the key columns plus its renamed indicators.
df_agriculture, df_enviroment, df_health, df_poverty = [
    select_columns(topic_frame, column_map)
    for topic_frame, column_map in (
        (df_agriculture, agriculture_columns),
        (df_enviroment, enviroment_columns),
        (df_health, health_columns),
        (df_poverty, poverty_columns),
    )
]

# Join the topics one after another on the shared keys. merge() defaults to an
# inner join, so only (country, code, year) rows present in every topic remain.
join_keys = ["country", "code", "year"]
df_brabo = df_agriculture
for topic_frame in (df_enviroment, df_health, df_poverty):
    df_brabo = df_brabo.merge(topic_frame, on=join_keys)


In [2]:
countries = pd.read_json('./data/countries.json')

# Some World Bank country names differ from the names used in countries.json.
# Normalize them on the merged frame *before* the continent lookup; renaming
# afterwards (or on a per-topic frame) has no effect on the lookup.
# NOTE(review): an earlier version also renamed "Channel Islands" to "Chile".
# The Channel Islands are not Chile, and that rename would collide with Chile's
# own rows, so it is intentionally not applied here.
country_aliases = {
    "Korea, Dem. People's Rep.": "North Korea",
    "Timor-Leste": "East Timor",
}
df_brabo = df_brabo.assign(country=df_brabo['country'].replace(country_aliases))

# Add a continent column by looking each country up in countries.json.
continent_by_country = countries.set_index('country')['continent']
df_brabo = df_brabo.assign(continent=df_brabo['country'].map(continent_by_country))

# Keep the names that found no continent so they can be inspected below.
countries_without_continent = df_brabo.loc[df_brabo['continent'].isna(), 'country'].unique()

# Drop every row whose country has no continent.
df_brabo = df_brabo[df_brabo['continent'].notna()]

# Last expression of the cell: show which countries were dropped.
countries_without_continent


In [3]:
# Export the full dataset and its 2018-only slice, each as CSV and as JSON
# records.
df_brabo_2018 = df_brabo.loc[df_brabo['year'] == 2018]

exports = (
    (df_brabo, 'data/data.csv', 'data/data.json'),
    (df_brabo_2018, 'data/data_2018.csv', 'data/data_2018.json'),
)
for export_frame, csv_path, json_path in exports:
    export_frame.to_csv(csv_path, index=False)
    export_frame.to_json(json_path, orient='records')

In [4]:
# Check whether Somaliland has any rows in the merged dataset.
df_brabo.loc[df_brabo['country'].eq('Somaliland')]

Unnamed: 0,country,code,year,exportacao_de_materias_primas_agricolas,importacao_de_materias_primas_agricolas,porcentagem_da_populacao_rural,dano_de_CO2_ajustado,gastos_com_educacao_ajustados,porcentagem_da_populacao_com_acesso_a_agua_potavel,porcentagem_da_populacao_com_acesso_a_saneamento_basico,renda_total_de_recursos_naturais,crescimento_populacional,porcentagem_da_populacao_feminina,populacao_total,porcentagem_da_populacao_entre_15_e_64_anos,porcentagem da populacao vivendo com menos de 50% da renda mediana,continent
