# Notebook: Ingest the export_goods_services_dataset

***Author:*** Frederick Salazar <br>
***Data Source:*** https://datos.bancomundial.org/indicador/NE.EXP.GNFS.ZS?view=chart <br>
***Description:*** This dataset contains data on exports of goods and services as a percentage of GDP for each country from 1960 to 2023. The data is obtained from the World Bank repository and exported in tabular form in CSV format.

## Imports and configuration

In [155]:
#library importations

import pandas as pd

import os
import requests
import zipfile
from io import BytesIO
from unidecode import unidecode

version = 1.0

In [156]:
# Folder layout used throughout the notebook
raw = './data/raw'
processed_data = './data/processed'
output_data = './data/output'

# The cleaned dataset is written inside the processed folder
output_dataset = f'{processed_data}/export_goods_services_dataset.csv'

In [157]:
def donwload_data(data_source_in):
    """Download a ZIP archive from a URL and extract it into the raw folder.

    Args:
        data_source_in (String): url of the ZIP file to download

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        zipfile.BadZipFile: if the downloaded payload is not a valid ZIP archive.
    """
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(data_source_in, timeout=60)

    # Fail loudly here instead of silently skipping the extraction, which would
    # only surface later as a confusing NameError in the loading cell.
    response.raise_for_status()

    # Read the ZIP content in memory and unpack it straight into the raw folder
    with zipfile.ZipFile(BytesIO(response.content)) as z:
        z.extractall(raw)

## Download and read data

In [158]:
# Fetch the World Bank ZIP for indicator NE.EXP.GNFS.ZS and unpack it into the raw folder
donwload_data(
    'https://api.worldbank.org/v2/es/indicator/NE.EXP.GNFS.ZS?downloadformat=csv'
)

## Load data

In [159]:

# File-name prefixes of the two CSV files needed from the raw folder.
# The data file name ends with a release number (e.g. "_5037"); that suffix is
# left out of the prefix so a newer download is still found.
exports_prefix = 'API_NE.EXP.GNFS.ZS_DS2_es_csv_v2'
metadata_prefix = 'Metadata_Country'

raw_files = sorted(os.listdir(raw))
exports_files = [name for name in raw_files if name.startswith(exports_prefix)]
metadata_files = [name for name in raw_files if name.startswith(metadata_prefix)]

# Stop with a clear message instead of a NameError further down the notebook.
if not exports_files:
    raise FileNotFoundError(f"No file starting with '{exports_prefix}' found in {raw}")
if not metadata_files:
    raise FileNotFoundError(f"No file starting with '{metadata_prefix}' found in {raw}")

# When several matching files are present, the last one in name order is used
# so the choice is deterministic.
df_exports = pd.read_csv(f'{raw}/{exports_files[-1]}',
                         sep=',',
                         skiprows=4)  # the header row comes after 4 preamble lines
# Drop the last column (presumably the empty column created by the trailing
# comma in the World Bank CSV - verify against the raw file).
df_exports = df_exports.iloc[:, :-1]

df_metadata = pd.read_csv(f'{raw}/{metadata_files[-1]}',
                          sep=',',
                          skiprows=0)
# Same trailing-column removal as for the exports table.
df_metadata = df_metadata.iloc[:, :-1]

## Transform data

In [160]:
# Upper-case every string cell in both tables; non-string cells are left untouched.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map - switch once the minimum supported pandas version allows it.
df_exports = df_exports.applymap(lambda x: x.upper() if isinstance(x, str) else x)
df_metadata = df_metadata.applymap(lambda x: x.upper() if isinstance(x, str) else x)

# Remove accents (transliterate to ASCII) in the exports table only;
# the metadata table keeps its accented text.
df_exports = df_exports.applymap(lambda x: unidecode(x) if isinstance(x, str) else x)


In [161]:
# Reshape from wide (one column per year) to long (one row per country and year)
df_exports = df_exports.melt(
    id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
    var_name='year',
    value_name='exports_of_goods_and_services',
)
                             

In [162]:
# Normalise the column headers of both tables to snake_case
df_exports = df_exports.rename(columns={
    'Country Name': 'country_name',
    'Country Code': 'country_code',
    'Indicator Name': 'indicator_name',
    'Indicator Code': 'indicator_code',
})

df_metadata = df_metadata.rename(columns={
    'Country Name': 'country_name',
    'Country Code': 'country_code',
    'Region': 'region',
    'Income_Group': 'income_group',
})

In [163]:
# Replace missing indicator values with 0.
# NOTE(review): this makes "no data" indistinguishable from a true 0 % - confirm this is intended.
df_exports = df_exports.fillna({'exports_of_goods_and_services': 0})

In [164]:
# Attach the country metadata to every exports row; the left join keeps
# exports rows that have no metadata match.
df_exports = df_exports.merge(df_metadata, how='left', on='country_code')

In [165]:
# Keep the final columns in output order. 'country_name_x' is the exports-side
# country name (suffix added by the merge) and is renamed back to 'country_name'.
df_exports = df_exports[[
    'country_name_x',
    'country_code',
    'region',
    'income_group',
    'indicator_name',
    'indicator_code',
    'year',
    'exports_of_goods_and_services',
]].rename(columns={'country_name_x': 'country_name'})

In [166]:
# Display the final long-format table as the cell output for a visual check before saving
df_exports

Unnamed: 0,country_name,country_code,region,income_group,indicator_name,indicator_code,year,exports_of_goods_and_services
0,ARUBA,ABW,,INGRESO ALTO,EXPORTACIONES DE BIENES Y SERVICIOS (% DEL PIB),NE.EXP.GNFS.ZS,1960,0.000000
1,,AFE,,AGREGADOS,EXPORTACIONES DE BIENES Y SERVICIOS (% DEL PIB),NE.EXP.GNFS.ZS,1960,0.000000
2,AFGANISTAN,AFG,ASIA MERIDIONAL,PAÍSES DE INGRESO BAJO,EXPORTACIONES DE BIENES Y SERVICIOS (% DEL PIB),NE.EXP.GNFS.ZS,1960,0.000000
3,,AFW,,AGREGADOS,EXPORTACIONES DE BIENES Y SERVICIOS (% DEL PIB),NE.EXP.GNFS.ZS,1960,0.000000
4,ANGOLA,AGO,ÁFRICA AL SUR DEL SAHARA (EXCLUIDO ALTOS INGRE...,PAÍSES DE INGRESO MEDIANO BAJO,EXPORTACIONES DE BIENES Y SERVICIOS (% DEL PIB),NE.EXP.GNFS.ZS,1960,0.000000
...,...,...,...,...,...,...,...,...
17019,KOSOVO,XKX,EUROPA Y ASIA CENTRAL (EXCLUIDO ALTOS INGRESOS),INGRESO MEDIANO ALTO,EXPORTACIONES DE BIENES Y SERVICIOS (% DEL PIB),NE.EXP.GNFS.ZS,2023,39.640480
17020,"YEMEN, REP. DEL",YEM,ORIENTE MEDIO Y NORTE DE ÁFRICA (EXCLUIDO ALTO...,PAÍSES DE INGRESO BAJO,EXPORTACIONES DE BIENES Y SERVICIOS (% DEL PIB),NE.EXP.GNFS.ZS,2023,0.000000
17021,SUDAFRICA,ZAF,ÁFRICA AL SUR DEL SAHARA (EXCLUIDO ALTOS INGRE...,INGRESO MEDIANO ALTO,EXPORTACIONES DE BIENES Y SERVICIOS (% DEL PIB),NE.EXP.GNFS.ZS,2023,32.754527
17022,ZAMBIA,ZMB,ÁFRICA AL SUR DEL SAHARA (EXCLUIDO ALTOS INGRE...,PAÍSES DE INGRESO MEDIANO BAJO,EXPORTACIONES DE BIENES Y SERVICIOS (% DEL PIB),NE.EXP.GNFS.ZS,2023,40.846124


## Save processed data

In [167]:
# to_csv does not create missing folders, so make sure the destination exists first.
output_dir = os.path.dirname(output_dataset)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Write the processed dataset without the pandas index column
df_exports.to_csv(output_dataset, index=False)