In [4]:
# Libraries
from pathlib import Path
import pandas as pd

In [6]:
# Read supporting files
# Every lookup table is parsed with the same options (';' separator,
# ',' decimal mark, latin1 encoding); they are declared once here so the
# four reads cannot drift apart.
DATA_DIR = Path('data')
CSV_READ_OPTIONS = dict(sep=';', decimal=',', encoding='latin1')

ncm_sh = pd.read_csv(DATA_DIR / 'NCM_SH.csv', **CSV_READ_OPTIONS)
pais_bloco = pd.read_csv(DATA_DIR / 'PAIS_BLOCO.csv', **CSV_READ_OPTIONS)
pais = pd.read_csv(DATA_DIR / 'PAIS.csv', **CSV_READ_OPTIONS)
via = pd.read_csv(DATA_DIR / 'VIA.csv', **CSV_READ_OPTIONS)

In [None]:
# Adjustments: helper functions that reshape the country/block lookup tables
def adjust_blocks(df, blocks_remove=(53, 111, 22)):
    """Split a country/block table into geographic and economic block columns.

    Rows whose ``CO_BLOCO`` is listed in ``blocks_remove`` are taken out of
    the result and re-attached, per country, as ``*_BLOCK_ECO`` columns.
    All other rows are kept, with their block columns renamed to
    ``*_BLOCK_GEO``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least ``CO_PAIS``, ``CO_BLOCO``, ``NO_BLOCO``,
        ``NO_BLOCO_ING`` and ``NO_BLOCO_ESP``. It is not modified.
    blocks_remove : iterable of int, optional
        ``CO_BLOCO`` codes to treat as economic blocks. Defaults to the
        three codes this notebook has always used.
        NOTE(review): presumably ASEAN (53), Mercosul (111) and the European
        Union (22) -- confirm against PAIS_BLOCO.csv.

    Returns
    -------
    pandas.DataFrame
        The non-removed rows of ``df`` (fresh 0..n-1 index) with the block
        columns renamed to ``CO_BLOCK_GEO`` / ``NO_BLOCK_GEO`` /
        ``NO_BLOCK_GEO_ENG`` / ``NO_BLOCK_GEO_ESP``, followed by
        ``CO_BLOCK_ECO`` / ``NO_BLOCK_ECO`` / ``NO_BLOCK_ECO_ENG`` /
        ``NO_BLOCK_ECO_ESP``. The economic columns are missing (NaN) for
        countries that appear in none of the removed blocks. Because this
        is a left join, a country listed under several removed blocks
        produces one output row per match.
    """
    # Compute the membership mask once; both halves of the split reuse it.
    is_econ_block = df['CO_BLOCO'].isin(list(blocks_remove))

    # Rows that stay: everything outside the economic blocks. Boolean
    # indexing and rename both return new objects, so `df` is left untouched.
    geo_rows = df[~is_econ_block].rename(columns={
        'CO_BLOCO': 'CO_BLOCK_GEO',
        'NO_BLOCO': 'NO_BLOCK_GEO',
        'NO_BLOCO_ING': 'NO_BLOCK_GEO_ENG',
        'NO_BLOCO_ESP': 'NO_BLOCK_GEO_ESP'
    })

    # Rows that were removed: kept aside as the economic-block lookup.
    econ_rows = df[is_econ_block].rename(columns={
        'CO_BLOCO': 'CO_BLOCK_ECO',
        'NO_BLOCO': 'NO_BLOCK_ECO',
        'NO_BLOCO_ING': 'NO_BLOCK_ECO_ENG',
        'NO_BLOCO_ESP': 'NO_BLOCK_ECO_ESP'
    })

    # Left join keeps every geographic row, including countries without
    # an economic block.
    return geo_rows.merge(
        econ_rows[['CO_PAIS', 'CO_BLOCK_ECO', 'NO_BLOCK_ECO',
                   'NO_BLOCK_ECO_ENG', 'NO_BLOCK_ECO_ESP']],
        on='CO_PAIS',
        how='left'
    )


def add_country_info(df, country_df):
    """Left-join the ISO code and country-name columns of ``country_df`` onto ``df``.

    Rows are matched on ``CO_PAIS``. Every row of ``df`` is kept, and rows
    with no match in ``country_df`` get missing values in the added columns.
    Only the columns listed below are taken from ``country_df``.
    """
    lookup_columns = [
        'CO_PAIS',
        'CO_PAIS_ISOA3',
        'NO_PAIS',
        'NO_PAIS_ING',
        'NO_PAIS_ESP',
    ]
    country_lookup = country_df[lookup_columns]
    return df.merge(country_lookup, how='left', on='CO_PAIS')


# ---- EXECUTION ----

# Build the country lookup: split the block codes first, then attach the
# ISO code and country names.
country_unique = adjust_blocks(pais_bloco)
country_comexstat = add_country_info(country_unique, pais)

# Save outputs to the 'output/' folder (created if it does not exist yet).
OUTPUT_DIR = Path("output")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# One set of writer options for all exports, so the files stay consistent
# with each other (';' separator, ',' decimal mark, latin1, no index column).
CSV_WRITE_OPTIONS = dict(sep=';', decimal=',', index=False, encoding='latin1')

# Output file name -> frame to write.
exports = {
    'pais_comexstat.csv': country_comexstat,
    'ncm_sh.csv': ncm_sh,
    'via.csv': via,
}
for file_name, frame in exports.items():
    frame.to_csv(OUTPUT_DIR / file_name, **CSV_WRITE_OPTIONS)