In [63]:
import pandas as pd

# import necessary files
df_import = pd.read_csv("CH 852351 Import Update 2023.csv")
df_export = pd.read_csv("CH 852351 Export Update 2023.csv")
df_iso = pd.read_csv("China_ISO_index.csv")


# change date format to yyyy-mm-dd
def update_date(df,date_col):
    for index, value in df.iterrows():
        df.loc[index, date_col] = f'{str(df.loc[index, date_col])[0:4]}-{str(df.loc[index, date_col])[4:]}-01'
    return df

# add ISO code to dataframe using left join on country code column
def update_iso(df,df_iso, country_col):
# add country for import file iso code from iso csv file 
    df = df.merge(df_iso[['Country_Code', 'ISO']], left_on=country_col, right_on='Country_Code', how='left')
    # df.rename(columns={'ISO Code': 'ISO'}, inplace=True)
    return df

# remove all columns name to uniform name
def uniform_title(df):
    df.rename(columns={'Date of data': 'DATE',
                       'Commodity code': 'HS_CODE',
                       'Customs Regime': 'CUSTOM_TYPE',
                       'Locations of importers and exporters': 'DOMESTIC_LOCATION',
                       'US dollar': 'VALUE_USD'}, inplace=True)
    return df

# remove unuse columns
def remove_columns(df):
    df.drop(columns=['Commodity', 
                        'Trading partner code', 
                        'Trading partner', 
                        'Customs Regime code', 
                        'Locations of importers and exporters code', 
                        'Country_Code'], axis=0, inplace=True)
    return df

# remove surfix 'Province'
def concat_chinese_regions(df,col_name, old_string, new_string):
    df[col_name] = df[col_name].str.replace(old_string, new_string)
    return df

# clean comma and convert datatype
def clean_comma_in_value(df, col_name):
    df[col_name] = df[col_name].str.replace(",", "")
    df[col_name] = df[col_name].astype(int)
    return df

# apply changes using defined functions to export dataframe
df_export = update_date(df_export, 'Date of data')
df_export = update_iso(df_export,df_iso, 'Trading partner code')
df_export = uniform_title(df_export)
df_export = remove_columns(df_export)
df_export = concat_chinese_regions(df_export, 'DOMESTIC_LOCATION', ' Province', '')
df_export = clean_comma_in_value(df_export, 'VALUE_USD')

# apply changes using defined functions to import dataframe
df_import = update_date(df_import, 'Date of data')
df_import = update_iso(df_import,df_iso, 'Trading partner code')
df_import = uniform_title(df_import)
df_import = remove_columns(df_import)
df_import = concat_chinese_regions(df_import, 'DOMESTIC_LOCATION', ' Province', '')
df_import = clean_comma_in_value(df_import, 'VALUE_USD')

#check the function results
df_import.head()

# export both import and export dataframe to csv files
df_import.to_csv("DB_CH_IMPORT_UPDATE_2023_APR_2023052304.csv", index=False)
df_export.to_csv("DB_CH_EXPORT_UPDATE_2023_APR_2023052304.csv", index=False)

In [53]:
# check if there is any ISO name missed to replace
df_import[df_import["ISO"].isna()]

Unnamed: 0,DATE,HS_CODE,Trading partner code,Trading partner,CUSTOM_TYPE,DOMESTIC_LOCATION,VALUE_USD,ISO


In [61]:
# check the datatype VALUE
print(df_export.VALUE_USD.dtypes)

int32
