# UN Comtrade exports data

Source: [UN Comtrade database](https://comtrade.un.org/data/)

In [1]:
import pandas as pd

# Material whose 2017 export file is loaded below; the value is interpolated
# into the raw CSV path. Options noted by the author: 'plastics', 'paper'.
material = 'plastics'

In [2]:
# Load the raw Comtrade export rows for the selected material, keep four
# positional columns, give them short snake_case names, and order the rows by
# country name and then period.
df = (
    pd.read_csv(
        f'data/raw/un-comtrade/{material}-exports-2017.csv',
        usecols=[3, 8, 9, 29],
        parse_dates=['Period Desc.'],
    )
    .rename(columns={
        'Period Desc.': 'period',
        'Reporter Code': 'country_code',
        'Reporter': 'country_name',
        'Netweight (kg)': 'kg',
    })
    # Note: this replaces every missing value, in any column, with 0.
    .fillna(0)
    .sort_values(by=['country_name', 'period'])
    .reset_index(drop=True)
)

print(len(df))

df.head()

2539


Unnamed: 0,period,country_code,country_name,kg
0,2017-01-01,12,Algeria,43346.0
1,2017-03-01,12,Algeria,32800.0
2,2017-03-01,28,Antigua and Barbuda,17000.0
3,2017-05-01,28,Antigua and Barbuda,1000.0
4,2017-10-01,28,Antigua and Barbuda,20123.0


In [3]:
# Show the dtype pandas assigned to each column of the loaded frame.
df.dtypes

period          datetime64[ns]
country_code             int64
country_name            object
kg                     float64
dtype: object

In [4]:
# Per-column flag: does any missing value remain? Because the load step
# already applied fillna(0), every column is expected to report False.
df.isna().any()

period          False
country_code    False
country_name    False
kg              False
dtype: bool

## Join to country codes

In [5]:
# Build the country_code -> ISO2 lookup from the Comtrade code list, keeping
# only entries whose "End Valid Year" is "Now" and that have no missing field.
df_iso2 = (
    pd.read_excel(
        'data/raw/Comtrade Country Code and ISO list.xlsx',
        usecols=[0, 1, 4, 7],
        # Only the literal 'N/A' is treated as missing. The default NA markers
        # are switched off because Namibia's ISO2 code is 'NA'.
        na_values='N/A',
        keep_default_na=False,
    )
    .rename(columns={
        'Country Code': 'country_code',
        'ISO2-digit Alpha': 'iso2',
        'End Valid Year': 'end_valid_year',
    })
    .query('end_valid_year == "Now"')
    .drop(columns='end_valid_year')
    .dropna()
    .reset_index(drop=True)
)

print(len(df_iso2))

df_iso2.head()

235


Unnamed: 0,country_code,"Country Name, Full",iso2
0,4,Afghanistan,AF
1,8,Albania,AL
2,12,Algeria,DZ
3,16,American Samoa,AS
4,20,Andorra,AD


In [6]:
# Attach the ISO2 code to every export row. Rows reported under the name
# "EU-27" are excluded first; the left join keeps every remaining export row,
# whether or not its country_code is found in the lookup.
df_joined = (
    df
    .query('country_name not in "EU-27"')
    .merge(df_iso2, how='left', on='country_code')
    # The trailing space in 'Country Name, Full ' is part of the column label.
    .drop(columns=['country_code', 'Country Name, Full '])
)

print(len(df_joined))

df_joined.head()

2491


Unnamed: 0,period,country_name,kg,iso2
0,2017-01-01,Algeria,43346.0,DZ
1,2017-03-01,Algeria,32800.0,DZ
2,2017-03-01,Antigua and Barbuda,17000.0,AG
3,2017-05-01,Antigua and Barbuda,1000.0,AG
4,2017-10-01,Antigua and Barbuda,20123.0,AG


## Write monthly data to CSV

In [7]:
# NOTE(review): this export is currently disabled. The column selection below
# does not match df_joined as built above (its columns are period,
# country_name, kg and iso2; country_code has been dropped), so it would raise
# a KeyError if uncommented as-is. Update the column list before re-enabling.
# df_joined[['Time', 'country_code', 'Country', 'kg']].to_csv(
#     f'data/processed/world-{material}-2017.csv',
#     index=False
# )