# Trade of goods, US$, HS 1992, ALL COMMODITIES

- UN Statistics Division: http://unstats.un.org/unsd/
- UN Comtrade glossary: https://comtrade.un.org/db/mr/rfGlossaryList.aspx

The United Nations Commodity Trade Statistics Database (UN Comtrade) stores more than 1 billion trade data records dating back to 1962. Over 140 reporter countries provide the United Nations Statistics Division with their annual international trade statistics, detailed by commodity and partner country. These data are then transformed into the United Nations Statistics Division standard format, with consistent coding and valuation, using the UN/OECD CoprA internal processing system.


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings

# Settings

In [2]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.autolayout'] = True
plt.rcParams['font.size'] = 12
path = Path.cwd()                                          # get current working directory
warnings.filterwarnings('ignore')

# Fetch Data

In [3]:
# Load the trade extract; the relative file name is resolved against the
# current working directory.
df = pd.read_csv(
    filepath_or_buffer='Trade of goods , US$, HS 1992, ALL COMMODITIES.csv',
)

# Head and Tail

In [4]:
# Bare expression: the notebook renders a truncated preview of the frame
# (leading and trailing rows), which serves as the head/tail inspection.
df

Unnamed: 0,Country or Area,Year,Commodity,Flow,Trade (USD),Weight (kg),Quantity Name,Quantity
0,Afghanistan,2018,ALL COMMODITIES,Import,7.406590e+09,,No Quantity,0.0
1,Afghanistan,2018,ALL COMMODITIES,Export,8.845045e+08,0.0,No Quantity,0.0
2,Afghanistan,2018,ALL COMMODITIES,Re-Export,9.263097e+06,0.0,No Quantity,0.0
3,Afghanistan,2016,ALL COMMODITIES,Import,6.534140e+09,,No Quantity,
4,Afghanistan,2016,ALL COMMODITIES,Export,5.964553e+08,,No Quantity,
...,...,...,...,...,...,...,...,...
10139,Zimbabwe,2001,ALL COMMODITIES,Export,1.206663e+09,,No Quantity,
10140,Zimbabwe,2001,ALL COMMODITIES,Re-Export,1.594229e+07,,No Quantity,
10141,Zimbabwe,2000,ALL COMMODITIES,Export,1.879631e+09,,No Quantity,
10142,Zimbabwe,1995,ALL COMMODITIES,Import,2.658853e+09,,No Quantity,


In [5]:
# Print the column dtypes, non-null counts and memory usage of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10144 entries, 0 to 10143
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Country or Area  10144 non-null  object 
 1   Year             10144 non-null  int64  
 2   Commodity        10144 non-null  object 
 3   Flow             10144 non-null  object 
 4   Trade (USD)      10144 non-null  float64
 5   Weight (kg)      983 non-null    float64
 6   Quantity Name    10144 non-null  object 
 7   Quantity         1437 non-null   float64
dtypes: float64(3), int64(1), object(4)
memory usage: 634.1+ KB


In [6]:
# Normalise the header: give the country column a snake_case name, then
# lower-case every column label. Built as one chained expression that is
# rebound to `df`.
df = (
    df
    .rename(columns={'Country or Area': 'country_or_area'})
    .rename(columns=str.lower)
)
df

Unnamed: 0,country_or_area,year,commodity,flow,trade (usd),weight (kg),quantity name,quantity
0,Afghanistan,2018,ALL COMMODITIES,Import,7.406590e+09,,No Quantity,0.0
1,Afghanistan,2018,ALL COMMODITIES,Export,8.845045e+08,0.0,No Quantity,0.0
2,Afghanistan,2018,ALL COMMODITIES,Re-Export,9.263097e+06,0.0,No Quantity,0.0
3,Afghanistan,2016,ALL COMMODITIES,Import,6.534140e+09,,No Quantity,
4,Afghanistan,2016,ALL COMMODITIES,Export,5.964553e+08,,No Quantity,
...,...,...,...,...,...,...,...,...
10139,Zimbabwe,2001,ALL COMMODITIES,Export,1.206663e+09,,No Quantity,
10140,Zimbabwe,2001,ALL COMMODITIES,Re-Export,1.594229e+07,,No Quantity,
10141,Zimbabwe,2000,ALL COMMODITIES,Export,1.879631e+09,,No Quantity,
10142,Zimbabwe,1995,ALL COMMODITIES,Import,2.658853e+09,,No Quantity,


In [7]:
# Keep only the columns used by the analysis cells that follow.
cols = ['country_or_area', 'year', 'flow', 'trade (usd)']
# .copy() makes df_sub an independent frame. Without it pandas tracks df_sub as
# a slice of df, and adding a column to it later triggers
# SettingWithCopyWarning (currently hidden by the global warnings filter).
df_sub = df[cols].copy()
df_sub

Unnamed: 0,country_or_area,year,flow,trade (usd)
0,Afghanistan,2018,Import,7.406590e+09
1,Afghanistan,2018,Export,8.845045e+08
2,Afghanistan,2018,Re-Export,9.263097e+06
3,Afghanistan,2016,Import,6.534140e+09
4,Afghanistan,2016,Export,5.964553e+08
...,...,...,...,...
10139,Zimbabwe,2001,Export,1.206663e+09
10140,Zimbabwe,2001,Re-Export,1.594229e+07
10141,Zimbabwe,2000,Export,1.879631e+09
10142,Zimbabwe,1995,Import,2.658853e+09


# Descriptive statistics

In [8]:
# Headline statistics of the trade value, rounded to 2 decimals for the
# summary sentence printed below.
min_trade = np.round(df_sub['trade (usd)'].min(), 2)
mean_trade = np.round(df_sub['trade (usd)'].mean(), 2)
max_trade = np.round(df_sub['trade (usd)'].max(), 2)

print('The minimum, mean and maximum trades are ${:,}, ${:,} and ${:,} respectively'.format(min_trade, mean_trade, max_trade))

# Full numeric summary of the same column. `include`/`exclude` are ignored by
# Series.describe() (they only select columns of a DataFrame), so the call is
# made without them.
df_sub['trade (usd)'].describe()

The minimum, mean and maximum trades are $2.0, $66,931,662,399.26 and $2,611,432,490,157.0 respectively


count    1.014400e+04
mean     6.693166e+10
std      2.112641e+11
min      2.000000e+00
25%      4.379713e+08
50%      4.022039e+09
75%      3.381102e+10
max      2.611432e+12
Name: trade (usd), dtype: float64

# Discretization and quantiling

In [9]:
# Bucket every record into one of four equal-frequency bins (quartiles) of the
# trade value and label the bins from 'low' to 'very high'.
# assign() returns a new frame instead of writing a column into df_sub in
# place, so the cell does not write into a frame that pandas regards as a
# slice of df (SettingWithCopyWarning).
df_sub = df_sub.assign(**{
    'trade cat': pd.qcut(
        df_sub['trade (usd)'],
        q=[0, 0.25, 0.5, 0.75, 1],
        labels=['low', 'medium', 'high', 'very high'],
    ),
})
df_sub

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
0,Afghanistan,2018,Import,7.406590e+09,high
1,Afghanistan,2018,Export,8.845045e+08,medium
2,Afghanistan,2018,Re-Export,9.263097e+06,low
3,Afghanistan,2016,Import,6.534140e+09,high
4,Afghanistan,2016,Export,5.964553e+08,medium
...,...,...,...,...,...
10139,Zimbabwe,2001,Export,1.206663e+09,medium
10140,Zimbabwe,2001,Re-Export,1.594229e+07,low
10141,Zimbabwe,2000,Export,1.879631e+09,medium
10142,Zimbabwe,1995,Import,2.658853e+09,medium


In [10]:
# Summarise the non-numeric columns only (count, unique, top, freq).
df_sub.describe(exclude='number')

Unnamed: 0,country_or_area,flow,trade cat
count,10144,10144,10144
unique,210,4,4
top,Canada,Import,low
freq,98,4173,2536


In [11]:
df_sub['flow'].value_counts()                     # number of records for each distinct flow value

Import       4173
Export       4156
Re-Export    1156
Re-Import     659
Name: flow, dtype: int64

# Dataframe Grouping

In [12]:
# Group the records by country or area.
# The key is passed as a scalar column name rather than a one-element list:
# with a length-1 list, pandas 2.x deprecates the scalar lookups used later
# (get_group('China')) in favour of tuple keys such as ('China',).
cnt_area_grp = df_sub.groupby('country_or_area')
# First five rows of every group, returned as a single frame.
cnt_area_grp.head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
0,Afghanistan,2018,Import,7.406590e+09,high
1,Afghanistan,2018,Export,8.845045e+08,medium
2,Afghanistan,2018,Re-Export,9.263097e+06,low
3,Afghanistan,2016,Import,6.534140e+09,high
4,Afghanistan,2016,Export,5.964553e+08,medium
...,...,...,...,...,...
10100,Zimbabwe,2018,Import,6.258856e+09,high
10101,Zimbabwe,2018,Export,4.037203e+09,high
10102,Zimbabwe,2017,Import,4.962103e+09,high
10103,Zimbabwe,2017,Export,3.480382e+09,medium


In [13]:
# All records of the 'China' group (the notebook shows a truncated preview).
cnt_area_grp.get_group('China')

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
1958,China,2018,Import,2.134983e+12,very high
1959,China,2018,Export,2.494230e+12,very high
1960,China,2018,Re-Import,1.463818e+11,very high
1961,China,2017,Import,1.843793e+12,very high
1962,China,2017,Export,2.263371e+12,very high
...,...,...,...,...,...
2025,China,1994,Export,1.210063e+11,very high
2026,China,1993,Import,1.039589e+11,very high
2027,China,1993,Export,9.174395e+10,very high
2028,China,1992,Import,8.058530e+10,very high


In [14]:
# All records of the 'USA' group (the notebook shows a truncated preview).
cnt_area_grp.get_group('USA')

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
9816,USA,2018,Import,2.611432e+12,very high
9817,USA,2018,Export,1.665303e+12,very high
9818,USA,2018,Re-Export,2.520493e+11,very high
9819,USA,2017,Import,2.405277e+12,very high
9820,USA,2017,Export,1.545810e+12,very high
...,...,...,...,...,...
9895,USA,1992,Export,4.473301e+11,very high
9896,USA,1992,Re-Export,2.245862e+10,high
9897,USA,1991,Import,5.089440e+11,very high
9898,USA,1991,Export,4.215554e+11,very high


In [15]:
# First five records of the 'Germany' group.
cnt_area_grp.get_group('Germany').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
3726,Germany,2018,Import,1292726000000.0,very high
3727,Germany,2018,Export,1562419000000.0,very high
3728,Germany,2017,Import,1167753000000.0,very high
3729,Germany,2017,Export,1446642000000.0,very high
3730,Germany,2016,Import,1060882000000.0,very high


In [16]:
# First five records of the 'Netherlands' group.
cnt_area_grp.get_group('Netherlands').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
6300,Netherlands,2018,Import,500630500000.0,very high
6301,Netherlands,2018,Export,555921400000.0,very high
6302,Netherlands,2017,Import,461870300000.0,very high
6303,Netherlands,2017,Export,527937000000.0,very high
6304,Netherlands,2016,Import,408065000000.0,very high


In [17]:
# First five records of the 'Japan' group.
cnt_area_grp.get_group('Japan').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
4783,Japan,2019,Import,720894800000.0,very high
4784,Japan,2019,Export,705640000000.0,very high
4785,Japan,2018,Import,748217600000.0,very high
4786,Japan,2018,Export,738201200000.0,very high
4787,Japan,2017,Import,671474300000.0,very high


In [18]:
# First five records of the 'China, Hong Kong SAR' group.
cnt_area_grp.get_group('China, Hong Kong SAR').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
2030,"China, Hong Kong SAR",2019,Import,578590200000.0,very high
2031,"China, Hong Kong SAR",2019,Export,535711000000.0,very high
2032,"China, Hong Kong SAR",2018,Import,627327000000.0,very high
2033,"China, Hong Kong SAR",2018,Export,569105700000.0,very high
2034,"China, Hong Kong SAR",2017,Import,589317400000.0,very high


In [19]:
# First five records of the 'Rep. of Korea' group.
cnt_area_grp.get_group('Rep. of Korea').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
7312,Rep. of Korea,2018,Import,535183400000.0,very high
7313,Rep. of Korea,2018,Export,604807300000.0,very high
7314,Rep. of Korea,2017,Import,478469200000.0,very high
7315,Rep. of Korea,2017,Export,573627400000.0,very high
7316,Rep. of Korea,2016,Import,406181900000.0,very high


In [20]:
# First five records of the 'Italy' group.
cnt_area_grp.get_group('Italy').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
4637,Italy,2018,Import,503581100000.0,very high
4638,Italy,2018,Export,549907000000.0,very high
4639,Italy,2018,Re-Export,23853110000.0,high
4640,Italy,2018,Re-Import,4986970000.0,high
4641,Italy,2017,Import,453583000000.0,very high


In [21]:
# First five records of the 'France' group.
cnt_area_grp.get_group('France').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
3396,France,2018,Import,659374500000.0,very high
3397,France,2018,Export,568535900000.0,very high
3398,France,2018,Re-Import,5690757000.0,high
3399,France,2017,Import,613132600000.0,very high
3400,France,2017,Export,523385100000.0,very high


In [22]:
# First five records of the 'Belgium' group.
cnt_area_grp.get_group('Belgium').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade cat
932,Belgium,2018,Import,454713800000.0,very high
933,Belgium,2018,Export,468643300000.0,very high
934,Belgium,2017,Import,409024800000.0,very high
935,Belgium,2017,Export,430092400000.0,very high
936,Belgium,2016,Import,379298300000.0,very high
