# Trade of goods , US$, HS 1992, ALL COMMODITIES

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings

# Options and Settings

In [2]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.autolayout'] = True
plt.rcParams['font.size'] = 12
path = Path.cwd()                                          # get current working directory
warnings.simplefilter('ignore')

# Import Data

In [3]:
df = pd.read_csv('Trade of goods , US$, HS 1992, ALL COMMODITIES.csv')

# Head and Tail

In [4]:
df

Unnamed: 0,Country or Area,Year,Commodity,Flow,Trade (USD),Weight (kg),Quantity Name,Quantity
0,Afghanistan,2018,ALL COMMODITIES,Import,7.406590e+09,,No Quantity,0.0
1,Afghanistan,2018,ALL COMMODITIES,Export,8.845045e+08,0.0,No Quantity,0.0
2,Afghanistan,2018,ALL COMMODITIES,Re-Export,9.263097e+06,0.0,No Quantity,0.0
3,Afghanistan,2016,ALL COMMODITIES,Import,6.534140e+09,,No Quantity,
4,Afghanistan,2016,ALL COMMODITIES,Export,5.964553e+08,,No Quantity,
...,...,...,...,...,...,...,...,...
10139,Zimbabwe,2001,ALL COMMODITIES,Export,1.206663e+09,,No Quantity,
10140,Zimbabwe,2001,ALL COMMODITIES,Re-Export,1.594229e+07,,No Quantity,
10141,Zimbabwe,2000,ALL COMMODITIES,Export,1.879631e+09,,No Quantity,
10142,Zimbabwe,1995,ALL COMMODITIES,Import,2.658853e+09,,No Quantity,


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10144 entries, 0 to 10143
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Country or Area  10144 non-null  object 
 1   Year             10144 non-null  int64  
 2   Commodity        10144 non-null  object 
 3   Flow             10144 non-null  object 
 4   Trade (USD)      10144 non-null  float64
 5   Weight (kg)      983 non-null    float64
 6   Quantity Name    10144 non-null  object 
 7   Quantity         1437 non-null   float64
dtypes: float64(3), int64(1), object(4)
memory usage: 634.1+ KB


In [6]:
df.rename(columns={'Country or Area': 'country_or_area'}, inplace=True)                    
df.columns = [col.lower() for col in df.columns]  
df                           

Unnamed: 0,country_or_area,year,commodity,flow,trade (usd),weight (kg),quantity name,quantity
0,Afghanistan,2018,ALL COMMODITIES,Import,7.406590e+09,,No Quantity,0.0
1,Afghanistan,2018,ALL COMMODITIES,Export,8.845045e+08,0.0,No Quantity,0.0
2,Afghanistan,2018,ALL COMMODITIES,Re-Export,9.263097e+06,0.0,No Quantity,0.0
3,Afghanistan,2016,ALL COMMODITIES,Import,6.534140e+09,,No Quantity,
4,Afghanistan,2016,ALL COMMODITIES,Export,5.964553e+08,,No Quantity,
...,...,...,...,...,...,...,...,...
10139,Zimbabwe,2001,ALL COMMODITIES,Export,1.206663e+09,,No Quantity,
10140,Zimbabwe,2001,ALL COMMODITIES,Re-Export,1.594229e+07,,No Quantity,
10141,Zimbabwe,2000,ALL COMMODITIES,Export,1.879631e+09,,No Quantity,
10142,Zimbabwe,1995,ALL COMMODITIES,Import,2.658853e+09,,No Quantity,


In [7]:
cols = ['country_or_area', 'year', 'flow', 'trade (usd)']
df_sub = df[cols]
df_sub

Unnamed: 0,country_or_area,year,flow,trade (usd)
0,Afghanistan,2018,Import,7.406590e+09
1,Afghanistan,2018,Export,8.845045e+08
2,Afghanistan,2018,Re-Export,9.263097e+06
3,Afghanistan,2016,Import,6.534140e+09
4,Afghanistan,2016,Export,5.964553e+08
...,...,...,...,...
10139,Zimbabwe,2001,Export,1.206663e+09
10140,Zimbabwe,2001,Re-Export,1.594229e+07
10141,Zimbabwe,2000,Export,1.879631e+09
10142,Zimbabwe,1995,Import,2.658853e+09


In [8]:
df_sub.describe(include='object')

Unnamed: 0,country_or_area,flow
count,10144,10144
unique,210,4
top,Canada,Import
freq,98,4173


In [9]:
df_sub['country_or_area'].value_counts()

Canada         98
Thailand       98
New Zealand    97
Cyprus         95
Australia      93
               ..
Djibouti        3
Eritrea         3
Cayman Isds     2
Tajikistan      2
Chad            1
Name: country_or_area, Length: 210, dtype: int64

In [10]:
df_sub['year'].value_counts()

2010    449
2012    445
2013    441
2011    441
2007    433
2008    432
2009    432
2014    430
2005    429
2006    426
2004    420
2015    417
2003    414
2002    400
2016    399
2017    392
2001    391
2000    388
2018    336
1999    321
1997    301
1998    295
1996    276
1995    258
1994    218
1993    157
1992    125
1991     90
1990     68
1989     56
2019     40
1988     24
Name: year, dtype: int64

In [11]:
df_sub['flow'].value_counts()

Import       4173
Export       4156
Re-Export    1156
Re-Import     659
Name: flow, dtype: int64

# Descriptive statistics

In [12]:
min_trade = np.round(df_sub['trade (usd)'].min(), 2)
mean_trade = np.round(df_sub['trade (usd)'].mean(), 2)
max_trade = np.round(df_sub['trade (usd)'].max(), 2)

print('Minimum, mean and maximum trades are ${:,}, ${:,} and ${:,} respectively'.format(min_trade, mean_trade, max_trade))

df_sub['trade (usd)'].describe(include='number')

Minimum, mean and maximum trades are $2.0, $66,931,662,399.26 and $2,611,432,490,157.0 respectively


count    1.014400e+04
mean     6.693166e+10
std      2.112641e+11
min      2.000000e+00
25%      4.379713e+08
50%      4.022039e+09
75%      3.381102e+10
max      2.611432e+12
Name: trade (usd), dtype: float64

# Discretization and quantiling

In [13]:
df_sub['trade (usd)'].describe()         

count    1.014400e+04
mean     6.693166e+10
std      2.112641e+11
min      2.000000e+00
25%      4.379713e+08
50%      4.022039e+09
75%      3.381102e+10
max      2.611432e+12
Name: trade (usd), dtype: float64

In [14]:
df_sub['trade_cat'] = pd.cut(df_sub['trade (usd)'], [2.0, 4.02e9, 6.69e10, np.inf], labels=['low', 'medium', 'high'])
df_sub

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
0,Afghanistan,2018,Import,7.406590e+09,medium
1,Afghanistan,2018,Export,8.845045e+08,low
2,Afghanistan,2018,Re-Export,9.263097e+06,low
3,Afghanistan,2016,Import,6.534140e+09,medium
4,Afghanistan,2016,Export,5.964553e+08,low
...,...,...,...,...,...
10139,Zimbabwe,2001,Export,1.206663e+09,low
10140,Zimbabwe,2001,Re-Export,1.594229e+07,low
10141,Zimbabwe,2000,Export,1.879631e+09,low
10142,Zimbabwe,1995,Import,2.658853e+09,low


In [15]:
df_sub.describe(exclude='number')

Unnamed: 0,country_or_area,flow,trade_cat
count,10144,10144,10143
unique,210,4,3
top,Canada,Import,low
freq,98,4173,5070


In [16]:
df_sub['flow'].value_counts()                     

Import       4173
Export       4156
Re-Export    1156
Re-Import     659
Name: flow, dtype: int64

# Dataframe Grouping

In [17]:
cnt_area_grp = df_sub.groupby(['country_or_area'])
cnt_area_grp.head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
0,Afghanistan,2018,Import,7.406590e+09,medium
1,Afghanistan,2018,Export,8.845045e+08,low
2,Afghanistan,2018,Re-Export,9.263097e+06,low
3,Afghanistan,2016,Import,6.534140e+09,medium
4,Afghanistan,2016,Export,5.964553e+08,low
...,...,...,...,...,...
10100,Zimbabwe,2018,Import,6.258856e+09,medium
10101,Zimbabwe,2018,Export,4.037203e+09,medium
10102,Zimbabwe,2017,Import,4.962103e+09,medium
10103,Zimbabwe,2017,Export,3.480382e+09,low


In [18]:
cnt_or_area_stats = cnt_area_grp.agg(
    min_trade=('trade (usd)', 'min'),
    mean_trade=('trade (usd)', 'mean'),
    max_trade=('trade (usd)', 'max'),
    tot_trade=('trade (usd)', 'sum')
)

cnt_or_area_stats

Unnamed: 0_level_0,min_trade,mean_trade,max_trade,tot_trade
country_or_area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,9.263097e+06,3.204996e+09,8.554414e+09,6.730491e+10
Albania,6.792000e+03,1.860073e+09,5.941287e+09,1.060241e+11
Algeria,2.272710e+05,2.962302e+10,7.929759e+10,1.599643e+12
Andorra,1.301725e+05,5.985092e+08,1.939930e+09,3.471354e+10
Angola,3.920000e+02,3.173247e+10,7.086308e+10,7.615793e+11
...,...,...,...,...
Viet Nam,1.448274e+10,8.322834e+10,2.151186e+11,2.996220e+12
Wallis and Futuna Isds,3.258690e+07,4.423441e+07,5.720028e+07,3.096408e+08
Yemen,4.505609e+07,4.752300e+09,1.327293e+10,1.758351e+11
Zambia,2.757621e+08,4.210576e+09,1.059407e+10,1.978971e+11


In [19]:
cnt_or_area_stats['tot_trade'].sort_values(ascending=False).head(10)   # top 10 countries or areas with the highest trade in US dollars

country_or_area
USA                     7.466925e+13
EU-28                   6.601481e+13
China                   5.282041e+13
Germany                 4.845993e+13
Japan                   3.280613e+13
China, Hong Kong SAR    2.567542e+13
United Kingdom          2.273016e+13
France                  2.269440e+13
Canada                  1.911553e+13
Italy                   1.883860e+13
Name: tot_trade, dtype: float64

In [20]:
cnt_or_area_stats['tot_trade'].sort_values(ascending=True).head(10)   # top 10 countries or areas with the lowest trade in US dollars

country_or_area
Tuvalu                    1.037393e+08
Chad                      2.151683e+08
Wallis and Futuna Isds    3.096408e+08
Guinea-Bissau             3.715047e+08
Eritrea                   4.404139e+08
Montserrat                5.151449e+08
Cook Isds                 7.659912e+08
Haiti                     9.398224e+08
Anguilla                  1.170307e+09
Djibouti                  1.217550e+09
Name: tot_trade, dtype: float64

In [21]:
cnt_area_grp.get_group('USA').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
9816,USA,2018,Import,2611432000000.0,high
9817,USA,2018,Export,1665303000000.0,high
9818,USA,2018,Re-Export,252049300000.0,high
9819,USA,2017,Import,2405277000000.0,high
9820,USA,2017,Export,1545810000000.0,high


In [22]:
cnt_area_grp.get_group('EU-28').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
3155,EU-28,2018,Import,2335303000000.0,high
3156,EU-28,2018,Export,2305894000000.0,high
3157,EU-28,2017,Import,2095737000000.0,high
3158,EU-28,2017,Export,2120912000000.0,high
3159,EU-28,2016,Import,1894809000000.0,high


In [23]:
cnt_area_grp.get_group('China').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
1958,China,2018,Import,2134983000000.0,high
1959,China,2018,Export,2494230000000.0,high
1960,China,2018,Re-Import,146381800000.0,high
1961,China,2017,Import,1843793000000.0,high
1962,China,2017,Export,2263371000000.0,high


In [24]:
cnt_area_grp.get_group('Germany').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
3726,Germany,2018,Import,1292726000000.0,high
3727,Germany,2018,Export,1562419000000.0,high
3728,Germany,2017,Import,1167753000000.0,high
3729,Germany,2017,Export,1446642000000.0,high
3730,Germany,2016,Import,1060882000000.0,high


In [25]:
cnt_area_grp.get_group('Japan').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
4783,Japan,2019,Import,720894800000.0,high
4784,Japan,2019,Export,705640000000.0,high
4785,Japan,2018,Import,748217600000.0,high
4786,Japan,2018,Export,738201200000.0,high
4787,Japan,2017,Import,671474300000.0,high


In [26]:
cnt_area_grp.get_group('China, Hong Kong SAR').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
2030,"China, Hong Kong SAR",2019,Import,578590200000.0,high
2031,"China, Hong Kong SAR",2019,Export,535711000000.0,high
2032,"China, Hong Kong SAR",2018,Import,627327000000.0,high
2033,"China, Hong Kong SAR",2018,Export,569105700000.0,high
2034,"China, Hong Kong SAR",2017,Import,589317400000.0,high


In [27]:
cnt_area_grp.get_group('United Kingdom').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
9632,United Kingdom,2018,Import,671694300000.0,high
9633,United Kingdom,2018,Export,490840400000.0,high
9634,United Kingdom,2018,Re-Import,5055648000.0,medium
9635,United Kingdom,2017,Import,640907700000.0,high
9636,United Kingdom,2017,Export,441847300000.0,high


In [28]:
cnt_area_grp.get_group('France').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
3396,France,2018,Import,659374500000.0,high
3397,France,2018,Export,568535900000.0,high
3398,France,2018,Re-Import,5690757000.0,medium
3399,France,2017,Import,613132600000.0,high
3400,France,2017,Export,523385100000.0,high


In [29]:
cnt_area_grp.get_group('Canada').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
1742,Canada,2019,Import,453012600000.0,high
1743,Canada,2019,Export,445727500000.0,high
1744,Canada,2019,Re-Export,36272370000.0,medium
1745,Canada,2019,Re-Import,3356662000.0,low
1746,Canada,2018,Import,459947600000.0,high


In [30]:
cnt_area_grp.get_group('Italy').head()

Unnamed: 0,country_or_area,year,flow,trade (usd),trade_cat
4637,Italy,2018,Import,503581100000.0,high
4638,Italy,2018,Export,549907000000.0,high
4639,Italy,2018,Re-Export,23853110000.0,medium
4640,Italy,2018,Re-Import,4986970000.0,medium
4641,Italy,2017,Import,453583000000.0,high
