In [1]:
from urllib.parse import urlencode

import economic_complexity as ecplx
import numpy as np
import pandas as pd

In [2]:
def request_data(params, base_url="https://dev.oec.world/olap-proxy/data.csv"):
    """Fetch an OLAP query result as CSV and load it into a DataFrame.

    Parameters
    ----------
    params : mapping or sequence of (key, value) pairs
        Query parameters; they are serialised with ``urllib.parse.urlencode``
        and appended to ``base_url`` after a ``?``.
    base_url : str, optional
        CSV endpoint to query. Defaults to the ``dev.oec.world`` proxy this
        helper has always used; pass another host to target a different
        deployment without editing the helper.

    Returns
    -------
    pandas.DataFrame
        The frame ``pd.read_csv`` parses from the response.
    """
    query = urlencode(params)
    url = "{}?{}".format(base_url, query)
    return pd.read_csv(url)

In [2]:
# Exports summed per exporter country and HS92 4-digit product, restricted to
# the years 2016, 2017 and 2018.
df_trade = pd.read_csv(
    "https://app-tiger.oec.world/olap-proxy/data.csv"
    "?cube=trade_i_baci_a_92"
    "&measures=Trade%20Value"
    "&drilldowns=Exporter%20Country,HS4"
    "&Year=2016,2017,2018"
)

# Total population per country (WDI indicator SP.POP.TOTL) for 2018.
df_wdi = pd.read_csv(
    "https://app-tiger.oec.world/olap-proxy/data.csv"
    "?cube=indicators_i_wdi_a"
    "&measures=Measure"
    "&drilldowns=Country"
    "&Indicator=SP.POP.TOTL"
    "&Year=2018"
)

In [3]:
# Sanity-check both downloads: print the trade frame (pandas elides the middle
# rows of a long frame) and summarise the population frame's columns and dtypes.
#df_trade.info()
print(df_trade)
df_wdi.info()

       Country ID    Country  HS4 ID                HS4  Trade Value
0           afago     Angola   10101             Horses        615.0
1           afago     Angola   10102             Bovine      23491.0
2           afago     Angola   10103               Pigs        593.0
3           afago     Angola   10104    Sheep and Goats       3264.0
4           afago     Angola   10105            Poultry       9879.0
...           ...        ...     ...                ...          ...
201595      saven  Venezuela  219702             Prints     619426.0
201596      saven  Venezuela  219703         Sculptures    9290086.0
201597      saven  Venezuela  219704     Revenue Stamps       1434.0
201598      saven  Venezuela  219705  Collector's Items    1414813.0
201599      saven  Venezuela  219706           Antiques      12908.0

[201600 rows x 5 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220 entries, 0 to 219
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
-

In [4]:
# Work on a copy so the downloaded trade frame is left as fetched.
df = df_trade.copy()

# Countries with more than 1M inhabitants
df_population = df_wdi.loc[df_wdi['Measure'] > 1_000_000]

# Products with more than $1.5B in global exports between 2016-2018
df_products = (
    df.groupby('HS4 ID')['Trade Value']
      .sum()
      .reset_index()
      .loc[lambda totals: totals['Trade Value'] > 3 * 500_000_000]
)

# Countries with more than $3B in global exports between 2016-2018
df_countries = (
    df.groupby('Country ID')['Trade Value']
      .sum()
      .reset_index()
      .loc[lambda totals: totals['Trade Value'] > 3 * 1_000_000_000]
)

# A trade row is kept only when its country passes both the population and the
# export-volume screens and its product passes the export-volume screen.
df_filter = df.loc[
    df['Country ID'].isin(df_population['Country ID'])
    & df['Country ID'].isin(df_countries['Country ID'])
    & df['HS4 ID'].isin(df_products['HS4 ID'])
]



In [5]:
# Report the row count, column dtypes and memory use of the filtered trade frame.
df_filter.info()

<class 'pandas.core.frame.DataFrame'>
Index: 136787 entries, 0 to 201599
Data columns (total 5 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   Country ID   136787 non-null  object 
 1   Country      136787 non-null  object 
 2   HS4 ID       136787 non-null  int64  
 3   HS4          136787 non-null  object 
 4   Trade Value  136787 non-null  float64
dtypes: float64(1), int64(1), object(3)
memory usage: 6.3+ MB


In [7]:
# Country x product matrix of export values: one row per country name, one
# column per HS4 product id.
# aggfunc is spelled out because pivot_table averages by default; trade values
# must be totalled if a (Country, HS4 ID) pair ever appears on more than one row.
# With 'Country' as the pivot index the result is already indexed by country,
# so no reset_index()/set_index() round trip is needed.
df_pivot = (
    pd.pivot_table(
        df_filter,
        index=['Country'],
        columns=['HS4 ID'],
        values='Trade Value',
        aggfunc='sum',
    )
    .dropna(axis=1, how="all")  # discard products no remaining country exports
    .fillna(0)                  # a missing country/product pair means no exports
    .astype(float)
)


In [9]:
# Feed the country x product export matrix to ecplx.rca — presumably the
# revealed comparative advantage matrix; confirm against the library docs.
rca = ecplx.rca(df_pivot)
# ecplx.complexity returns a pair, unpacked here as ECI and PCI — presumably the
# economic (per country) and product (per product) complexity indices; confirm
# against the economic_complexity documentation.
ECI, PCI = ecplx.complexity(rca)

In [10]:
# Rank the ECI values from highest to lowest.
ECI.sort_values(ascending=False)

Country
Japan               2.389178
Chinese Taipei      2.183193
Switzerland         2.085713
South Korea         2.019607
Germany             1.984514
                      ...   
Nigeria            -1.648391
Papua New Guinea   -1.738730
Iraq               -1.870009
South Sudan        -2.240257
Chad               -2.547961
Length: 148, dtype: float64