In [1]:
import pandas as pd
import numpy as np
import time

import json
import requests

In [2]:
def api_url(params):
    '''
    Helper to create request string
    '''
    API_ENDPOINT = 'https://comtrade.un.org/api/get?'
    
    return API_ENDPOINT + '&'.join(
        [ '{k}={v}'.format(k=k, v=v) for k, v in params.items()])

In [3]:
params_trade_total = {
    'max': 1000,
    'type': 'C',
    'freq': 'A',
    'px': 'HS',
    'ps': 2020,
    'r': 643,    # Russia's code
    'p': 'all',
    'rg': 'all',
    'cc': 'TOTAL',
    'fmt': 'csv'
}

### RUSSIAN IMPORT/EXPORT TRADE VALUE PER PARTNER: 2000 - 2020

In [None]:
params_trade_total['fmt'] = 'csv'

for year in range(2000, 2021):
    params_trade_total['ps'] = year
    
    data = pd.read_csv(
        api_url(params_trade_total)
    )
    print('Year:', year, 'records:', len(data))
    
    data.to_csv(
        'data/total-trade-value-{year}.csv'.format(year=year),
        index=False
    )
    time.sleep(30)

### !TODO: IMPORT/EXPORT PRODUCTS FROM TOP 10 PARTNERS

In [2]:
def api_url2(params):
    '''
    Helper to create request string
    '''
    API_ENDPOINT = 'https://comtrade.un.org/api/get?'
    
    url = []
    for k, v in params.items():
        if isinstance(v, list):
           url.append(
               '{k}={v}'.format(k=k, v='%2C'.join(str(vi) for vi in v))
           )
        else:
            url.append('{k}={v}'.format(k=k, v=v))
    
    return API_ENDPOINT + '&'.join(url)

In [3]:
params_products = {
    'max': 5000,
    'type': 'C',
    'freq': 'A',
    'px': 'HS',
    'ps': 2020,   # year
    'r': 643,    # Russia's code
    'p': [276, 381, 112, 156, 804],   # Partner code separated by comma , 381, 112, 156, 804
    'rg': [1, 2],    # 1 - Import, 2 - Export
    'cc': 'AG2',
    'fmt': 'csv'
}

#### GET PARTNERS CODES

In [9]:
cols = [
    'Year',
    'Trade Flow',
    'Reporter',
    'Reporter Code',
    'Reporter ISO',
    'Partner',
    'Partner Code',
    'Partner ISO',
    'Trade Value (US$)',
    'Commodity',
    'Commodity Code'
]

data = pd.DataFrame([])

for year in range(2000, 2021):
    df = pd.read_csv(
        'data/total-trade-value-{year}.csv'.format(year = year),
        usecols = cols
    )
    
    data = pd.concat([data, df])
    
    
subcountries = ['Algeria', 'Argentina', 'Australia', 'Austria',
       'Azerbaijan', 'Belarus', 'Belgium', 'Br. Virgin Isds', 'Brazil',
       'Bulgaria', 'Canada', 'China', 'China, Hong Kong SAR', 'Cuba',
       'Cyprus', 'Czechia', 'Denmark', 'Ecuador', 'Egypt', 'Estonia',
       'Finland', 'France', 'Germany', 'Gibraltar', 'Greece', 'Hungary',
       'India', 'Indonesia', 'Iran', 'Ireland', 'Israel', 'Italy',
       'Japan', 'Kazakhstan', 'Latvia', 'Lithuania', 'Malaysia', 'Malta',
       'Netherlands', 'Norway', 'Other Asia, nes',
       'Poland', 'Rep. of Korea', 'Rep. of Moldova', 'Romania', 'Serbia',
       'Singapore', 'Slovakia', 'Slovenia', 'Spain', 'Sweden',
       'Switzerland', 'Tajikistan', 'Thailand', 'Turkey', 'Turkmenistan',
       'USA', 'Ukraine', 'United Arab Emirates', 'United Kingdom',
       'Uzbekistan', 'Viet Nam']

part_codes = np.unique(data.loc[
    data.Partner.isin(subcountries),
    'Partner Code'
])

In [10]:
len(part_codes)

62

In [11]:
len(subcountries)

62

#### DOWNLOAD DATA

In [21]:
part_codes = list(part_codes)

partners_chunks = []
for i in range(0, len(part_codes), 5):
    partners_chunks.append(part_codes[i:i+5])
    
years_chunks = []
years = list(range(2000, 2021))
for i in range(0, len(years), 5):
    years_chunks.append(years[i:i+5])

In [None]:
for p in partners_chunks:
    for ps in years_chunks:
        
        params_products['p'] = p
        params_products['ps'] = ps
        
        data_goods = pd.read_csv(
            api_url2(params_products)
        )
        
        print('Years:', ps, 'partners:', p, 'records:', len(data_goods))
        
        data_goods.to_csv(
            'data/goods_{partners}_{years}.csv'.format(
                partners = '-'.join( str(_) for _ in p),
                years = '-'.join( str(_) for _ in ps)
            ),
            index=False
        )
        
        time.sleep(30)

Years: [2010, 2011, 2012, 2013, 2014] partners: [842, 860] records: 1  
Years: [2005, 2006, 2007, 2008, 2009] partners: [458, 470, 490, 498, 528] records: 1  
Years: [2010, 2011, 2012, 2013, 2014] partners: [458, 470, 490, 498, 528] records: 1  
Years: [2005, 2006, 2007, 2008, 2009] partners: [124, 156, 192, 196, 203] records: 1  

#### RELOAD EMPTY DATA

In [27]:
def load_goods(partners, years, fmt = 'json', save = False):
    params_products = {
        'max': 5000,
        'type': 'C',
        'freq': 'A',
        'px': 'HS',
        'ps': years,   # year
        'r': 643,    # Russia's code
        'p': partners,   # Partner code separated by comma
        'rg': [1, 2],    # 1 - Import, 2 - Export
        'cc': 'AG2',
        'fmt': fmt
    }
    
    # Map JSON headings to CSV headings (rest should be added for consistency)
    json_csv_cols = {
        'yr': 'Year',
        'rgDesc': 'Trade Flow',
        'rtTitle': 'Reporter',
        'rtCode': 'Reporter Code',
        'rt3ISO': 'Reporter ISO',
        'ptTitle': 'Partner',
        'ptCode': 'Partner Code',
        'pt3ISO': 'Partner ISO',
        'TradeValue': 'Trade Value (US$)',
        'cmdDescE': 'Commodity',
        'cmdCode': 'Commodity Code'
    }
    
    req_url = api_url2(params_products)
    
    if fmt == 'csv':
        data_goods = pd.read_csv(req_url)
    else:
        resp = requests.get(req_url)
        resp_data = json.loads(resp.text)
        data_goods = pd.DataFrame(resp_data['dataset'])
        data_goods = data_goods.rename(columns = json_csv_cols)
    
    print(req_url)
    print('Years:', years, 'partners:', partners, 'records:', len(data_goods))
    
    if save:
        data_goods.to_csv(
            'data/goods_{partners}_{years}.csv'.format(
                partners = '-'.join( str(_) for _ in partners),
                years = '-'.join( str(_) for _ in years)
            ),
            index=False
        )
    
    return data_goods

In [40]:
empty_reqs = [
    #{'years': [2010, 2011, 2012, 2013, 2014], 'partners': [842, 860]},
    #{'years': [2005, 2006, 2007, 2008, 2009], 'partners': [458, 470, 490, 498, 528]},
    #{'years': [2010, 2011, 2012, 2013, 2014], 'partners': [458, 470, 490, 498, 528]},
    #{'years': [2005, 2006, 2007, 2008, 2009], 'partners': [124, 156, 192, 196, 203]},
    {'years': [2020], 'partners': [842, 860]},
]

In [41]:
for req in empty_reqs:
    
    df = load_goods(
        req['partners'],
        req['years'],
        fmt = 'json',
        save = True
    )
    
    time.sleep(30)

https://comtrade.un.org/api/get?max=5000&type=C&freq=A&px=HS&ps=2020&r=643&p=842%2C860&rg=1%2C2&cc=AG2&fmt=json
Years: [2020] partners: [842, 860] records: 349


#### MERGE GOODS DATA IN SINGLE FILE

In [30]:
import glob

In [42]:
cols = [
    'Year',
    'Trade Flow',
    'Reporter',
    'Reporter Code',
    'Reporter ISO',
    'Partner',
    'Partner Code',
    'Partner ISO',
    'Trade Value (US$)',
    'Commodity',
    'Commodity Code'
]

goods_data = pd.DataFrame([])

for f in glob.glob('data/goods_*.csv'):
    df = pd.read_csv(f, usecols = cols)
    print(f, len(df))
    
    goods_data = pd.concat([goods_data, df])

data/goods_208-218-233-246-251_2015-2016-2017-2018-2019.csv 3582
data/goods_579-616-642-688-699_2000-2001-2002-2003-2004.csv 2901
data/goods_276-292-300-344-348_2000-2001-2002-2003-2004.csv 3035
data/goods_56-76-92-100-112_2000-2001-2002-2003-2004.csv 2271
data/goods_360-364-372-376-381_2005-2006-2007-2008-2009.csv 3185
data/goods_458-470-490-498-528_2020.csv 671
data/goods_842-860_2005-2006-2007-2008-2009.csv 1704
data/goods_458-470-490-498-528_2000-2001-2002-2003-2004.csv 3019
data/goods_792-795-804-818-826_2015-2016-2017-2018-2019.csv 3862
data/goods_208-218-233-246-251_2005-2006-2007-2008-2009.csv 3489
data/goods_124-156-192-196-203_2000-2001-2002-2003-2004.csv 3483
data/goods_208-218-233-246-251_2000-2001-2002-2003-2004.csv 3541
data/goods_12-31-32-36-40_2010-2011-2012-2013-2014.csv 2889
data/goods_752-757-762-764-784_2020.csv 721
data/goods_752-757-762-764-784_2000-2001-2002-2003-2004.csv 3264
data/goods_458-470-490-498-528_2010-2011-2012-2013-2014.csv 3105
data/goods_12-31-32-36

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




data/goods_360-364-372-376-381_2015-2016-2017-2018-2019.csv 3575
data/goods_752-757-762-764-784_2015-2016-2017-2018-2019.csv 3559
data/goods_56-76-92-100-112_2010-2011-2012-2013-2014.csv 2941
data/goods_12-31-32-36-40_2000-2001-2002-2003-2004.csv 2721
data/goods_842-860_2015-2016-2017-2018-2019.csv 1751
data/goods_579-616-642-688-699_2010-2011-2012-2013-2014.csv 3754
data/goods_792-795-804-818-826_2005-2006-2007-2008-2009.csv 3660
data/goods_124-156-192-196-203_2005-2006-2007-2008-2009.csv 3304
data/goods_702-703-704-705-724_2020.csv 734
data/goods_276-292-300-344-348_2005-2006-2007-2008-2009.csv 2996
data/goods_702-703-704-705-724_2005-2006-2007-2008-2009.csv 3191
data/goods_56-76-92-100-112_2015-2016-2017-2018-2019.csv 3354
data/goods_702-703-704-705-724_2010-2011-2012-2013-2014.csv 3445
data/goods_702-703-704-705-724_2015-2016-2017-2018-2019.csv 3674
data/goods_458-470-490-498-528_2005-2006-2007-2008-2009.csv 2974
data/goods_276-292-300-344-348_2020.csv 642
data/goods_792-795-804-81

In [43]:
len(goods_data)

176767

In [46]:
goods_data.isnull().sum()

Commodity               0
Commodity Code          0
Partner                 0
Partner Code            0
Partner ISO          2771
Reporter                0
Reporter Code           0
Reporter ISO            0
Trade Flow              0
Trade Value (US$)       0
Year                    0
dtype: int64

In [49]:
goods_data.loc[goods_data['Partner ISO'].isnull()].head()

Unnamed: 0,Commodity,Commodity Code,Partner,Partner Code,Partner ISO,Reporter,Reporter Code,Reporter ISO,Trade Flow,Trade Value (US$),Year
6,Meat and edible meat offal,2,"Other Asia, nes",490,,Russian Federation,643,RUS,Export,32106,2020
11,"Fish and crustaceans, molluscs and other aquat...",3,"Other Asia, nes",490,,Russian Federation,643,RUS,Import,1537099,2020
16,Dairy produce; birds' eggs; natural honey; edi...,4,"Other Asia, nes",490,,Russian Federation,643,RUS,Export,12761,2020
21,Animal originated products; not elsewhere spec...,5,"Other Asia, nes",490,,Russian Federation,643,RUS,Export,4625583,2020
27,"Trees and other plants, live; bulbs, roots and...",6,"Other Asia, nes",490,,Russian Federation,643,RUS,Import,244164,2020


In [48]:
#goods_data.to_csv('data/goods-value-2000-2020.csv', index=False)