# This notebook extracts and saves preliminary data from https://comtrade.un.org/

In [5]:
import pandas as pd
import numpy
import requests
import json
import codecs
import pickle
from collections import defaultdict
from datetime import datetime
from dateutil.relativedelta import relativedelta
import time


In [6]:
# Get country codes
reporter_areas_response = requests.get('https://comtrade.un.org/Data/cache/reporterAreas.json')
# Fail fast on an HTTP error instead of trying to parse an error page as JSON
reporter_areas_response.raise_for_status()

# Decode the raw bytes with 'utf-8-sig' so that a leading byte-order mark is
# dropped regardless of which encoding requests would have guessed for `.text`
reporter_areas_json = reporter_areas_response.content.decode('utf-8-sig')
country_codes = json.loads(reporter_areas_json)["results"]

# Cache the parsed list locally (the datasets/ directory must already exist)
with open('datasets/country_codes.pickle', 'wb') as handle:
    pickle.dump(country_codes, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
# Get monthly data availability for all countries
data_availability = requests.get('https://comtrade.un.org/api/refs/da/view?type=C&freq=M')

# Maps each record's 'r' value (presumably the reporter/country code) to the
# list of months, as first-of-month dates, parsed from its 'ps' field
country_availability = defaultdict(list)

for record in data_availability.json():
    months_for_reporter = country_availability[record['r']]
    available_month = datetime.strptime(record['ps'], '%Y%m').date()
    months_for_reporter.append(available_month)

In [8]:
# Define datelist that will be the timeframe for our analysis:
# the first day of every month from startDate through endDate, both inclusive
startDate = '2011-01'
endDate = '2020-12'

# '%Y-%m' carries no day, so strptime defaults it to 1 and both bounds are
# first-of-month dates
start = datetime.strptime(startDate, '%Y-%m').date()
end = datetime.strptime(endDate, '%Y-%m').date()

# Place both bounds on a single month axis (year * 12 + zero-based month) so
# the range can be walked one month at a time across year boundaries
first_month_index = start.year * 12 + (start.month - 1)
last_month_index = end.year * 12 + (end.month - 1)

# `+ 1` makes the end month part of the timeframe; an empty list results when
# endDate is earlier than startDate
datelist = [
    start.replace(year=month_index // 12, month=month_index % 12 + 1)
    for month_index in range(first_month_index, last_month_index + 1)
]

In [9]:
# Tolerance: how many months of the timeframe a country may lack and still be kept
# Build the list of countries for which we have sufficient data
max_missing_months = 0
countries_with_sufficient_data = []

for reporter, reported_months in country_availability.items():
    # Count the timeframe months that this country never reported
    gaps = sum(1 for month in datelist if month not in reported_months)
    if gaps > max_missing_months:
        continue
    countries_with_sufficient_data.append(reporter)

len(countries_with_sufficient_data)

66

In [37]:
# Export countries in batches of 5
# Each batch is a "%2C"-joined (URL-encoded comma) string of country codes that
# is pasted directly into the `r` and `p` query parameters below.
batch_size = 5
# Stepping through the list by batch_size never produces an empty trailing
# batch, even when the number of countries is a multiple of batch_size or zero.
countries_string_in_fives = [
    "%2C".join(countries_with_sufficient_data[offset:offset + batch_size])
    for offset in range(0, len(countries_with_sufficient_data), batch_size)
]

# One request per ordered (r batch, p batch) pair; the raw responses are kept
# and parsed later.
exported_data_list = []
for r_batch in countries_string_in_fives:
    for p_batch in countries_string_in_fives:
        exported_data = requests.get(f'https://comtrade.un.org/api/get?max=100000&type=C&freq=M&px=HS&ps=all&r={r_batch}&p={p_batch}&rg=2&cc=TOTAL')
        if exported_data.status_code==200:
            exported_data_list.append(exported_data)
        else:
            # Report the failing pair. NOTE: `break` only leaves the inner
            # loop, so the remaining p batches for this r batch are skipped
            # and the outer loop moves on to the next r batch without the
            # pause below.
            print(r_batch, p_batch)
            break
        # Pause between requests (presumably to respect the API's rate
        # limit - confirm against the Comtrade usage rules)
        time.sleep(40)

# Persist the raw responses so the download does not have to be repeated
# (the datasets/ directory must already exist)
with open('datasets/exported_data_list.pickle', 'wb') as handle:
    pickle.dump(exported_data_list, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [38]:
# Number of responses stored in exported_data_list
len(exported_data_list)

196

In [57]:
# Flatten every stored response into one row per dataset entry, keeping the
# rtCode, ptCode, period and TradeValue fields
final_values = []

for response in exported_data_list:
    final_values.extend(
        [record['rtCode'], record['ptCode'], record['period'], record['TradeValue']]
        for record in response.json()['dataset']
    )

column_names = ['From', 'To', 'Period', 'Value']
final_df = pd.DataFrame(final_values, columns=column_names)

# Keep only rows whose period falls inside the timeframe; the months are
# compared as YYYYMM integers
datelist_string = [int(month.strftime('%Y%m')) for month in datelist]
inside_timeframe = final_df['Period'].isin(datelist_string)
final_df = final_df[inside_timeframe].copy()

# The DataFrame index is written too (pandas default), as the first CSV column
final_df.to_csv('datasets/trade_data.csv')