### Import modules

In [1]:
import json
import pandas as pd
import requests

### Helpful functions

In [259]:
def get_countries(code, chunk_size=5):
    '''
    Gets all the countries that export the product referred to by a single code,
    helpful to do the exact number of queries needed.

    Input:
        code: A commodity code (string or anything convertible with str()).
        chunk_size: Maximum number of reporter codes per sub-list (default 5).
            NOTE(review): 5 is presumably the API's limit on reporters per
            request -- confirm before raising it.

    Output: list of lists of reporter codes. The distinct `rtCode` values of
    the response are kept in order of first appearance and grouped into
    chunks of at most `chunk_size`. An empty list is returned when the
    response carries no records.

    Raises:
        ValueError: if chunk_size is smaller than 1.
        requests.HTTPError: if the API answers with an error status.
    '''
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    url = "http://comtrade.un.org/api/get?max=500&type=C&freq=A&px=HS&ps=now&r=all&p=0&rg=2&cc=" + str(code)
    f = requests.get(url, timeout=300)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    f.raise_for_status()
    x = json.loads(f.text)

    # A missing or empty 'dataset' would otherwise surface as a KeyError on
    # 'rtCode'; there is simply nothing to query for this code.
    records = x.get('dataset') or []
    if not records:
        return []

    paises = pd.DataFrame(records)['rtCode'].unique().tolist()
    return [paises[i:i + chunk_size] for i in range(0, len(paises), chunk_size)]

def query(url):
    '''
    Executes a single query from the UN Comtrade API, converts json object
    into pandas dataframe.

    Input: A url (string)

    Output: Dataframe with the output of the query (pandas dataframe),
    restricted to the columns listed in `columns` below. When the response
    carries no records, an empty dataframe with those same columns is
    returned so the result can still be concatenated with other results.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        KeyError: if a non-empty response lacks one of the expected columns.
    '''
    columns = ['pfCode', 'period', 'aggrLevel', 'rgDesc', 'rtTitle', 'ptTitle',
               'cmdCode', 'cmdDescE', 'TradeQuantity', 'TradeValue']

    f = requests.get(url, timeout=300)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    f.raise_for_status()
    x = json.loads(f.text)

    # Selecting columns from a frame built out of zero records raises
    # KeyError; an empty result is valid and must not abort the caller.
    records = x.get('dataset') or []
    if not records:
        return pd.DataFrame(columns=columns)

    return pd.DataFrame(records)[columns]

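Initialize the empty dataframe called 'base' and the bookkeeping lists that let the download loop resume where the previous run ended, then load the product codes from the csv in the working directory. Run this cell only once: running it again resets 'base' and the progress lists.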

In [153]:
# Empty accumulator for the downloaded rows, with the columns kept per query.
data = []
base = pd.DataFrame(
    data,
    columns=[
        'pfCode', 'period', 'aggrLevel', 'rgDesc', 'rtTitle', 'ptTitle',
        'cmdCode', 'cmdDescE', 'TradeQuantity', 'TradeValue',
    ],
)

# Product codes to download, read from the 'code' column and handled as strings.
codes_all = pd.read_csv('codes.csv').astype(str)['code'].to_list()

# Progress trackers for the download loop: request keys already fetched, and
# codes already fully processed.
lst_check = []
lst_code = [0, 0]

### Download data

Re-run the next cell until all the codes are covered; you can verify this by inspecting the `base` dataframe.

In [503]:
# Download every (code, reporter-chunk) combination that has not been fetched
# yet. The cell is safe to re-run after a failure: `lst_check` remembers the
# finished requests and `lst_code` the finished codes.
#
# New results are collected in a list and concatenated once at the end;
# concatenating inside the loop copies the whole accumulated frame on every
# request. The `finally` clause keeps whatever was downloaded before an error
# or interruption, so `base` stays consistent with `lst_check`.
new_frames = [base]
try:
    for cod in codes_all:
        if cod not in lst_code:
            print(cod)
            for sublist in get_countries(cod):
                # Reporter codes joined with a URL-encoded comma (%2C).
                string = '%2C'.join(str(con) for con in sublist)
                comb = str(cod) + string
                if comb not in lst_check:
                    url = "http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=" + string + "&p=all&rg=2&cc=" + str(cod)
                    # Printed before the request so a failing URL is visible.
                    print(url)
                    row = query(url)
                    new_frames.append(row)
                    lst_check.append(comb)
                    print(url, cod, comb)
            # Only reached when every chunk of this code succeeded.
            lst_code.append(cod)
finally:
    base = pd.concat(new_frames)

853290
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=616%2C620%2C642%2C688%2C699&p=all&rg=2&cc=853290
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=616%2C620%2C642%2C688%2C699&p=all&rg=2&cc=853290 853290 853290616%2C620%2C642%2C688%2C699
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=702%2C703%2C704%2C705%2C710&p=all&rg=2&cc=853290
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=702%2C703%2C704%2C705%2C710&p=all&rg=2&cc=853290 853290 853290702%2C703%2C704%2C705%2C710
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=724%2C752%2C757%2C764%2C792&p=all&rg=2&cc=853290
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=724%2C752%2C757%2C764%2C792&p=all&rg=2&cc=853290 853290 853290724%2C752%2C757%2C764%2C792
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=807%2C826%2C842%2C858%2C484&p=all&rg=2&cc=853290
http://comtrade.un.org/api/get

http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=752%2C757%2C764%2C792%2C804&p=all&rg=2&cc=854150 854150 854150752%2C757%2C764%2C792%2C804
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=807%2C826%2C842%2C858%2C268&p=all&rg=2&cc=854150
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=807%2C826%2C842%2C858%2C268&p=all&rg=2&cc=854150 854150 854150807%2C826%2C842%2C858%2C268
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=484%2C818&p=all&rg=2&cc=854150
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=484%2C818&p=all&rg=2&cc=854150 854150 854150484%2C818
851890
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=36%2C40%2C51%2C52%2C56&p=all&rg=2&cc=851890
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=36%2C40%2C51%2C52%2C56&p=all&rg=2&cc=851890 851890 85189036%2C40%2C51%2C52%2C56
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=

http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=620%2C642%2C688%2C699%2C702&p=all&rg=2&cc=903082 903082 903082620%2C642%2C688%2C699%2C702
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=703%2C704%2C705%2C710%2C724&p=all&rg=2&cc=903082
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=703%2C704%2C705%2C710%2C724&p=all&rg=2&cc=903082 903082 903082703%2C704%2C705%2C710%2C724
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=752%2C757%2C764%2C792%2C804&p=all&rg=2&cc=903082
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=752%2C757%2C764%2C792%2C804&p=all&rg=2&cc=903082 903082 903082752%2C757%2C764%2C792%2C804
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=807%2C826%2C842%2C484%2C818&p=all&rg=2&cc=903082
http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r=807%2C826%2C842%2C484%2C818&p=all&rg=2&cc=903082 903082 903082807%2C826%2C842%2C484%2C

Save to a csv just to have a back-up between runs of the code.

In [504]:
# Checkpoint the accumulated download to disk (header row, no index column),
# then display the frame as the cell output.
base.to_csv(path_or_buf=r'base.csv', header=True, index=False)
base

### Add stage of the value chain and save in a csv.

In [19]:
# Lookup table (Pedro's file): 'Semiconductors' sheet, with its 'HT6 Code'
# column renamed to match the key used in `base`.
stages = (
    pd.read_excel(r'Fracciones supply chains SC EV.xlsx', sheet_name='Semiconductors')
    .rename(columns={'HT6 Code': 'cmdCode'})
)

# Both join keys are cast to strings so they compare equal in the merge.
stages["cmdCode"] = stages["cmdCode"].astype('str')
base["cmdCode"] = base["cmdCode"].astype('str')

# VLOOKUP-style left join: every row of `base` is kept and the lookup columns
# are attached where the code matches.
output_columns = ["pfCode", "period", "aggrLevel", "rgDesc", "rtTitle", "ptTitle", "cmdCode",
                  "cmdDescE", "Naic_descrip", "TradeQuantity", "TradeValue", "PHASE", "COMPLEXITY"]
final_df = (
    base.merge(stages, on='cmdCode', how='left')[output_columns]
    .rename(columns={'rtTitle': 'From', 'ptTitle': 'To'})
)

# Write the final table to db_trade_f.csv.
final_df.to_csv(r'db_trade_f.csv', index=False, header=True)