### Import modules

In [57]:
import json
import pandas as pd
import requests

### Helpful functions

In [58]:
def get_countries(code, chunk_size=5):
    '''
    Find the reporters (countries) that export the product identified by a
    single commodity code, grouped into small batches so that the download
    loop issues only as many queries as are needed.

    Input:
        code: commodity code (string or anything convertible with str()).
        chunk_size: maximum number of reporter codes per batch (default 5).
            NOTE(review): 5 is presumably the API's per-request reporter
            limit -- confirm against the UN Comtrade documentation.

    Output: list of lists; each inner list holds up to `chunk_size` reporter
        codes ('rtCode' values) in order of first appearance in the response.
        An empty list is returned when the API reports no records.

    Raises:
        The HTTP error raised by `raise_for_status()` on a 4xx/5xx response.
        KeyError if the response has no 'dataset' key or its records carry
        no 'rtCode' field.
    '''
    url = "http://comtrade.un.org/api/get?max=500&type=C&freq=A&px=HS&ps=now&r=all&p=0&rg=2&cc=" + str(code)
    response = requests.get(url, timeout=300)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    records = json.loads(response.text)['dataset']
    # An empty dataset would build a column-less dataframe and make the
    # 'rtCode' lookup raise KeyError; there is simply nothing to batch.
    if not records:
        return []
    paises = pd.DataFrame(records)['rtCode'].unique().tolist()
    return [paises[i:i + chunk_size] for i in range(0, len(paises), chunk_size)]

def query(url):
    '''
    Execute a single query against the UN Comtrade API and convert the
    records of the JSON response into a pandas dataframe.

    Input: A url (string)

    Output: pandas dataframe with one row per record of the response's
        'dataset' list, restricted to the trade columns listed below. When
        the dataset is empty, an empty dataframe with those same columns is
        returned so the result can still be concatenated onto the main table.

    Raises:
        The HTTP error raised by `raise_for_status()` on a 4xx/5xx response.
        KeyError if the response has no 'dataset' key or a non-empty dataset
        lacks one of the expected columns.
    '''
    columns = ['pfCode', 'period', 'aggrLevel', 'rgDesc', 'rtTitle', 'ptTitle',
               'cmdCode', 'cmdDescE', 'TradeQuantity', 'TradeValue']
    response = requests.get(url, timeout=300)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    records = json.loads(response.text)['dataset']
    # A query that matches nothing yields an empty dataset; selecting columns
    # from the resulting column-less dataframe would raise KeyError.
    if not records:
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(records)[columns]

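Initialize an empty dataframe called `base` and the bookkeeping lists that let an interrupted download resume where the last run ended. Run this cell only once: re-running it resets `base` and the lists. The cell also loads the product codes from the CSV file in the working directory.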

In [64]:
# Empty accumulator with the trade columns collected by the download loop;
# each batch of downloaded rows is concatenated onto it.
data = []
base = pd.DataFrame(data, columns=['pfCode', 'period', 'aggrLevel', 'rgDesc', 'rtTitle', 'ptTitle',
                       'cmdCode', 'cmdDescE', 'TradeQuantity', 'TradeValue'])

# Read the codes as text. Letting pandas infer a numeric dtype and casting to
# str afterwards drops leading zeros and, when the column is parsed as float,
# produces values such as '293229.0' or 'nan'.
codes_all = pd.read_csv('codes_chips.csv', dtype={'code': str})
# Blank rows carry no code to query, so they are skipped.
codes_all = codes_all['code'].dropna().to_list()

# Resume bookkeeping: lst_check records every code+reporter batch already
# downloaded, lst_code every code fully processed. The two integer zeros in
# lst_code are placeholders that never compare equal to a string code.
lst_check, lst_code = [], [0,0]

### Download data

Re-run the next cell until all the codes are covered; you can verify this by inspecting the `base` dataframe.

In [94]:
# Download every code, batch by batch. Progress is recorded in lst_code
# (finished codes) and lst_check (finished code+reporter batches), so the cell
# can be re-run after an interruption without repeating completed requests.
for cod in codes_all:
    if cod in lst_code:
        continue
    print(cod)
    for batch in get_countries(cod):
        # Reporter codes of this batch joined with a URL-encoded comma.
        reporters = '%2C'.join(str(con) for con in batch)
        batch_key = str(cod) + reporters
        if batch_key in lst_check:
            continue
        url = ("http://comtrade.un.org/api/get?max=50000&type=C&freq=A&px=HS&ps=2020&r="
               + reporters
               + "&p=all&rg=2&cc="
               + str(cod))
        print(url)
        batch_df = query(url)
        # Appended immediately (rather than once at the end) so that rows
        # already downloaded are kept if the cell is interrupted.
        base = pd.concat([base, batch_df])
        lst_check.append(batch_key)
        print(url, cod, batch_key)
    lst_code.append(cod)

293229


KeyboardInterrupt: 

Save to a csv just to have a back-up between runs of the code.

In [92]:
# Back-up of everything downloaded so far; the row index is not written.
base.to_csv(
    'base.csv',
    header=True,
    index=False,
)
base

Unnamed: 0,pfCode,period,aggrLevel,rgDesc,rtTitle,ptTitle,cmdCode,cmdDescE,TradeQuantity,TradeValue
0,H5,2020,6,Export,Australia,World,382491,"Chemical products, mixtures and preparations; ...",443606,2264569
1,H5,2020,6,Export,Australia,Bangladesh,382491,"Chemical products, mixtures and preparations; ...",12842,65557
2,H5,2020,6,Export,Australia,Canada,382491,"Chemical products, mixtures and preparations; ...",2402,12264
3,H5,2020,6,Export,Australia,Chile,382491,"Chemical products, mixtures and preparations; ...",839,4283
4,H5,2020,6,Export,Australia,China,382491,"Chemical products, mixtures and preparations; ...",8173,41725
...,...,...,...,...,...,...,...,...,...,...
79,H5,2020,6,Export,USA,Singapore,293622,"Vitamins; vitamin B1 and its derivatives, unmixed",4844,157734
80,H5,2020,6,Export,USA,Slovenia,293622,"Vitamins; vitamin B1 and its derivatives, unmixed",733,45287
81,H5,2020,6,Export,USA,Thailand,293622,"Vitamins; vitamin B1 and its derivatives, unmixed",2935,93139
82,H5,2020,6,Export,USA,United Arab Emirates,293622,"Vitamins; vitamin B1 and its derivatives, unmixed",58918,1085548


### Add stage of the value chain and save in a csv.

In [19]:
# Stage lookup table (Pedro's file): the 'Semiconductors' sheet, with its
# 'HT6 Code' column renamed so it shares the join key name used in `base`.
stages = (
    pd.read_excel(r'Fracciones supply chains SC EV.xlsx', sheet_name='Semiconductors')
    .rename(columns={'HT6 Code': 'cmdCode'})
)

# Both sides of the join must hold the code as text for the keys to match.
stages["cmdCode"] = stages["cmdCode"].astype('str')
base["cmdCode"] = base["cmdCode"].astype('str')

# VLOOKUP-style left join: every trade row is kept and receives the columns
# of the stage table for its code; then only the output columns are retained
# and reporter/partner are renamed to From/To.
output_columns = ["pfCode", "period", "aggrLevel", "rgDesc", "rtTitle", "ptTitle", "cmdCode",
                  "cmdDescE", "Naic_descrip", "TradeQuantity", "TradeValue", "PHASE", "COMPLEXITY"]
final_df = (
    pd.merge(base, stages, on='cmdCode', how='left')[output_columns]
    .rename(columns={'rtTitle': 'From', 'ptTitle': 'To'})
)

# Write the merged table to db_trade_f.csv (row index omitted).
final_df.to_csv(r'db_trade_f.csv', index=False, header=True)

### Ensure that every code has the same description

In [105]:
# NOTE(review): this reads 'db_trade_f_chips.csv', a file that no visible cell
# writes, and then overwrites 'db_trade_f.csv' -- confirm the intended input.
final_df = pd.read_csv('db_trade_f_chips.csv')

# One description per code: the first non-null 'cmdDescE' found in each
# 'cmdCode' group, held temporarily under the name 'des'.
first_values = (
    final_df.groupby("cmdCode")
    .first()
    .reset_index()[['cmdCode', 'cmdDescE']]
    .rename(columns={'cmdDescE': 'des'})
)

# Attach that description to every row, drop the original column and give the
# new one its name; as a result 'cmdDescE' ends up as the last column.
corrected_df = (
    pd.merge(final_df, first_values, on='cmdCode', how='left')
    .drop(columns='cmdDescE')
    .rename(columns={'des': 'cmdDescE'})
)
corrected_df.to_csv(r'db_trade_f.csv', index=False, header=True)