In [2]:
# identifier of this notebook; used further down as the prefix of exported file names
notebook_id = "122"

<a id="ID_top"></a>
## UNCOMTRADE API extractor

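This workflow loads the in-scope countries and matches them to UN Comtrade codes (Part 1), downloads the trade data from the UN Comtrade API (Part 2), and refines and exports the resulting dataset (Part 3).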

#### Notebook sections:
    
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

#### Import all packages that could be required

In [48]:
# %load s_package_import.py
# package library, use to ensure consistency across notebooks, refresh periodically
# general packages
import os # use with os.listdir(_path_)
import requests
import csv
import time
from datetime import datetime
from shutil import copyfile

#temp check
#from shutil import make_archive
import zipfile #notebook

# data analysis packages
import pandas as pd
pd.options.display.max_columns = None # don't truncate columns
pd.options.display.max_rows = 150

import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import descartes
import pycountry

# custom scripts
import s_file_export
import s_filepaths
import s_un_comtrade_extract as s_un
import s_adj_matrix_plot

#=== network analysis
import networkx as nx
#=== gravity modelling
import gme as gme

#=== distance datasets
import wbdata


#### Import module and declare path variables
`import s_filepaths` (defined in `s_filepaths.py`)

In [5]:
# path configuration lives in the shared s_filepaths module
import s_filepaths

# local shorthands for the data directories used throughout this notebook
path_raw, path_raw_dl = s_filepaths.path_raw, s_filepaths.path_raw_dl
path_store, path_live = s_filepaths.path_store, s_filepaths.path_live

<a id="ID_part1"></a>
### Part 1 | Load in scope countries
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

In [6]:
# List everything in the live input directory (hidden files included) to confirm
# that the input files loaded below are present
print(os.listdir(f"{path_live}"))

['121_input_di_matrix.csv.gzip', '.DS_Store', '112_input_gme_data.csv.gzip', '125_input_scope_country.csv.gzip', '120_input_scope_data.csv.gzip', '2_raw_explainer_doc.md', '121_input_de_matrix.csv.gzip', '113_input_bri_members.csv.gzip']


In [40]:
# read the scope dataset from the live input folder
filename = "120_input_scope_data.csv.gzip"
df_scope = pd.read_csv(f"{path_live}{filename}", compression="gzip")

# one row per distinct value of the iso3_d column, in order of first appearance
list_countries = pd.DataFrame(df_scope["iso3_d"].unique().tolist(), columns=["iso3"])
list_countries.head()

Unnamed: 0,iso3
0,ARE
1,AZE
2,BEL
3,CHN
4,CZE


**Generate UN code reference document**

In [9]:
# switch for the next cell: set to True to run the small test download that
# produces the UN code reference extract; False skips the API call
create_ref_doc = False

In [10]:
if not create_ref_doc:
    print("Skipped")
else:
    # Test run: reporter code 4 (AFG) against all partners, TOTAL trade only,
    # for a single year (2010); the export is written without a copy of the file.
    un_extract = s_un.f_un_comtrade_data(
        p_r_country=["4"],
        p_p_country=["all"],
        p_ps_years=["2010"],
        p_extra="cc=TOTAL",
    )
    s_file_export.f_df_export(
        un_extract[0][0],
        "un_com_0_test_ref",
        p_copy=False,
        p_loc1=path_raw_dl,
    )

Skipped


**Match scope countries to UN reference IDs**

In [24]:
# show what is available in the raw data folder
print(os.listdir(f"{path_raw}"))

# load the UN code reference table, keep only the ISO3 code, UN code and title,
# and index it by ISO3 code so it can be matched against the scope countries
un_ref_file = (
    pd.read_csv(f"{path_raw}un_codes_ref.csv.gzip", compression="gzip")
    .loc[:, ["pt3ISO", "ptCode", "ptTitle"]]
    .set_index("pt3ISO")
)
un_ref_file.head()  # preview

['un_codes_ref.csv.gzip', 'BRI_countries_online_MANUAL.csv', '.DS_Store', '0_raw_explainer_doc.md', 'bri_countries_manual_2020.csv.gzip', '121_raw_wb_distance_2016.csv.gzip', '1_auto_download', '112_raw_gme_raw_site_dl.csv.gzip']


Unnamed: 0_level_0,ptCode,ptTitle
pt3ISO,Unnamed: 1_level_1,Unnamed: 2_level_1
WLD,0,World
AFG,4,Afghanistan
DZA,12,Algeria
ATG,28,Antigua and Barbuda
AZE,31,Azerbaijan


In [54]:
# Match every in-scope ISO3 code to its UN Comtrade code and title.
# un_ref_file is indexed by pt3ISO, so the "iso3" column is joined against that index.
df_merge = list_countries.merge(un_ref_file, how="left", left_on="iso3", right_index=True)

# Report the countries that are about to be dropped instead of losing them silently
unmatched = df_merge.loc[df_merge.isna().any(axis=1), "iso3"].tolist()
if unmatched:
    print(f"No complete UN reference entry for {len(unmatched)} countries: {unmatched}")

# Drop incomplete rows, then store the UN code as a plain integer string
# (e.g. 784.0 -> "784"); the left join turns the code column into floats
# whenever at least one country has no match.
df_merge = df_merge.dropna().assign(
    ptCode=lambda frame: frame["ptCode"].astype(int).astype(str)
)
df_merge.head()

Unnamed: 0,iso3,ptCode,ptTitle
0,ARE,784,United Arab Emirates
1,AZE,31,Azerbaijan
2,BEL,56,Belgium
3,CHN,156,China
4,CZE,203,Czechia


<a id="ID_part2"></a>
### Part 2 | Extract UN data based on list
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

In [22]:
# building blocks for the UN Comtrade API query string
url_comma, url_add = "%2C", "&"  # URL-encoded comma, parameter separator

# extra query parameter: request TOTAL trade only
extra_cc = "cc=TOTAL"

In [58]:
# For every country in scope, download its trade data from UN Comtrade and export the raw extract.
# Each entry appended to df_collection is either the downloaded DataFrame or a
# ("Missing", code) placeholder when the extract could not be read or exported.

dl_year = "2016"  # year requested from the API and used in the export file names
df_collection = []
length = len(df_merge.ptCode)

for index, (entry, temp_entry_name) in enumerate(zip(df_merge.ptCode, df_merge.ptTitle)):
    #=== reporting
    print(f"Working on | {temp_entry_name} | {index+1}/{length} (~{round(((index+1)/length)*100)}%)")

    #=== run functions to extract
    un_extract = s_un.f_un_comtrade_data(
        p_r_country=[str(int(entry))],
        p_p_country=["all"],
        p_ps_years=[dl_year],
        p_extra=extra_cc,
    )

    try:
        df_extract = un_extract[0][0]
        s_file_export.f_df_export(
            df_extract,
            f"un_com_{temp_entry_name}_{dl_year}_ref",
            p_copy=False,
            p_loc1=path_raw_dl,
            p_loc1_pre="dl_",
        )
        df_collection.append(df_extract)
    except Exception as error:
        # Keep the run going for the remaining countries, but say why this one failed.
        # Catching Exception (not a bare except) leaves Ctrl-C / kernel interrupt working.
        print(f"FAILED | {temp_entry_name} ({entry}) | {type(error).__name__}: {error}")
        df_collection.append(("Missing", entry))

Working on | United Arab Emirates | 1/84 (~1%)
WORKING ON | Country 784| URL https://comtrade.un.org/api/get?r=784&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_United Arab Emirates_2016_ref_20200809_1322.csv | COMPLETE
COPY   | SKIP
Working on | Azerbaijan | 2/84 (~2%)
WORKING ON | Country 31| URL https://comtrade.un.org/api/get?r=31&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Azerbaijan_2016_ref_20200809_1322.csv | COMPLETE
COPY   | SKIP
Working on | Belgium | 3/84 (~4%)
WORKING ON | Country 56| URL https://comtrade.un.org/api/get?r=56&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Belgium_2016_ref_20200809_1322.csv | COMPLETE
COPY   | SKIP
Working on | China | 4/84 (~5%)
WORKING ON | Country 156| URL https://comtrade.un.org/api/get?r=156&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Ch

OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Bhutan_2016_ref_20200809_1324.csv | COMPLETE
COPY   | SKIP
Working on | Switzerland | 33/84 (~39%)
WORKING ON | Country 757| URL https://comtrade.un.org/api/get?r=757&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Switzerland_2016_ref_20200809_1324.csv | COMPLETE
COPY   | SKIP
Working on | Germany | 34/84 (~40%)
WORKING ON | Country 276| URL https://comtrade.un.org/api/get?r=276&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Germany_2016_ref_20200809_1324.csv | COMPLETE
COPY   | SKIP
Working on | Denmark | 35/84 (~42%)
WORKING ON | Country 208| URL https://comtrade.un.org/api/get?r=208&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Denmark_2016_ref_20200809_1324.csv | COMPLETE
COPY   | SKIP
Working on | Estonia | 36/84 (~43%)
WORKING ON | Country 233| URL https://comtrade.un.o

Export | ../Data/0_raw/1_auto_download/dl_un_com_Netherlands_2016_ref_20200809_1326.csv | COMPLETE
COPY   | SKIP
Working on | Norway | 64/84 (~76%)
WORKING ON | Country 579| URL https://comtrade.un.org/api/get?r=579&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Norway_2016_ref_20200809_1326.csv | COMPLETE
COPY   | SKIP
Working on | Nepal | 65/84 (~77%)
WORKING ON | Country 524| URL https://comtrade.un.org/api/get?r=524&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Nepal_2016_ref_20200809_1327.csv | COMPLETE
COPY   | SKIP
Working on | Pakistan | 66/84 (~79%)
WORKING ON | Country 586| URL https://comtrade.un.org/api/get?r=586&p=all&freq=A&ps=2016&cc=TOTAL
OBLIGATORY PAUSE
Export | ../Data/0_raw/1_auto_download/dl_un_com_Pakistan_2016_ref_20200809_1327.csv | COMPLETE
COPY   | SKIP
Working on | Poland | 67/84 (~80%)
WORKING ON | Country 616| URL https://comtrade.un.org/api/get?r=616&p=all&fr

In [59]:
# Sanity check: the collection should hold one entry per country that was requested.
print(f"Entries collected: {len(df_collection)}")

# Failed downloads are stored as ("Missing", code) tuples. They are identified by type,
# because a length test would also match a DataFrame that happens to have two rows.
for entry in df_collection:
    if isinstance(entry, tuple):
        print(entry)

In [64]:
# download year used in the export file name
dl_year = "2016"
# merge all downloaded dataframes; ("Missing", code) placeholders left by failed
# downloads are skipped because pd.concat only accepts pandas objects
df_un_com_master = pd.concat(
    [entry for entry in df_collection if isinstance(entry, pd.DataFrame)]
)
# save entire download to download folder
s_file_export.f_df_export(
    df_un_com_master,
    f"un_com_{dl_year}",
    p_copy=False,
    p_loc1=path_raw_dl,
    p_loc1_pre="dl_",
    p_file_id=f"{notebook_id}_",
)

Export | ../Data/0_raw/1_auto_download/122_dl_un_com_2016_20200809_1334.csv | COMPLETE
COPY   | SKIP


<a id="ID_part3"></a>
### Part 3 | Refine UN dataset
|| [0| Default imports](#ID_top) || [1|Part1](#ID_part1) || [2|Part2](#ID_part2) || [3|Part3](#ID_part3) || [4|Part4](#ID_part4) || [5|Part5](#ID_part5) ||

In [65]:
            # Partner / reporter info (6)
columns = (
    ["rtCode", "rt3ISO", "rtTitle", "ptCode", "pt3ISO", "ptTitle"]
    # period, trade-flow description and year (3)
    + ["period", "rgDesc", "yr"]
    # codes, trade value and descriptions; some may duplicate the fields above (6)
    + ["rgCode", "cmdCode", "TradeValue", "periodDesc", "pfCode", "cmdDescE"]
)

# keep only the selected columns, in the order listed
df_un_com_focused = df_un_com_master[columns]
df_un_com_focused.head()

Unnamed: 0,rtCode,rt3ISO,rtTitle,ptCode,pt3ISO,ptTitle,period,rgDesc,yr,rgCode,cmdCode,TradeValue,periodDesc,pfCode,cmdDescE
0,784,ARE,United Arab Emirates,0,WLD,World,2016,Import,2016,1,TOTAL,270779148786,2016,H4,All Commodities
1,784,ARE,United Arab Emirates,0,WLD,World,2016,Export,2016,2,TOTAL,295046691148,2016,H4,All Commodities
2,784,ARE,United Arab Emirates,0,WLD,World,2016,Re-Export,2016,3,TOTAL,145112606991,2016,H4,All Commodities
3,784,ARE,United Arab Emirates,4,AFG,Afghanistan,2016,Import,2016,1,TOTAL,244526143,2016,H4,All Commodities
4,784,ARE,United Arab Emirates,4,AFG,Afghanistan,2016,Export,2016,2,TOTAL,1596610735,2016,H4,All Commodities


In [66]:
# export the reduced data frame for this download year, tagged with the notebook id
s_file_export.f_df_export(
    df_un_com_focused,
    f"un_com_{dl_year}_mini",
    p_file_id=f"{notebook_id}_",
)

Export | ../Data/1_raw_processed_backup/122_store_un_com_2016_mini_20200809_1336.csv | COMPLETE
COPY   | ../Data/2_raw_processed_input/122_input_un_com_2016_mini.csv.gzip | COMPLETE
