# Teste da API Comtrade

Info: https://comtradedeveloper.un.org

Specs: https://comtradedeveloper.un.org/api-details#api=comtrade-v1&operation=get-get



## Descrições dos códigos HS

Obter tabela em https://github.com/datasets/harmonized-system/blob/master/data/harmonized-system.csv

Copiar para directoria `support`


### Criar um dicionário com os códigos e descrições

In [2]:
from pathlib import Path

import pandas as pd

# Load the Harmonized System (HS) code table. `hscode` is forced to string so
# that codes with a leading zero (e.g. "01") can never be parsed as integers,
# which would change the dictionary keys and break `.str` prefix matching.
hs_codes_df = pd.read_csv('support/harmonized-system.csv', dtype={'hscode': str})
# Lookup table: HS code -> description.
hs_codes_map = dict(zip(hs_codes_df.hscode, hs_codes_df.description))

#### Fazer um subconjunto com o nível 2 somente

In [3]:
# Keep only the rows whose `level` is 2 and build a code -> description lookup
# restricted to them. (A bare column-selection expression that used to sit
# between these two statements was removed: it was not the last line of the
# cell, so its value was computed and discarded without being displayed.)
hs_codes_l2 = hs_codes_df[hs_codes_df.level == 2]
hs_l2_map = dict(zip(hs_codes_l2.hscode, hs_codes_l2.description))



## Obter os dados de comtrade.un.org

### Parâmetros gerais que não mudam

In [38]:
import json
import requests
import pandas

# Three-digit country codes (as zero-padded strings) mapped to display names
# for China, Hong Kong, Macau and the partner countries referred to as "PLP"
# throughout this notebook.
m49_codes_cn_plp = {
    "024": "Angola",
    "076": "Brazil",
    "132": "Cabo Verde",
    "156": "China",
    "344": "China, Hong Kong",
    "446": "China, Macau",
    "226": "Guiné Equatorial",
    "624": "Guiné-Bissau",
    "508": "Moçambique",
    "620": "Portugal",
    "678": "São Tomé e Príncipe",
    "626": "Timor-Leste"
}
m49_china = "156"
m49_hong_kong = "344"
m49_macau = "446"
# PLP codes only: every key above except China, Hong Kong and Macau.
m49_plp = set(m49_codes_cn_plp.keys()) - {m49_china, m49_hong_kong, m49_macau}
# Comma-separated form of the PLP codes. The set is sorted before joining
# because the iteration order of a set of strings can differ between
# interpreter runs, which made this value change from one kernel restart
# to the next.
m49_plp_list = ",".join(sorted(m49_plp))

# Most useful columns of the result.
# Choose from:
#        'typeCode', 'freqCode', 'refPeriodId', 'refYear', 'refMonth',
#        'period', 'reporterCode', 'reporterISO', 'reporterDesc',
#        'flowCode', 'flowDesc', 'partnerCode', 'partnerISO', 'partnerDesc',
#        'partner2Code', 'partner2ISO', 'partner2Desc',
#        'classificationCode', 'classificationSearchCode',
#        'isOriginalClassification', 'cmdCode', 'cmdDesc', 'aggrLevel',
#        'isLeaf', 'customsCode', 'customsDesc', 'mosCode', 'motCode',
#        'motDesc', 'qtyUnitCode', 'qtyUnitAbbr', 'qty', 'isQtyEstimated',
#        'altQtyUnitCode', 'altQtyUnitAbbr', 'altQty', 'isAltQtyEstimated',
#        'netWgt', 'isNetWgtEstimated', 'grossWgt', 'isGrossWgtEstimated',
#        'cifvalue', 'fobvalue', 'primaryValue', 'legacyEstimationFlag',
#        'isReported', 'isAggregate', 'primaryValueFormated'

# Columns shown in the result tables, in display order.
cols = [
    'typeCode',
    'freqCode',
    'reporterDesc',
    'partnerDesc',
    'refYear',
    'refMonth',
    'cmdCode',
    'cmdDesc',
    'flowCode',
    'primaryValueFormated',
    'primaryValue',
]
# Sort keys applied to the result tables.
sort_order = [
    'reporterDesc',
    'partnerDesc',
    'refYear',
    'refMonth',
]


### Função auxiliar para aceder à API un.comtrade


In [50]:

def call_uncomtrade(typeCode: str, freqCode: str,
                    reporterCode: str = '156',
                    partnerCode: str = '024,076,132,226,624,508,620,678,626',
                    period: str = None,
                    clCode: str = "HS",
                    cmdCode: str = "TOTAL",
                    flowCode: str = "M,X",
                    timeout: int = 10
                    ) -> pd.DataFrame:
    """Query the public UN Comtrade preview endpoint and return the rows as a DataFrame.

    The request goes to
    ``https://comtradeapi.un.org/public/v1/preview/{typeCode}/{freqCode}/{clCode}``
    and the ``data`` list of the JSON response becomes the DataFrame.

    Parameters
        typeCode: required, C for commodities, S for services
        freqCode: required, A for annual and M for monthly
        reporterCode: optional, default "156" (China). The previous default,
            "049", did not match the documented reporter (China is "156").
        partnerCode: optional, default list of PLP codes, None for all countries
        period: optional, YYYY or YYYYMM, default None (all available periods)
        clCode: trade classification: HS, SITC, BEC or EBOPS.
                Available values: HS, SS, B4, B5, EB, EB10, EB02, EBSDMX
        cmdCode: optional, default "TOTAL"
        flowCode: optional, M=import, X=export; more: RX, RM, MIP, XIP, MOP,
                  XOP, MIF, XIF, DX, FM; default "M,X"
        timeout: int, max wait time in seconds. Default 10

    Returns
        A DataFrame with one row per record of the response, in which:
          * ``reporterCode`` / ``partnerCode`` are zero-padded three-character
            strings;
          * ``reporterDesc`` / ``partnerDesc`` are looked up in
            ``m49_codes_cn_plp`` (codes missing from that dict become NaN);
          * ``cmdDesc`` is looked up in ``hs_codes_map``;
          * ``primaryValueFormated`` is ``primaryValue`` formatted with
            thousands separators.
        When the response carries no rows, an empty DataFrame is returned.

    Raises
        requests.HTTPError: when the server answers with an error status.
        Other ``requests`` exceptions (e.g. on timeout) propagate unchanged.
    """
    baseUrl = "https://comtradeapi.un.org/public/v1"
    requestUrl = f"{baseUrl}/preview/{typeCode}/{freqCode}/{clCode}"

    resp = requests.get(
        requestUrl,
        params={
            'reporterCode': reporterCode,
            'period': period,
            'partnerCode': partnerCode,
            'cmdCode': cmdCode,
            'flowCode': flowCode,
        },
        timeout=timeout,
    )
    # Fail with the HTTP error itself instead of a KeyError on a payload
    # that has no 'data' entry.
    resp.raise_for_status()
    results = resp.json().get('data') or []

    df = pd.DataFrame(results)
    if df.empty:
        # Nothing to post-process; the column operations below would fail
        # on a frame that has no columns.
        return df

    # Bracket assignment is used throughout: attribute-style assignment
    # (df.name = ...) does not create a column when it is missing.
    # Convert result country codes to three-digit codes.
    df['reporterCode'] = df['reporterCode'].astype('str').str.zfill(3)
    df['partnerCode'] = df['partnerCode'].astype('str').str.zfill(3)
    df['reporterDesc'] = df['reporterCode'].map(m49_codes_cn_plp)
    df['partnerDesc'] = df['partnerCode'].map(m49_codes_cn_plp)
    # Map HS codes to their descriptions.
    df['cmdDesc'] = df['cmdCode'].map(hs_codes_map)
    df['primaryValueFormated'] = df['primaryValue'].map('{:,}'.format)
    return df

### Parâmetros de cada chamada à API.

## Resultados: testes

In [68]:
# Raise the pandas display limits used when rendering the result tables.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 500)

period = "2021"  # when freqCode is "M", use the YYYYMM form
flow = "M,X"
df = call_uncomtrade(
    "C",  # typeCode: C for commodities, S for services
    "A",  # freqCode: A for annual, M for monthly
    reporterCode=m49_china,
    partnerCode=m49_plp_list,
    period=period,
    flowCode=flow,
)
# Show only the selected columns, ordered by reporter, partner and period.
df[cols].sort_values(sort_order)

Unnamed: 0,typeCode,freqCode,reporterDesc,partnerDesc,refYear,refMonth,cmdCode,cmdDesc,flowCode,primaryValueFormated,primaryValue
0,C,A,China,Angola,2021,52,TOTAL,Total of all HS2022 commodities,M,20908231317.0,20908230000.0
1,C,A,China,Angola,2021,52,TOTAL,Total of all HS2022 commodities,X,2491545681.0,2491546000.0
2,C,A,China,Brazil,2021,52,TOTAL,Total of all HS2022 commodities,M,109877876281.0,109877900000.0
3,C,A,China,Brazil,2021,52,TOTAL,Total of all HS2022 commodities,X,53612224393.0,53612220000.0
4,C,A,China,Cabo Verde,2021,52,TOTAL,Total of all HS2022 commodities,M,1146922.0,1146922.0
5,C,A,China,Cabo Verde,2021,52,TOTAL,Total of all HS2022 commodities,X,84537739.0,84537740.0
6,C,A,China,Guiné Equatorial,2021,52,TOTAL,Total of all HS2022 commodities,M,1215142465.0,1215142000.0
7,C,A,China,Guiné Equatorial,2021,52,TOTAL,Total of all HS2022 commodities,X,123984094.0,123984100.0
12,C,A,China,Guiné-Bissau,2021,52,TOTAL,Total of all HS2022 commodities,M,1111.0,1111.0
13,C,A,China,Guiné-Bissau,2021,52,TOTAL,Total of all HS2022 commodities,X,88878914.0,88878910.0


In [72]:
# Suffix appended to the file name; built from the period and flow of the
# query above with the comma replaced (e.g. "2021" and "M,X" give "2021M_X").
filename_note = period + flow.replace(",", "_")
# Create the output folder if needed: to_excel does not create missing
# directories and would fail on a fresh checkout.
Path("./downloads").mkdir(parents=True, exist_ok=True)
df[cols].to_excel(f"./downloads/dados_comtrade_{filename_note}.xlsx")

## China, importações mais importantes dos PLP

In [69]:
rank_filter = 5  # how many of the most relevant imports to keep
years = "2020,2021"
pco_cols = [
    'reporterDesc', 'partnerDesc', 'refYear', 'rank', 'cmdDesc',
    'flowCode', 'primaryValueFormated',
]
df = call_uncomtrade(
    "C",  # typeCode: C for commodities, S for services
    "A",  # freqCode: A for annual, M for monthly
    reporterCode=m49_china,
    partnerCode=m49_plp_list,
    period=years,
    cmdCode="AG2",
    flowCode="M",
)

# Order by partner and year, largest trade value first within each pair.
pco = df.sort_values(
    ['partnerDesc', 'refYear', 'primaryValue'],
    ascending=[True, True, False],
)
# Dense rank of the trade value inside each (partner, year) group; 1 = largest.
pco['rank'] = (
    pco.groupby(['partnerDesc', 'refYear'])["primaryValue"]
    .rank(method="dense", ascending=False)
)
# Keep the rows whose rank is within the cut-off.
pco_top5 = pco.loc[pco['rank'] <= rank_filter]
pco_top5[pco_cols].set_index(['reporterDesc', 'partnerDesc', 'refYear'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rank,cmdDesc,flowCode,primaryValueFormated
reporterDesc,partnerDesc,refYear,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
China,Angola,2020,1.0,"Mineral fuels, mineral oils and products of th...",M,14686950196.0
China,Angola,2020,2.0,"Salt; sulphur; earths, stone; plastering mater...",M,29428961.0
China,Angola,2020,3.0,"Natural, cultured pearls; precious, semi-preci...",M,20014740.0
China,Angola,2020,4.0,Copper and articles thereof,M,9938185.0
China,Angola,2020,5.0,"Ores, slag and ash",M,6763117.0
China,Angola,2021,1.0,"Mineral fuels, mineral oils and products of th...",M,20823873963.0
China,Angola,2021,2.0,"Salt; sulphur; earths, stone; plastering mater...",M,30420529.0
China,Angola,2021,3.0,"Ores, slag and ash",M,16912288.0
China,Angola,2021,4.0,Copper and articles thereof,M,14969851.0
China,Angola,2021,5.0,Aluminium and articles thereof,M,10401908.0


### Guardar formato excel


In [73]:
filename_note = years  # change to append a different note to the file name
# Create the output folder if needed: to_excel does not create missing
# directories and would fail on a fresh checkout.
Path("./downloads").mkdir(parents=True, exist_ok=True)
pco_top5.to_excel(f"./downloads/china_plp_import_top5_{filename_note}.xlsx")

## China, exportações mais importantes para os PLP

In [80]:
rank_filter = 5  # how many of the most relevant exports to keep
years = "2021"
pco_cols = [
    'reporterDesc', 'partnerDesc', 'refYear', 'rank', 'cmdDesc',
    'flowCode', 'primaryValueFormated',
]
df = call_uncomtrade(
    "C",  # typeCode: C for commodities, S for services
    "A",  # freqCode: A for annual, M for monthly
    reporterCode=m49_china,
    partnerCode=m49_plp_list,
    period=years,
    cmdCode="AG2",
    flowCode="X",
)

# Order by partner and year, largest trade value first within each pair.
pco = df.sort_values(
    ['partnerDesc', 'refYear', 'primaryValue'],
    ascending=[True, True, False],
)
# Dense rank of the trade value inside each (partner, year) group; 1 = largest.
pco['rank'] = (
    pco.groupby(['partnerDesc', 'refYear'])["primaryValue"]
    .rank(method="dense", ascending=False)
)
# Keep the rows whose rank is within the cut-off.
pco_top5 = pco.loc[pco['rank'] <= rank_filter]
pco_top5[pco_cols].set_index(['reporterDesc', 'partnerDesc', 'refYear'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rank,cmdDesc,flowCode,primaryValueFormated
reporterDesc,partnerDesc,refYear,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
China,Angola,2021,1.0,Electrical machinery and equipment and parts t...,X,300828698.0
China,Angola,2021,2.0,"Machinery and mechanical appliances, boilers, ...",X,250199330.0
China,Angola,2021,3.0,Vehicles; other than railway or tramway rollin...,X,193365426.0
China,Angola,2021,4.0,Plastics and articles thereof,X,168846759.0
China,Angola,2021,5.0,"Textiles, made up articles; sets; worn clothin...",X,167976610.0
China,Brazil,2021,1.0,Electrical machinery and equipment and parts t...,X,14148255403.0
China,Brazil,2021,2.0,"Machinery and mechanical appliances, boilers, ...",X,7851281383.0
China,Brazil,2021,3.0,Organic chemicals,X,4179568362.0
China,Brazil,2021,4.0,Iron and steel,X,2509608346.0
China,Brazil,2021,5.0,"Optical, photographic, cinematographic, measur...",X,2331482013.0


### Guardar formato excel


In [76]:
filename_note = years  # change to append a different note to the file name
# Create the output folder if needed: to_excel does not create missing
# directories and would fail on a fresh checkout.
Path("./downloads").mkdir(parents=True, exist_ok=True)
pco_top5.to_excel(f"./downloads/china_plp_export_top5_{filename_note}.xlsx")

### China, detalhe das exportações mais importantes dos PLP para a China

Em dois passos: 
* obtém as categorias de nível 2 mais importantes de cada país
* pesquisa todas as subcategorias de cada uma

In [81]:
rank_filter = 5  # how many of the most relevant imports to keep
years = "2020,2021"
pco_cols = ['reporterDesc','partnerDesc','refYear','rank','cmdDesc',
            'flowCode','primaryValueFormated']
df = call_uncomtrade("C",  # C for commodities, S for services
                     "A",  # (freqCode) A for annual and M for monthly
                     flowCode="M",
                     cmdCode="AG2",
                     reporterCode=m49_china,
                     partnerCode=m49_plp_list,
                     period=years
                     )

# Rank the trade value inside each (partner, year) group and keep the top rows.
pco = df.sort_values(['partnerDesc','refYear','primaryValue'], ascending=[True,True,False])
pco['rank'] = pco.groupby(['partnerDesc','refYear'])["primaryValue"].rank(method="dense", ascending=False)
pco_top5 = pco[pco['rank'] <= rank_filter]

# For each partner country, collect every HS code that starts with one of its
# top-ranked commodity codes (the code itself plus its sub-codes).
countries = pco_top5.partnerDesc.unique()
country_cmd_top5_codes = dict()
for country in countries:
    # Distinct top-ranked codes of this country over all requested years.
    l2_codes = pco_top5[pco_top5.partnerDesc == country]['cmdCode'].unique()
    print(country, l2_codes)
    hs_details = []
    for l2_code in l2_codes:
        # na=False: rows with a missing hscode count as "no match" instead of
        # putting NaN into the boolean mask, which cannot be used for indexing.
        is_sub_code = hs_codes_df.hscode.str.startswith(l2_code, na=False)
        l2_sub_codes = list(hs_codes_df.loc[is_sub_code, 'hscode'])
        # Extend in place rather than rebuilding the list on every iteration.
        hs_details.extend(l2_sub_codes)
    # hs_details is a fresh list on every outer iteration, so no copy is needed.
    country_cmd_top5_codes[country] = hs_details


Angola ['27' '25' '71' '74' '26' '76']
Brazil ['26' '12' '27' '02' '47' '17']
Cabo Verde ['74' '84' '85' '99' '63' '26' '62']
Guiné Equatorial ['27' '44' '99' '63' '90' '84']
Guiné-Bissau ['49' '99' '85']
Moçambique ['26' '44' '12' '74' '71' '27']
Portugal ['85' '87' '61' '84' '25' '74']
São Tomé e Príncipe ['84' '90' '73' '85' '39' '99']
Timor-Leste ['12' '09' '38' '85' '44' '27' '14']


Exemplo dos códigos relevantes para o detalhe de Angola

In [83]:
# Display the list of HS codes collected for Angola in the previous cell.
country_cmd_top5_codes['Angola']

['27',
 '2701',
 '270111',
 '270112',
 '270119',
 '270120',
 '2702',
 '270210',
 '270220',
 '2703',
 '270300',
 '2704',
 '270400',
 '2705',
 '270500',
 '2706',
 '270600',
 '2707',
 '270710',
 '270720',
 '270730',
 '270740',
 '270750',
 '270791',
 '270799',
 '2708',
 '270810',
 '270820',
 '2709',
 '270900',
 '2710',
 '271012',
 '271019',
 '271020',
 '271091',
 '271099',
 '2711',
 '271111',
 '271112',
 '271113',
 '271114',
 '271119',
 '271121',
 '271129',
 '2712',
 '271210',
 '271220',
 '271290',
 '2713',
 '271311',
 '271312',
 '271320',
 '271390',
 '2714',
 '271410',
 '271490',
 '2715',
 '271500',
 '2716',
 '271600',
 '25',
 '2501',
 '250100',
 '2502',
 '250200',
 '2503',
 '250300',
 '2504',
 '250410',
 '250490',
 '2505',
 '250510',
 '250590',
 '2506',
 '250610',
 '250620',
 '2507',
 '250700',
 '2508',
 '250810',
 '250830',
 '250840',
 '250850',
 '250860',
 '250870',
 '2509',
 '250900',
 '2510',
 '251010',
 '251020',
 '2511',
 '251110',
 '251120',
 '2512',
 '251200',
 '2513',
 '251310',