In [20]:
import pandas as pd
import sys
import configparser
import os
import numpy as np
# Load the project settings. The context manager closes the file handle as
# soon as the parser has consumed it (a bare open() call leaves it dangling).
config = configparser.ConfigParser()
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the configured library directory first on the import path so the local
# helper modules imported below (postgres.py, commons.py) can be found.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Connection string later handed to sqlalchemy.create_engine.
engine_path = config.get('DATABASE','engine_path')

import postgres #from local file postgres.py
import commons
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py
from importlib import reload

import json
from sqlalchemy import create_engine, text
import requests

# Build the SQLAlchemy engine from the connection string read from settings.ini.
engine = create_engine(engine_path)
# Wrap the engine in the project's PostgresDriver helper (local postgres.py);
# `db.to_sql` is what loads the finished table at the end of the notebook.
db = postgres.PostgresDriver(engine)

# Read Pacha's HS12

In [2]:
# Read the code column explicitly as text. The cells below rely on
# `hs.hs12.str.len()` and on string slicing, and type inference could
# otherwise parse numeric-looking codes as integers (dropping leading zeros)
# or yield a mixed-type column. This mirrors the explicit `dtype` used for
# the europa.eu file further down.
hs = pd.read_csv('pacha_hs12.csv', dtype={'hs12': str})


In [3]:
# Six-character codes form the most detailed level (`level5`).
# The parent `level3` code is the first four characters; `.str[:4]` derives it
# with the vectorised string accessor instead of a per-row Python lambda, and
# the chain builds the frame in one expression rather than mutating it after
# creation.
level5 = (
    hs[hs.hs12.str.len() == 6]
    .rename(columns={'hs12': 'level5', 'description': 'level5_en'})
    .assign(level3=lambda frame: frame.level5.str[:4])
)

In [4]:
# Four-character codes (`level3`) joined to their six-character children.
# The merge is an inner join on `level3`, so codes without a counterpart on
# the other side are dropped. The parent `level2` code is the first two
# characters, taken with the vectorised `.str` accessor rather than a per-row
# lambda.
level3 = (
    hs[hs.hs12.str.len() == 4]
    .rename(columns={'hs12': 'level3', 'description': 'level3_en'})
    .merge(level5, on='level3')
    .assign(level2=lambda frame: frame.level3.str[:2])
)

In [5]:
# Two-character codes (`level2`) joined (inner) to the level3/level5 frame
# built above, giving one row per six-character code with all three levels
# and their English descriptions.
level2 = (
    hs.loc[hs.hs12.str.len() == 2]
    .rename(columns={'hs12': 'level2', 'description': 'level2_en'})
    .merge(level3, on='level2')
)

# HS12 from `europa.eu`

In [6]:
# Columns to load from the europa.eu file; everything is read as text except
# LEVEL, which is read as an integer.
cols = ['CNKEY', 'CN', 'PURE_HS_CODE', 'LEVEL', 'EN', 'ES']
dtype = {c: str for c in cols}
dtype['LEVEL'] = int
hseu = pd.read_csv('hs2012.csv', usecols=cols, dtype=dtype)

# Keep only rows that carry an HS code. Take an explicit copy so that adding
# a column below writes to an independent frame instead of a filtered slice
# (the pattern that triggers pandas' SettingWithCopyWarning).
hseu = hseu[hseu.PURE_HS_CODE.notnull()].copy()

# FIXED_HS is the code with its dots removed, so it can be matched against
# the dot-free `level2` / `level3` / `level5` codes.
hseu['FIXED_HS'] = hseu.PURE_HS_CODE.apply(lambda code: code.replace('.', ''))

# Lookup table: dot-free code -> Spanish description.
es_hs = hseu[['FIXED_HS', 'ES']]

In [None]:
# Attach the Spanish description (`ES`) of each two-character code.
# The lookup key is renamed to `level2` so the merge joins on a single shared
# column; joining with left_on/right_on would leave a redundant `FIXED_HS`
# copy of the key in the frame, which then collides (`FIXED_HS_x`/`_y`) when
# the same lookup is merged again for the other levels.
# Inner join: rows whose `level2` code has no match in `es_hs` are dropped.
level2 = level2.merge(es_hs.rename(columns={'FIXED_HS': 'level2'}), on='level2')

In [8]:
level2 = level2.rename(columns={'ES': 'level2_es'})
# Drop the prefix that precedes the first ' - ' and title-case the rest.
# maxsplit=1 keeps the whole remainder: an unbounded split followed by [1]
# would silently truncate any description that contains a second ' - '.
# NOTE(review): presumably the prefix is a chapter label — confirm against
# the ES column of hs2012.csv.
level2.loc[:, 'level2_es'] = level2.level2_es.apply(
    lambda label: label.split(' - ', 1)[1].title()
)

In [14]:
# Attach the Spanish description of each four-character code as `level3_es`.
# The lookup is renamed up front so the join runs on the shared `level3`
# column and adds exactly one new column; joining with left_on/right_on
# would leave an extra `FIXED_HS` key column (suffixed `_x`/`_y` when one is
# already present). Left join: codes without a translation keep NaN.
level2 = level2.merge(
    es_hs.rename(columns={'FIXED_HS': 'level3', 'ES': 'level3_es'}),
    on='level3',
    how='left',
)

In [17]:
# Attach the Spanish description of each six-character code as `level5_es`.
# The lookup is renamed up front so the join runs on the shared `level5`
# column and adds exactly one new column; joining with left_on/right_on
# would leave an extra `FIXED_HS` key column (suffixed `_x`/`_y` when one is
# already present). Left join: codes without a translation keep NaN.
level2 = level2.merge(
    es_hs.rename(columns={'FIXED_HS': 'level5', 'ES': 'level5_es'}),
    on='level5',
    how='left',
)

# Update names from OEC

In [32]:
# Download the Spanish attribute list from the OEC API.
# A timeout keeps the cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() surfaces HTTP errors directly instead of
# failing later with an opaque JSON/KeyError.
# NOTE(review): the endpoint path says `hs92`, while the rest of this
# notebook builds an HS 2012 table — confirm this is the intended revision.
response = requests.get("https://atlas.media.mit.edu/attr/hs92/es/", timeout=30)
response.raise_for_status()
oec_xlations = pd.DataFrame(response.json()['data'])


In [56]:
# Keep only the OEC entries whose `id` is exactly six characters long.
oec_6 = oec_xlations.loc[oec_xlations.id.str.len() == 6]

In [57]:
# Preview the six-character OEC entries. The cell previously ended in a bare
# `oec_6.loc`, which only displays the indexer object, not any data.
# TODO: the abandoned draft derived the four-digit code with
# `oec_6.id.apply(lambda h: h[2:])`; finish that step if the OEC names are
# meant to replace the translations built above.
oec_6.head(10)


Unnamed: 0,color,display_id,icon,id,image,image_author,image_link,keywords,name,palette,weight
1,#FFE999,0101,/static/img/icons/hs/hs_01.png,010101,/static/img/headers/hs/010101.jpg,James Marvin Phelps,https://flic.kr/p/gMG1YC,"equino, cebra, culo, burro, mulas",Caballos,"[""#e7f9fa"",""#533127"",""#907b65"",""#713928"",""#b66...",2.689205e+09
2,#FFE999,0102,/static/img/icons/hs/hs_01.png,010102,/static/img/headers/hs/010102.jpg,U.S. Department of Agriculture,https://flic.kr/p/o63k4J,"ganado, búfalo, bisonte, vaca, toro, buey, toro",Bovino,"[""#fbfbfb"",""#402a1a"",""#976034"",""#2c2420"",""#be6...",8.346649e+09
3,#FFE999,0103,/static/img/icons/hs/hs_01.png,010103,/static/img/headers/hs/01.jpg,,,cerdo,Cerdos,,3.694108e+09
4,#FFE999,0104,/static/img/icons/hs/hs_01.png,010104,/static/img/headers/hs/01.jpg,,,"espolón, oveja, cordero, carnero castrado, ref...",Ovejas y cabras,,2.141696e+09
5,#FFE999,0105,/static/img/icons/hs/hs_01.png,010105,/static/img/headers/hs/01.jpg,,,"pollo, gallina, pavo, pato, ganso",Aves de corral,,2.889115e+09
6,#FFE999,0106,/static/img/icons/hs/hs_01.png,010106,/static/img/headers/hs/01.jpg,,,"animales del parque zoológico, animales domést...",Otros animales,,1.306225e+09
7,#FFE999,0201,/static/img/icons/hs/hs_01.png,010201,/static/img/headers/hs/010201.jpg,U.S. Department of Agriculture,https://flic.kr/p/kUwC6M,"carne de res, filete, carne roja",Carne Bovina,"[""#b4605a"",""#e6dfd4"",""#120c09"",""#f7e9d0"",""#020...",2.087768e+10
8,#FFE999,0202,/static/img/icons/hs/hs_01.png,010202,/static/img/headers/hs/010202.jpg,Mike,https://flic.kr/p/5YxzLS,"carne de res, filete, carne roja",Congelados Carne de Bovino,"[""#fcfdfc"",""#523a38"",""#8b7b77"",""#2c1516"",""#a89...",2.093388e+10
9,#FFE999,0203,/static/img/icons/hs/hs_01.png,010203,/static/img/headers/hs/010203.jpg,Mike,https://flic.kr/p/nTxu3s,"jamón, carne de cerdo, tocino",Carne de cerdo,"[""#0e0d0d"",""#ecd7ca"",""#c89e81"",""#fcf7f4"",""#eca...",2.505188e+10
10,#FFE999,0204,/static/img/icons/hs/hs_01.png,010204,/static/img/headers/hs/01.jpg,,,"cordero, carne de cordero",Ovino y caprino,,6.342547e+09


In [19]:
# Load the finished dimension table into economy.dim_hs2012 through the
# project's PostgresDriver helper, selecting the columns in their final order.
db.to_sql(level2[['level2', 'level2_en', 'level2_es', 'level3', 'level3_en', 'level3_es', 'level5', 'level5_en', 'level5_es']], 
          'economy', 
          'dim_hs2012')

# Index the two lower-level code columns.
# Engine.execute() is legacy API (deprecated in SQLAlchemy 1.4, removed in
# 2.0). engine.begin() with text() works on old and new releases alike and
# runs both statements in one transaction that commits when the block exits.
with engine.begin() as connection:
    connection.execute(text("""
CREATE INDEX level3_idx 
ON economy.dim_hs2012 (level3)
"""))
    connection.execute(text("""
CREATE INDEX level5_idx 
ON economy.dim_hs2012 (level5)
"""))



DROP TABLE IF EXISTS economy.dim_hs2012;
CREATE TABLE "economy"."dim_hs2012" (
"level2" TEXT,
  "level2_en" TEXT,
  "level2_es" TEXT,
  "level3" TEXT,
  "level3_en" TEXT,
  "level3_es" TEXT,
  "level5" TEXT,
  "level5_en" TEXT,
  "level5_es" TEXT
)
COPY "economy"."dim_hs2012" ("level2","level2_en","level2_es","level3","level3_en","level3_es","level5","level5_en","level5_es") FROM STDIN WITH CSV HEADER DELIMITER ',';


<sqlalchemy.engine.result.ResultProxy at 0x1106d0eb8>