In [None]:
import pandas as pd
import sys
import configparser
import os
import numpy as np
# Load project settings. The file is opened in a `with` block so the handle is
# closed as soon as it has been parsed instead of being left to the garbage collector.
config = configparser.ConfigParser()
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the configured libs directory first on sys.path so the local helper
# modules imported below (postgres.py, commons.py) can be found.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Value later handed to sqlalchemy.create_engine.
engine_path = config.get('DATABASE','engine_path')

import postgres #from local file postgres.py
import commons
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py
from importlib import reload

import json
from sqlalchemy import create_engine
import requests

# SQLAlchemy engine built from the DATABASE/engine_path setting read above.
engine = create_engine(engine_path)
# Project-local driver (postgres.py) wrapping the engine; used at the end of the
# notebook through `db.to_sql(...)`.
db = postgres.PostgresDriver(engine)

# Read Pacha's HS12

In [None]:
# Load the HS12 table; the cells below use its `hs12` (code) and `description` columns.
# `hs12` is forced to str, matching how hs2012.csv is read further down: the code
# length (2/4/6 characters) is what distinguishes the levels, so the codes must
# never be parsed as integers (which would drop leading zeros and break `.str`).
hs = pd.read_csv('pacha_hs12.csv', dtype={'hs12': str})


In [None]:
# Six-character codes form the most detailed level ("level5").
# The first four characters of each code identify its parent ("level3").
is_six_char_code = hs['hs12'].str.len() == 6
level5 = (
    hs[is_six_char_code]
    .rename(columns={'hs12': 'level5', 'description': 'level5_en'})
    .assign(level3=lambda frame: frame['level5'].str[:4])
)

In [None]:
# Four-character codes ("level3"), joined with their six-character children.
# After the merge each row is one level5 code carrying its level3 label; the
# first two characters of the level3 code identify the parent ("level2").
is_four_char_code = hs['hs12'].str.len() == 4
level3 = (
    hs[is_four_char_code]
    .rename(columns={'hs12': 'level3', 'description': 'level3_en'})
    .merge(level5, on='level3')
    .assign(level2=lambda frame: frame['level3'].str[:2])
)

In [None]:
# Two-character codes ("level2"), joined with the level3/level5 rows built above.
# Despite its name, `level2` ends up as the flattened level2 -> level3 -> level5 table.
is_two_char_code = hs['hs12'].str.len() == 2
level2 = (
    hs[is_two_char_code]
    .rename(columns={'hs12': 'level2', 'description': 'level2_en'})
    .merge(level3, on='level2')
)

# HS12 from `europa.eu`

In [None]:
# Read only the columns needed from the europa.eu export. Everything is kept as
# text (so codes keep their exact spelling) except LEVEL, which is compared
# numerically further down.
cols = ['CNKEY', 'CN', 'PURE_HS_CODE', 'LEVEL', 'EN', 'ES']
dtype = {c: str for c in cols}
dtype['LEVEL'] = int
hseu = pd.read_csv('hs2012.csv', usecols=cols, dtype=dtype)

# Keep rows that carry an HS code. `.copy()` makes the filtered frame independent,
# so adding a column below is a plain assignment rather than a write onto a slice
# (which triggers SettingWithCopyWarning).
hseu = hseu[hseu.PURE_HS_CODE.notnull()].copy()

# PURE_HS_CODE with the dots removed; used as the join key against the
# level2/level3/level5 codes.
hseu['FIXED_HS'] = hseu.PURE_HS_CODE.map(lambda code: code.replace('.', ''))

# Lookup table: normalized code -> Spanish description.
es_hs = hseu[['FIXED_HS', 'ES']]

In [None]:
# Attach the Spanish description from `es_hs` at each level of the hierarchy.
#
# The FIXED_HS join key is dropped right after every merge. Leaving it in place
# makes the successive merges accumulate FIXED_HS_x / FIXED_HS_y / FIXED_HS
# columns, and those names collide with the suffixes pandas generates when this
# frame is later merged with another frame that also has a FIXED_HS column
# (recent pandas versions raise MergeError on such duplicate columns).

# level2: default inner join, so rows whose level2 code has no entry in es_hs are dropped.
level2 = level2.merge(es_hs, left_on='level2', right_on='FIXED_HS').drop('FIXED_HS', axis=1)
level2 = level2.rename(columns={'ES': 'level2_es'})
# Keep the text after the first ' - ' and title-case it
# (presumably the labels look like '<chapter heading> - <name>' -- TODO confirm).
# Splitting only once keeps names that themselves contain ' - ' intact, and
# taking the last piece avoids an IndexError when the separator is absent.
level2.loc[:, 'level2_es'] = level2.level2_es.apply(lambda h: h.split(' - ', 1)[-1].title())

# level3: left join, rows without a Spanish label keep NaN.
level2 = level2.merge(es_hs, left_on='level3', right_on='FIXED_HS', how='left').drop('FIXED_HS', axis=1)
level2 = level2.rename(columns={'ES': 'level3_es'})

# level5: left join, rows without a Spanish label keep NaN.
level2 = level2.merge(es_hs, left_on='level5', right_on='FIXED_HS', how='left').drop('FIXED_HS', axis=1)
level2 = level2.rename(columns={'ES': 'level5_es'})

### Add section names

In [None]:
import roman

# (index label, row) pairs for the LEVEL == 1 rows (sections), in frame order.
sections = list(hseu[hseu.LEVEL==1].iterrows())
# LEVEL == 2 rows (chapters). `.copy()` so the columns added in the loop are
# written to an independent frame rather than onto a slice of `hseu`
# (avoids SettingWithCopyWarning).
chapters = hseu[hseu.LEVEL == 2].copy()

# Walk consecutive section pairs: every chapter whose index label lies between a
# section row and the next section row gets that section's labels and number.
# This relies on the index reflecting file order, with each section row preceding
# its chapters -- NOTE(review): confirm against hs2012.csv.
# Chapters located after the LAST section row are not covered by any pair and
# keep NaN in the level0* columns.
for (i,s1), (j,s2) in zip(sections, sections[1:]):
    in_section = (chapters.index >= i) & (chapters.index < j)
    chapters.loc[in_section, 'level0_es'] = s1.ES
    chapters.loc[in_section, 'level0_en'] = s1.EN
    # Section CN is a Roman numeral; store it as a zero-padded two-digit string.
    chapters.loc[in_section, 'level0'] = format(roman.fromRoman(s1.CN), '02d')

In [None]:
# Attach the section (level0) columns to the flattened level2/level3/level5 table.
# Only the chapter code and the level0 columns are taken from `chapters`: merging
# the whole frame drags in helper columns (e.g. FIXED_HS) whose names overlap with
# columns already present in `level2`, and the resulting _x/_y suffixes can
# collide with existing column names (MergeError on recent pandas).
flattened_hs2012 = level2.merge(
    chapters[['CN', 'level0', 'level0_en', 'level0_es']],
    left_on='level2',
    right_on='CN',
)[['level0', 'level0_en', 'level0_es', 'level2', 'level2_en', 'level2_es', 'level3', 'level3_en', 'level3_es', 'level5', 'level5_en', 'level5_es']]

# Rows whose chapter received no section are assigned to section XXI.
# The mask is computed once, before any of the three columns is filled.
without_section = flattened_hs2012.level0.isnull()
flattened_hs2012.loc[without_section, 'level0_en'] = "SECTION XXI - WORKS OF ART, COLLECTORS' PIECES AND ANTIQUES"
flattened_hs2012.loc[without_section, 'level0_es'] = "SECCIÓN XXI - OBJETOS DE ARTE O COLECCIÓN Y ANTIGÜEDADES"
# NOTE(review): this is the integer 21 while other level0 values are zero-padded
# strings ('01', ...). Later cells wrap level0 in str() when building keys, but an
# int will not equal a string id in a merge -- confirm this is intended.
flattened_hs2012.loc[without_section, 'level0'] = 21


# Update names from OEC

### Spanish labels

In [None]:
# Download the Spanish attribute names from the OEC API.
# A timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing JSON/KeyError.
# NOTE(review): the URL is the `hs92` attribute list while this notebook builds an
# HS 2012 dimension -- confirm that is the intended source.
response = requests.get("https://atlas.media.mit.edu/attr/hs92/es/", timeout=60)
response.raise_for_status()
oec_xlations = pd.DataFrame(response.json()['data'])

In [None]:
# OEC entries whose id has two characters are matched against level0.
has_two_char_id = oec_xlations['id'].str.len() == 2
oec_sections = oec_xlations[has_two_char_id]
flattened_hs2012 = pd.merge(
    flattened_hs2012,
    oec_sections,
    how='left',
    left_on='level0',
    right_on='id',
)

# Take the OEC name unless it is literally None, in which case the current label
# is kept. Rows with no OEC match carry NaN (not None) in `name`, so they come
# out as NaN here and are caught by the fill that follows.
flattened_hs2012.loc[:, 'level0_es'] = [
    current_label if oec_name is None else oec_name
    for current_label, oec_name in zip(flattened_hs2012['level0_es'], flattened_hs2012['name'])
]
still_unlabeled = flattened_hs2012['level0_es'].isnull()
flattened_hs2012.loc[still_unlabeled, 'level0_es'] = 'Obras de Arte'

# Prefix every code with its section id (level0 is stringified first, since it can
# hold non-string values). NOTE: not idempotent -- re-running this cell prefixes again.
section_prefix = [str(section) for section in flattened_hs2012['level0']]
for code_column in ('level2', 'level3', 'level5'):
    flattened_hs2012.loc[:, code_column] = [
        prefix + code
        for prefix, code in zip(section_prefix, flattened_hs2012[code_column])
    ]

# Discard the columns brought in by the OEC merge.
flattened_hs2012 = flattened_hs2012[[
    'level0', 'level0_en', 'level0_es',
    'level2', 'level2_en', 'level2_es',
    'level3', 'level3_en', 'level3_es',
    'level5', 'level5_en', 'level5_es',
]]

In [None]:
# OEC entries whose id has six characters are matched against the
# section-prefixed level3 code.
oec_6 = oec_xlations[(oec_xlations.id.str.len() == 6)]
# Only `id` and `name` are merged in, so no other OEC column leaks into the table.
flattened_hs2012 = flattened_hs2012.merge(oec_6[['id', 'name']], left_on='level3', right_on='id', how='left')

# Prefer the OEC name; where it is missing keep the existing label.
# A left merge marks unmatched rows with NaN, which an `is None` test does not
# detect (the existing label would be overwritten with NaN), so the fallback is
# done with fillna, which treats both None and NaN as missing.
flattened_hs2012['level3_es'] = flattened_hs2012['name'].fillna(flattened_hs2012['level3_es'])

# Remove the helper columns: if `id`/`name` stay in the frame, the next merge
# against an OEC table renames them to name_x/name_y and lookups of 'name' fail.
flattened_hs2012 = flattened_hs2012.drop(['id', 'name'], axis=1)


### English labels

In [None]:
# Download the English attribute names from the OEC API (timeout + HTTP status
# check so a network problem fails here, with a clear error).
response = requests.get("https://atlas.media.mit.edu/attr/hs92/en/", timeout=60)
response.raise_for_status()
oec_xlations = pd.DataFrame(response.json()['data'])
# OEC entries whose id has two characters are matched against level0.
oec_sections = oec_xlations[(oec_xlations.id.str.len() == 2)]

# Start from the label columns only: any `id`/`name` column left over from an
# earlier OEC merge would make pandas suffix the new ones (name_x/name_y) and the
# 'name' lookup below would raise KeyError.
label_columns = ['level0', 'level0_en', 'level0_es', 'level2', 'level2_en', 'level2_es', 'level3', 'level3_en', 'level3_es', 'level5', 'level5_en', 'level5_es']
flattened_hs2012 = flattened_hs2012[label_columns].merge(oec_sections[['id', 'name']], left_on='level0', right_on='id', how='left')

# Take the OEC name unless it is literally None, in which case the current label
# is kept. Rows with no OEC match carry NaN (not None) in `name`, so they come
# out as NaN here and receive the default label on the next line.
flattened_hs2012['level0_en'] = [
    current_label if oec_name is None else oec_name
    for current_label, oec_name in zip(flattened_hs2012['level0_en'], flattened_hs2012['name'])
]
flattened_hs2012.loc[flattened_hs2012.level0_en.isnull(), 'level0_en'] = 'Works of Art'

# Drop the merge helper columns so later merges start from a clean frame.
flattened_hs2012 = flattened_hs2012[label_columns]

In [None]:
# OEC entries whose id has six characters are matched against the
# section-prefixed level3 code.
oec_6 = oec_xlations[(oec_xlations.id.str.len() == 6)]

# Start from the label columns only: `id`/`name` columns left over from a previous
# OEC merge would be suffixed (name_x/name_y) and the 'name' lookup would fail.
label_columns = ['level0', 'level0_en', 'level0_es', 'level2', 'level2_en', 'level2_es', 'level3', 'level3_en', 'level3_es', 'level5', 'level5_en', 'level5_es']
flattened_hs2012 = flattened_hs2012[label_columns].merge(oec_6[['id', 'name']], left_on='level3', right_on='id', how='left')

# Prefer the OEC name; where it is missing keep the existing label.
# Unmatched rows of a left merge hold NaN, which `is None` does not detect, so
# the fallback uses fillna (treats both None and NaN as missing).
flattened_hs2012['level3_en'] = flattened_hs2012['name'].fillna(flattened_hs2012['level3_en'])

# Final column set and order of the dimension table.
flattened_hs2012 = flattened_hs2012[label_columns]

In [None]:
# Inspection cell: displays the (index, row) pairs collected earlier for the
# LEVEL == 1 (section) rows. It has no effect on the data written below.
sections

In [None]:
# Write the flattened table through the project's Postgres driver
# (presumably schema 'economy', table 'dim_hs2012' -- matches the index statements below).
db.to_sql(flattened_hs2012, 'economy', 'dim_hs2012')

# One index per lookup column: level3_idx and level5_idx.
for indexed_column in ('level3', 'level5'):
    engine.execute(
        "\nCREATE INDEX {0}_idx \nON economy.dim_hs2012 ({0})\n".format(indexed_column)
    )

