### Main params

In [1]:
# Base URL that the processed NESI wage CSV files are downloaded from.
remote_path = (
    "http://pacha.datawheel.us/economia/nesi/analysis/"
    "3_wages_description/4_processed_wages_description/"
)
# Directory (relative to the notebook) where downloaded files are stored.
local_path = "../data/"

# SQLAlchemy connection URL of the database the fact table is written to.
engine_path = "postgresql://localhost:5432/datachile"

### Imports

In [2]:
import json
import os
import shutil
import zipfile
from urllib import request

import pandas as pd
from sqlalchemy import create_engine, text

### Open file function

In [3]:
def loadFile(file_name):
    """Download a CSV file and load it into a DataFrame.

    The file is fetched from ``remote_path + file_name``, saved to
    ``local_path + file_name`` (overwriting any previous copy), and then
    parsed from the local copy.

    Parameters
    ----------
    file_name : str
        Name of the CSV file, relative to both ``remote_path`` and
        ``local_path``.

    Returns
    -------
    pandas.DataFrame
        The parsed, comma-delimited content of the downloaded file.

    Raises
    ------
    urllib.error.URLError
        If the remote file cannot be opened.
    """
    remote_file = remote_path + file_name
    local_file = local_path + file_name

    # A fresh checkout may not have the data directory yet; without this,
    # open(..., 'wb') below fails with FileNotFoundError.
    os.makedirs(os.path.dirname(local_file) or '.', exist_ok=True)

    with request.urlopen(remote_file) as remote_csv, open(local_file, 'wb') as local_csv:
        shutil.copyfileobj(remote_csv, local_csv)

    return pd.read_csv(local_file, delimiter=",")

### By region & sex

In [4]:
# Load the region/sex wage file and rename its sex key column to `sex_id`.
df = loadFile('region_sex_positions_all.csv').rename(columns={'sexo_id': 'sex_id'})

### Relation tables

In [5]:
# Keep the first row seen for each distinct sex_id, then display the
# id -> label pairs ordered by id.
sexo_table = df.drop_duplicates(subset='sex_id')
sexo_table.sort_values(by='sex_id')[['sex_id', 'sexo']]

Unnamed: 0,sex_id,sexo
0,0,Hombre
1,1,Mujer


### Ingest

In [15]:
# Columns that make up the fact table; every one of them is stored as an integer.
fact_columns = ['year','sex_id','region_id','p50','p70','ci_p50','cs_p50','ci_p70','cs_p70']
df = df[fact_columns].astype({column: 'int' for column in fact_columns})

engine = create_engine(engine_path)

# Engine.execute() was removed in SQLAlchemy 2.0. Run the DDL on an explicit
# connection instead; engine.begin() commits on exit, so the schema exists
# before to_sql() opens its own connection.
with engine.begin() as connection:
    connection.execute(text("""
CREATE SCHEMA IF NOT EXISTS nesi; 
"""))

# Replace any previous version of the table with the current frame.
df.to_sql('fact_income_year_region_sex', engine, schema='nesi', if_exists='replace', index=False)

In [7]:
### Foreign keys & indexes

In [16]:
# Engine.execute() was removed in SQLAlchemy 2.0, so the statements run on an
# explicit connection. engine.begin() wraps them in one transaction that is
# committed on success and rolled back if any statement fails.
# The IF NOT EXISTS guards (PostgreSQL 9.6+ for ADD COLUMN, 9.5+ for
# CREATE INDEX) let this cell be re-run without recreating the table first.
with engine.begin() as connection:
    # Add the column that will reference the date dimension.
    connection.execute(text("""
ALTER TABLE nesi.fact_income_year_region_sex
  ADD COLUMN IF NOT EXISTS date_id INTEGER
"""))

    # Point each row at the dim_date entry for January 1st of its year.
    connection.execute(text("""
UPDATE nesi.fact_income_year_region_sex
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = nesi.fact_income_year_region_sex.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
"""))

    # Index the region key of the fact table.
    connection.execute(text("""
CREATE INDEX IF NOT EXISTS fact_income_year_region_sex_region_id_index 
ON nesi.fact_income_year_region_sex (region_id)
"""))

<sqlalchemy.engine.result.ResultProxy at 0x10f809908>