### Main params

In [1]:
# SQLAlchemy connection URL of the PostgreSQL database that receives the data.
engine_path = 'postgresql://localhost:5432/datachile'

# Base URL the source files are downloaded from, and the directory prefix
# (relative to the notebook) under which downloaded copies are stored.
# Both are concatenated directly with a file name, so keep the trailing slash.
remote_path = 'http://pacha.datawheel.us/economia/nesi/nesi_all_years/'
local_path = '../data/'

### Imports

In [2]:
import json
import os.path
import shutil
import zipfile
from urllib import request

import pandas as pd
from sqlalchemy import create_engine, text

### Open file function

In [3]:
def loadFile(file_name):
    """Return `file_name` as a DataFrame, downloading it on first use.

    The file is looked up at `local_path + file_name`. If it is not there yet
    it is fetched from `remote_path + file_name` and stored locally, so later
    calls read the local copy without touching the network.

    Parameters
    ----------
    file_name : str
        Name of a comma-separated file, relative to both `remote_path` and
        `local_path`.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the local copy.

    Raises
    ------
    urllib.error.URLError
        If the file has to be downloaded and the request fails.
    """
    remote_file = remote_path + file_name
    local_file = local_path + file_name

    if not os.path.isfile(local_file):
        # The cache directory may not exist on a fresh checkout.
        target_dir = os.path.dirname(local_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Download to a sibling temporary name and rename only once the copy
        # has finished: an interrupted download must never leave a truncated
        # file at `local_file`, because the isfile() check above would accept
        # it as a valid cache entry on every later run.
        partial_file = local_file + '.part'
        try:
            with request.urlopen(remote_file) as remote_csv, open(partial_file, 'wb') as local_csv:
                shutil.copyfileobj(remote_csv, local_csv)
            os.replace(partial_file, local_file)
        finally:
            # Only present here if the download or the rename failed.
            if os.path.exists(partial_file):
                os.remove(partial_file)

    return pd.read_csv(local_file, delimiter=",")

### Load & inspect columns

In [4]:
# Read the NESI file (downloaded by loadFile if no local copy exists yet)
# and display its column names.
df = loadFile('nesi_all_years.csv')
df.columns.tolist()

['year',
 'comuna_datachile_id',
 'region_id',
 'income',
 'fact',
 'sex_id',
 'journey_id',
 'isco_id',
 'icse_id',
 'isced_id']

### Ingest

In [5]:
# Force the listed columns to integer before loading. `fact` is not listed,
# so it keeps whatever dtype pandas inferred when reading the CSV.
df = df.astype(dict.fromkeys(
    [
        'year', 'comuna_datachile_id', 'region_id', 'income', 'sex_id',
        'journey_id', 'isco_id', 'icse_id', 'isced_id',
    ],
    'int',
))

# if_exists='replace' drops and recreates economy.fact_income_nesi on each run.
engine = create_engine(engine_path)
df.to_sql(
    name='fact_income_nesi',
    con=engine,
    schema='economy',
    if_exists='replace',
    index=False,
)

In [6]:
### Foreign keys & indexes

In [7]:
# Post-load preparation of economy.fact_income_nesi.
#
# - Everything runs inside a single transaction (engine.begin() commits on
#   success and rolls back on error), so a failure part-way through does not
#   leave the table half-prepared.
# - Statements are wrapped in text(): Engine.execute() with raw strings was
#   removed in SQLAlchemy 2.0, while this form works on 1.x and 2.x alike.
# - IF NOT EXISTS makes the cell safe to re-run on its own
#   (needs PostgreSQL >= 9.6 for ADD COLUMN, >= 9.5 for CREATE INDEX).
with engine.begin() as connection:
    connection.execute(text("""
        ALTER TABLE economy.fact_income_nesi
          ADD COLUMN IF NOT EXISTS date_id INTEGER
    """))

    # Point every row at the dim_date entry for January 1st of its year.
    connection.execute(text("""
        UPDATE economy.fact_income_nesi
        SET date_id = dim_date.id
        FROM public.dim_date
        WHERE dim_date.the_year = economy.fact_income_nesi.year
              AND dim_date.month_of_year = 1
              AND dim_date.day_of_month = 1
    """))

    connection.execute(text("""
        CREATE INDEX IF NOT EXISTS fact_income_nesi_region_id
        ON economy.fact_income_nesi (region_id)
    """))

    connection.execute(text("""
        CREATE INDEX IF NOT EXISTS fact_income_nesi_comuna_datachile_id
        ON economy.fact_income_nesi (comuna_datachile_id)
    """))

<sqlalchemy.engine.result.ResultProxy at 0x109cca2e8>