In [1]:
# Local folder where downloaded CSV files are cached, and the remote folder
# they are fetched from (file names are appended to both by loadFile).
local_path = "../data/"
remote_path = "http://pacha.datawheel.us/economia/casen/analysis/3_household_income/5_final_tables/"

# Database URL handed to sqlalchemy.create_engine when loading the tables.
engine_path = "postgresql://localhost:5432/datachile"

In [2]:
from urllib import request
import zipfile
import shutil
import os.path

import json
import pandas as pd
from sqlalchemy import create_engine

In [3]:
def loadFile(file_name, remote_dir=None, local_dir=None):
    """Return a CSV file as a DataFrame, downloading it on first use.

    The file is looked up at ``local_dir + file_name``. When it is not there
    it is fetched from ``remote_dir + file_name`` and stored locally, so later
    calls read the cached copy instead of hitting the network again.

    Parameters
    ----------
    file_name : str
        Name of the CSV file, appended verbatim to both base locations.
    remote_dir : str, optional
        Base URL to download from. Defaults to the notebook-level
        ``remote_path``.
    local_dir : str, optional
        Base folder of the local cache. Defaults to the notebook-level
        ``local_path``.

    Returns
    -------
    pandas.DataFrame
        The parsed, comma-delimited CSV content.

    Raises
    ------
    urllib.error.URLError
        If the remote file cannot be opened. No cache file is left behind in
        that case.
    """
    remote_base = remote_path if remote_dir is None else remote_dir
    local_base = local_path if local_dir is None else local_dir
    remote_file = remote_base + file_name
    local_file = local_base + file_name

    if not os.path.isfile(local_file):
        # Create the cache folder on first use instead of failing on open().
        target_dir = os.path.dirname(local_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # Download into a side file and rename it only once the copy has
        # finished; an interrupted download must not leave a truncated file
        # that later calls would mistake for a valid cached copy.
        partial_file = local_file + '.part'
        try:
            with request.urlopen(remote_file) as remote_csv, open(partial_file, 'wb') as local_csv:
                shutil.copyfileobj(remote_csv, local_csv)
            os.replace(partial_file, local_file)
        finally:
            # Only present here if the download or the rename failed.
            if os.path.exists(partial_file):
                os.remove(partial_file)

    return pd.read_csv(local_file, delimiter=",")

### Country data

In [4]:
# Load the country-level table (downloaded on first use) and show its column names.
df1 = loadFile('country_data_all.csv')
df1.columns.tolist()

['year',
 'country_id',
 'weighted_mean_income',
 'weighted_median_income',
 'weighted_gini_income',
 'lb_weighted_mean_income',
 'ub_weighted_mean_income',
 'lb_weighted_median_income',
 'ub_weighted_median_income',
 'lb_weighted_gini_income',
 'ub_weighted_gini_income']

In [5]:
# Cast the year, the geography id and the mean/median income columns (with
# their lb_/ub_ bounds) to integers; the gini columns are not cast.
df1 = df1.astype(dict.fromkeys(
    [
        'year',
        'country_id',
        'weighted_mean_income',
        'weighted_median_income',
        'lb_weighted_mean_income',
        'ub_weighted_mean_income',
        'lb_weighted_median_income',
        'ub_weighted_median_income',
    ],
    'int',
))

# Write the frame to economy.fact_income_gini_country_casen, replacing any
# existing table of that name and leaving out the DataFrame index.
engine = create_engine(engine_path)
df1.to_sql(
    name='fact_income_gini_country_casen',
    con=engine,
    schema='economy',
    if_exists='replace',
    index=False,
)

In [6]:
# Add the date dimension key to the freshly loaded fact table.
# IF NOT EXISTS (PostgreSQL 9.6+) keeps this cell re-runnable: without it a
# second execution fails because the column is already there.
engine.execute("""
ALTER TABLE economy.fact_income_gini_country_casen
  ADD COLUMN IF NOT EXISTS date_id INTEGER;
""")

# Point every row at the dim_date entry for January 1st of its year.
engine.execute("""
UPDATE economy.fact_income_gini_country_casen
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = economy.fact_income_gini_country_casen.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")


<sqlalchemy.engine.result.ResultProxy at 0x112730550>

### Region data

In [7]:
# Load the region-level table (downloaded on first use) and show its column names.
df2 = loadFile('region_data_all.csv')
df2.columns.tolist()

['year',
 'region_id',
 'weighted_mean_income',
 'weighted_median_income',
 'weighted_gini_income',
 'lb_weighted_mean_income',
 'ub_weighted_mean_income',
 'lb_weighted_median_income',
 'ub_weighted_median_income',
 'lb_weighted_gini_income',
 'ub_weighted_gini_income']

In [8]:
# Cast the year, the geography id and the mean/median income columns (with
# their lb_/ub_ bounds) to integers; the gini columns are not cast.
df2 = df2.astype(dict.fromkeys(
    [
        'year',
        'region_id',
        'weighted_mean_income',
        'weighted_median_income',
        'lb_weighted_mean_income',
        'ub_weighted_mean_income',
        'lb_weighted_median_income',
        'ub_weighted_median_income',
    ],
    'int',
))

# Write the frame to economy.fact_income_gini_region_casen, replacing any
# existing table of that name and leaving out the DataFrame index.
engine = create_engine(engine_path)
df2.to_sql(
    name='fact_income_gini_region_casen',
    con=engine,
    schema='economy',
    if_exists='replace',
    index=False,
)

In [9]:
# Add the date dimension key to the freshly loaded fact table.
# IF NOT EXISTS (PostgreSQL 9.6+) keeps this cell re-runnable: without it a
# second execution fails because the column is already there.
engine.execute("""
ALTER TABLE economy.fact_income_gini_region_casen
  ADD COLUMN IF NOT EXISTS date_id INTEGER;
""")

# Point every row at the dim_date entry for January 1st of its year.
engine.execute("""
UPDATE economy.fact_income_gini_region_casen
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = economy.fact_income_gini_region_casen.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")

# Index the geography key; IF NOT EXISTS (PostgreSQL 9.5+) makes a repeated
# run of this cell a no-op instead of a "relation already exists" error.
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_income_gini_region_casen_ix
ON economy.fact_income_gini_region_casen (region_id)
""")

<sqlalchemy.engine.result.ResultProxy at 0x1127308d0>

### Comuna data

In [10]:
# Load the comuna-level table (downloaded on first use) and show its column names.
df3 = loadFile('comuna_data_all.csv')
df3.columns.tolist()

['year',
 'comuna_id',
 'weighted_mean_income',
 'weighted_median_income',
 'weighted_gini_income',
 'lb_weighted_mean_income',
 'ub_weighted_mean_income',
 'lb_weighted_median_income',
 'ub_weighted_median_income',
 'lb_weighted_gini_income',
 'ub_weighted_gini_income']

In [11]:
# Cast the year, the geography id and the mean/median income columns (with
# their lb_/ub_ bounds) to integers; the gini columns are not cast.
df3 = df3.astype(dict.fromkeys(
    [
        'year',
        'comuna_id',
        'weighted_mean_income',
        'weighted_median_income',
        'lb_weighted_mean_income',
        'ub_weighted_mean_income',
        'lb_weighted_median_income',
        'ub_weighted_median_income',
    ],
    'int',
))

# Write the frame to economy.fact_income_gini_comuna_casen, replacing any
# existing table of that name and leaving out the DataFrame index.
engine = create_engine(engine_path)
df3.to_sql(
    name='fact_income_gini_comuna_casen',
    con=engine,
    schema='economy',
    if_exists='replace',
    index=False,
)

In [12]:
# Add the date dimension key to the freshly loaded fact table.
# IF NOT EXISTS (PostgreSQL 9.6+) keeps this cell re-runnable: without it a
# second execution fails because the column is already there.
engine.execute("""
ALTER TABLE economy.fact_income_gini_comuna_casen
  ADD COLUMN IF NOT EXISTS date_id INTEGER;
""")

# Point every row at the dim_date entry for January 1st of its year.
engine.execute("""
UPDATE economy.fact_income_gini_comuna_casen
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = economy.fact_income_gini_comuna_casen.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")

# Index the geography key; IF NOT EXISTS (PostgreSQL 9.5+) makes a repeated
# run of this cell a no-op instead of a "relation already exists" error.
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_income_gini_comuna_casen_ix
ON economy.fact_income_gini_comuna_casen (comuna_id)
""")

<sqlalchemy.engine.result.ResultProxy at 0x1128fcc50>