# Import & Load CASEN gini data

### Config

In [1]:
import sys
import configparser
# Load the shared project settings from settings.ini.
config = configparser.ConfigParser()
# Use a context manager so the file handle is closed as soon as parsing is
# done; a bare open() passed to read_file() is never explicitly closed.
with open('../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the directory named by [PATHS] libs_path first on the import path
# (presumably where the local postgres.py / commons.py modules live -- confirm).
sys.path.insert(0, config.get('PATHS', 'libs_path'))
# Value of [DATABASE] engine_path; handed to create_engine() further down.
engine_path = config.get('DATABASE', 'engine_path')

In [2]:
# Both values are passed to download_file() for every CSV loaded below:
# local_path is the directory of the local copy, remote_path the base URL.
local_path = '../data/'
remote_path = 'http://pacha.datawheel.us/economia/casen/analysis/2_household_income/5_final_tables/'

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Country data

In [4]:
# Load the country-level CSV through the commons.download_file helper, then
# echo its column names as the cell output.
df1 = download_file(remote_path, local_path, 'country_data_all.csv')
df1.columns.tolist()

Already downloaded. Using: ../data/country_data_all.csv
Encoding: ascii


['year',
 'geography_unit',
 'geography_id',
 'weighted_mean_income',
 'weighted_median_income',
 'weighted_gini_income',
 'lb_weighted_mean_income',
 'ub_weighted_mean_income',
 'lb_weighted_median_income',
 'ub_weighted_median_income',
 'lb_weighted_gini_income',
 'ub_weighted_gini_income']

In [5]:
# Columns cast to int: year, geography id, and the mean/median income columns
# together with their lb_/ub_ variants. The three gini columns are not cast.
country_int_columns = [
    'year',
    'geography_id',
    'weighted_mean_income',
    'weighted_median_income',
    'lb_weighted_mean_income',
    'ub_weighted_mean_income',
    'lb_weighted_median_income',
    'ub_weighted_median_income',
]
df1 = df1.astype(dict.fromkeys(country_int_columns, 'int'))

# Write the frame to economy.fact_income_gini_country_casen, replacing the
# table if it already exists and leaving the DataFrame index out.
engine = create_engine(engine_path)
df1.to_sql(
    name='fact_income_gini_country_casen',
    con=engine,
    schema='economy',
    if_exists='replace',
    index=False,
)

In [6]:
# Adds an integer date_id column to the country fact table.
country_add_date_id_sql = """
ALTER TABLE economy.fact_income_gini_country_casen
  ADD COLUMN date_id INTEGER; 
"""

# Fills date_id with the id of the public.dim_date row whose year matches the
# fact row and whose month and day are both 1.
country_fill_date_id_sql = """
UPDATE economy.fact_income_gini_country_casen
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = economy.fact_income_gini_country_casen.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
"""

engine.execute(country_add_date_id_sql)
# Kept as the final expression so the cell still echoes the UPDATE result.
engine.execute(country_fill_date_id_sql)


<sqlalchemy.engine.result.ResultProxy at 0x110c57828>

### Region data

In [7]:
# Load the region-level CSV through the commons.download_file helper, then
# echo its column names as the cell output.
df2 = download_file(remote_path, local_path, 'region_data_all.csv')
df2.columns.tolist()

Already downloaded. Using: ../data/region_data_all.csv
Encoding: ascii


['year',
 'geography_unit',
 'geography_id',
 'weighted_mean_income',
 'weighted_median_income',
 'weighted_gini_income',
 'lb_weighted_mean_income',
 'ub_weighted_mean_income',
 'lb_weighted_median_income',
 'ub_weighted_median_income',
 'lb_weighted_gini_income',
 'ub_weighted_gini_income']

In [8]:
# Columns cast to int: year, geography id, and the mean/median income columns
# together with their lb_/ub_ variants. The three gini columns are not cast.
region_int_columns = (
    'year',
    'geography_id',
    'weighted_mean_income',
    'weighted_median_income',
    'lb_weighted_mean_income',
    'ub_weighted_mean_income',
    'lb_weighted_median_income',
    'ub_weighted_median_income',
)
df2 = df2.astype({column_name: 'int' for column_name in region_int_columns})

# Write the frame to economy.fact_income_gini_region_casen, replacing the
# table if it already exists and leaving the DataFrame index out.
engine = create_engine(engine_path)
df2.to_sql(
    name='fact_income_gini_region_casen',
    con=engine,
    schema='economy',
    if_exists='replace',
    index=False,
)

In [9]:
# Statements run in order against the region fact table:
#   1. add an integer date_id column;
#   2. set date_id to the id of the public.dim_date row with the same year
#      and month = 1, day = 1;
#   3. create an index on geography_id.
region_statements = (
    """
ALTER TABLE economy.fact_income_gini_region_casen
  ADD COLUMN date_id INTEGER; 
""",
    """
UPDATE economy.fact_income_gini_region_casen
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = economy.fact_income_gini_region_casen.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""",
    """
CREATE INDEX fact_income_gini_region_casen_ix 
ON economy.fact_income_gini_region_casen (geography_id)
""",
)
for region_statement in region_statements:
    region_result = engine.execute(region_statement)

# Final expression: echo the result of the last statement, as the cell did before.
region_result

<sqlalchemy.engine.result.ResultProxy at 0x110c57a90>

### Comuna data

In [10]:
# Load the comuna-level CSV through the commons.download_file helper, then
# echo its column names as the cell output.
df3 = download_file(remote_path, local_path, 'comuna_data_all.csv')
df3.columns.tolist()

Already downloaded. Using: ../data/comuna_data_all.csv
Encoding: ascii


['year',
 'geography_unit',
 'geography_id',
 'weighted_mean_income',
 'weighted_median_income',
 'weighted_gini_income',
 'lb_weighted_mean_income',
 'ub_weighted_mean_income',
 'lb_weighted_median_income',
 'ub_weighted_median_income',
 'lb_weighted_gini_income',
 'ub_weighted_gini_income']

In [11]:
# Target dtype per column: year, geography id, and the mean/median income
# columns with their lb_/ub_ variants become int. Gini columns are not cast.
comuna_dtypes = dict.fromkeys(
    (
        'year',
        'geography_id',
        'weighted_mean_income',
        'weighted_median_income',
        'lb_weighted_mean_income',
        'ub_weighted_mean_income',
        'lb_weighted_median_income',
        'ub_weighted_median_income',
    ),
    'int',
)
df3 = df3.astype(comuna_dtypes)

# Write the frame to economy.fact_income_gini_comuna_casen, replacing the
# table if it already exists and leaving the DataFrame index out.
engine = create_engine(engine_path)
df3.to_sql(
    name='fact_income_gini_comuna_casen',
    con=engine,
    schema='economy',
    if_exists='replace',
    index=False,
)

In [12]:
# Adds an integer date_id column to the comuna fact table.
comuna_add_date_id_sql = """
ALTER TABLE economy.fact_income_gini_comuna_casen
  ADD COLUMN date_id INTEGER; 
"""

# Fills date_id with the id of the public.dim_date row whose year matches the
# fact row and whose month and day are both 1.
comuna_fill_date_id_sql = """
UPDATE economy.fact_income_gini_comuna_casen
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = economy.fact_income_gini_comuna_casen.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
"""

# Creates an index on geography_id.
comuna_create_index_sql = """
CREATE INDEX fact_income_gini_comuna_casen_ix 
ON economy.fact_income_gini_comuna_casen (geography_id)
"""

# The list comprehension runs the statements strictly in the order listed.
comuna_results = [
    engine.execute(comuna_sql)
    for comuna_sql in (
        comuna_add_date_id_sql,
        comuna_fill_date_id_sql,
        comuna_create_index_sql,
    )
]
# Final expression: echo the result of the last statement, as the cell did before.
comuna_results[-1]

<sqlalchemy.engine.result.ResultProxy at 0x110c57898>