# Insert Income/Gini tables into DB

In [3]:
import json
import os

import pandas as pd
from sqlalchemy import create_engine

# Read the connection URL from the environment so that database credentials
# are never stored in the notebook, e.g.
#   export DATACHILE_DB_URL='postgresql://USER:PASSWORD@HOST:PORT/datachile'
db_url = os.environ.get('DATACHILE_DB_URL')
if not db_url:
    raise RuntimeError('Set the DATACHILE_DB_URL environment variable to the '
                       'PostgreSQL connection URL before running this notebook.')

# Shared SQLAlchemy engine used by every database read/write below.
engine = create_engine(db_url)

In [40]:
# Load the tidy household-income indicators and keep only the comuna- and
# region-level rows.  `.copy()` makes the filtered frame independent of the
# raw one, so the column assignment below does not write into a slice
# (which can trigger pandas' SettingWithCopyWarning).
df = pd.read_csv('./household_income/10_csv_final_files/household_income_tidy_all.csv')
df = df[df['geography_level'].isin(['comuna', 'region'])].copy()

# geography_id is used as a merge key against numeric ids further down.
df['geography_id'] = pd.to_numeric(df['geography_id'])

# Comuna dimension table, read in full from the database.
geographies = pd.read_sql('SELECT * FROM dim_comunas', engine)

In [41]:
# Comuna-level indicator rows, enriched with the comuna dimension attributes
# (matched on the comuna's datachile id).
is_comuna = df['geography_level'] == 'comuna'
comunas = df[is_comuna]

ind_comunas = comunas.merge(geographies,
                            left_on='geography_id',
                            right_on='comuna_datachile_id')

# Indicator columns (estimates and their `ci_` counterparts).  They get a
# `comuna_` prefix here so they can sit next to the region-level values later.
to_rename = [
    'weighted_mean_income',
    'weighted_median_income',
    'weighted_gini_income',
    'ci_weighted_mean_income',
    'ci_weighted_median_income',
    'ci_weighted_gini_income',
]
ind_comunas = ind_comunas.rename(
    columns=dict((col, 'comuna_' + col) for col in to_rename)
)

In [43]:
# Region-level indicator rows, joined onto the comuna rows: a region row is
# matched to a comuna row when its geography_id equals the comuna's region_id
# and both refer to the same year.
is_region = df['geography_level'] == 'region'
regiones = df[is_region]

ind_regiones = regiones.merge(ind_comunas,
                              left_on=['geography_id', 'year'],
                              right_on=['region_id', 'year'])

In [47]:
# Keep only the keys and the indicator columns.  The unprefixed indicators
# come from the region side of the merge, so they are renamed to `region_*`;
# the comuna ones already carry their `comuna_` prefix.
comuna_cols = ['comuna_' + k for k in to_rename]
region_names = dict((k, 'region_' + k) for k in to_rename)

ind_regiones = ind_regiones[['year', 'comuna_datachile_id'] + to_rename + comuna_cols]
ind_regiones = ind_regiones.rename(columns=region_names)
    



Create columns for confidence bands

In [50]:
# Each `*_ci_*` column holds a JSON array string; element 0 becomes the
# `_lower` column and element 1 the `_upper` column, and the original
# column is then dropped.
ci_cols = [c for c in ind_regiones.columns if '_ci_' in c]
for c in ci_cols:
    # Parse every JSON string once and reuse the result for both bounds.
    bounds = ind_regiones[c].apply(json.loads)
    ind_regiones[c + '_lower'] = bounds.apply(lambda b: b[0])
    ind_regiones[c + '_upper'] = bounds.apply(lambda b: b[1])
    del ind_regiones[c]

Move data to DB

In [0]:
# Write the combined comuna/region indicator table to economy.fact_income_gini,
# replacing the table if it already exists; the DataFrame index is not stored.
ind_regiones.to_sql(
    name='fact_income_gini',
    con=engine,
    schema='economy',
    if_exists='replace',
    index=False,
)

Add a `date_id` column and populate it from the `dim_date` table (January 1 of each year)

In [138]:

# Add an integer date_id column to the fact table.
add_date_id_sql = """
ALTER TABLE economy.fact_income_gini
  ADD COLUMN date_id INTEGER; 
"""

# Set date_id to the id of the dim_date row for January 1 of each row's year.
fill_date_id_sql = """
UPDATE economy.fact_income_gini
SET date_id = dim_date.id
FROM dim_date
WHERE dim_date.the_year = economy.fact_income_gini.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
"""

engine.execute(add_date_id_sql)
engine.execute(fill_date_id_sql)

<sqlalchemy.engine.result.ResultProxy at 0x1082d2d50>