# Import & Load population data

### Config

In [1]:
import sys
import configparser
# Read the project-wide settings file. The context manager closes the file
# handle as soon as the parser has consumed it (the bare open() leaked it).
config = configparser.ConfigParser()
with open('../../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Put the configured library directory first on the import path
# (presumably where the local postgres.py / commons.py helpers live).
sys.path.insert(0, config.get('PATHS','libs_path'))
# Database connection string, later handed to sqlalchemy.create_engine.
engine_path = config.get('DATABASE','engine_path')

### Main params

In [2]:
# Second argument to download_file; looks like the local directory for the
# fetched files -- TODO confirm against commons.py.
local_path = '../data/'
# Base URL the CSV files are fetched from; download_file appends the file name.
remote_path = 'http://pacha.datawheel.us/population/'

### Imports

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

### Load file

In [4]:
# Columns that must be plain integers before the frame is ingested.
int_columns = ['sex_id', 'age_range_id', 'year', 'count']

# Fetch the population estimates and coerce the columns above to int.
df = download_file(remote_path, local_path, 'population_2005_2020.csv')
df = df.astype({column: 'int' for column in int_columns})

# Show the column names as a quick sanity check.
df.columns.tolist()

Downloading... http://pacha.datawheel.us/population/population_2005_2020.csv
Encoding: ascii


['year', 'comuna_datachile_id', 'sex_id', 'age_range_id', 'count']

### Ingest

In [5]:
# Destination of the fact table in the database.
target_schema = 'demographics'
target_table = 'fact_population_estimate'

# The driver's to_sql drops and recreates the table, then bulk-loads the
# frame with COPY (see the SQL echoed in this cell's output).
engine = create_engine(engine_path)
db = postgres.PostgresDriver(engine)
db.to_sql(df, target_schema, target_table)

DROP TABLE IF EXISTS demographics.fact_population_estimate;
CREATE TABLE "demographics"."fact_population_estimate" (
"year" INTEGER,
  "comuna_datachile_id" INTEGER,
  "sex_id" INTEGER,
  "age_range_id" INTEGER,
  "count" INTEGER
)
COPY "demographics"."fact_population_estimate" ("year","comuna_datachile_id","sex_id","age_range_id","count") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Indexes & FK

In [6]:
# Index the comuna key of the fact table.
# IF NOT EXISTS keeps this cell safe to re-run without re-ingesting the table
# (a plain CREATE INDEX raises when the index is already there).
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_population_estimate_comuna_datachile_id
ON demographics.fact_population_estimate (comuna_datachile_id)
""")

# Add the date key column; IF NOT EXISTS makes the statement re-runnable.
engine.execute("""
ALTER TABLE demographics.fact_population_estimate
  ADD COLUMN IF NOT EXISTS date_id INTEGER;
""")

# Point every row at the dim_date entry for January 1st of its year.
# The UPDATE recomputes the same values each time, so re-running is harmless.
engine.execute("""
UPDATE demographics.fact_population_estimate
SET date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = demographics.fact_population_estimate.year
      AND dim_date.month_of_year = 1
      AND dim_date.day_of_month = 1
""")

<sqlalchemy.engine.result.ResultProxy at 0x11839c908>

### Related dim: age range - different from the common dim (TODO: confirm which one)

In [7]:
# Fetch the age-range lookup and render it as an <InlineTable> XML fragment
# (the printed output is meant to be pasted into the cube schema by hand --
# NOTE(review): confirm that is the intended workflow).
d0 = download_file(remote_path, local_path, 'age_range_id.csv')
age_range_xml = inline_table_xml(d0, 'age_range', 'age_range_id', 'age_range')
print(age_range_xml)

Downloading... http://pacha.datawheel.us/population/age_range_id.csv
Encoding: ascii

<InlineTable alias="age_range">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">age_0_4</Value>
      <Value column="es_description">age_0_4</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">age_5_9</Value>
      <Value column="es_description">age_5_9</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">age_10_14</Value>
      <Value column="es_description">age_10_14</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">age_15_19</Value>
      <Value column="es_description">age_15_19</Value>
    </Row>
  <Row>
      <Value column="id">5</Value>
      <Value 

### Use common dim for sex_id

```xml
<Dimension name="Sex">
    <Hierarchy hasAll="true" primaryKey="sex_id">
        <InlineTable alias="dim_sex">
          <ColumnDefs>
            <ColumnDef name="sex_id" type="Numeric"/>
            <ColumnDef name="description" type="String"/>
            <ColumnDef name="es_description" type="String"/>
          </ColumnDefs>
          <Rows>
            <Row>
              <Value column="sex_id">0</Value>
              <Value column="description">N/A</Value>
              <Value column="es_description">N/A</Value>
            </Row>
            <Row>
              <Value column="sex_id">2</Value>
              <Value column="description">Male</Value>
              <Value column="es_description">Masculino</Value>
            </Row>
            <Row>
              <Value column="sex_id">1</Value>
              <Value column="description">Female</Value>
              <Value column="es_description">Femenino</Value>
            </Row>
          </Rows>
        </InlineTable>
        <Level name="Sex" column="sex_id" nameColumn="description" uniqueMembers="true">
          <Annotations>
            <Annotation name="es_caption">Description ES</Annotation>
          </Annotations>
          <Property name="Description ES" column="es_description" />
        </Level>
    </Hierarchy>
  </Dimension>
```