# Import & Load Election results data

### Config

In [1]:
import sys
import configparser
# Read the project-wide settings file (three directories above this notebook).
config = configparser.ConfigParser()
# Open the file in a context manager so the handle is closed as soon as it has
# been parsed, instead of being left open for the lifetime of the kernel.
# read_file (unlike config.read) still raises if the file is missing.
with open('../../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Make the project's shared helper modules (postgres.py, commons.py) importable.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Database connection string; kept in settings.ini rather than in the notebook.
engine_path = config.get('DATABASE','engine_path')

### Main params

In [2]:
# Local folder passed to download_file together with the remote URL.
local_path = "../data/"
# Remote folder that hosts the election CSV files used by this notebook.
remote_path = "http://pacha.datawheel.us/datachile/polls/04_todos/"

### Imports

In [3]:
import json

import pandas as pd
from sqlalchemy import create_engine, text

import postgres #from local file postgres.py
from commons import inline_table_xml, download_file #from local file commons.py

### Load file

In [4]:
# Source (Spanish) column names -> names used in the fact table.
column_translation = {
    'votos': 'votes',
    'candidato_id': 'candidate_id',
    'partido_id': 'party_id',
    'distrito_id': 'district_id',
    'election_type': 'election_id',
    'circunscripcion_id': 'circunscription_id',
    'lugar': 'place',
    'electo': 'elected',
}

# Geography keys that can be missing; they get a -1 placeholder so that the
# columns can be cast to plain integers below.
nullable_geo_columns = [
    'region_id',
    'circunscription_id',
    'district_id',
    'comuna_datachile_id',
]

# Every column that is cast to integer before ingestion.
integer_columns = [
    'comuna_datachile_id', 'votes', 'party_id', 'place', 'election_id',
    'year', 'candidate_id', 'district_id', 'circunscription_id', 'region_id',
]

# download_file comes from the local commons.py; its result is used as a
# pandas DataFrame from here on.
df = download_file(remote_path, local_path, 'resultados_votaciones.csv')
df = df.rename(columns=column_translation)

df[nullable_geo_columns] = df[nullable_geo_columns].fillna(value=-1)

df = df.astype({column: 'int' for column in integer_columns})

# Show the resulting column names as the cell output.
list(df)

Downloading... http://pacha.datawheel.us/datachile/polls/04_todos/resultados_votaciones.csv


['year',
 'election_id',
 'region_id',
 'circunscription_id',
 'district_id',
 'comuna_datachile_id',
 'candidate_id',
 'party_id',
 'votes',
 'place',
 'elected']

### Ingest

In [5]:
# Build the SQLAlchemy engine from the connection string read from settings.ini.
engine = create_engine(engine_path)
# Wrap the engine in the project's Postgres helper (local file postgres.py).
db = postgres.PostgresDriver(engine)
# Load the DataFrame into politics.fact_election_results. Per the SQL echoed in
# this cell's recorded output, the helper drops the table if it exists,
# recreates it with INTEGER columns and bulk-loads the rows with COPY.
db.to_sql(df, 'politics', 'fact_election_results')

DROP TABLE IF EXISTS politics.fact_election_results;
CREATE TABLE "politics"."fact_election_results" (
"year" INTEGER,
  "election_id" INTEGER,
  "region_id" INTEGER,
  "circunscription_id" INTEGER,
  "district_id" INTEGER,
  "comuna_datachile_id" INTEGER,
  "candidate_id" INTEGER,
  "party_id" INTEGER,
  "votes" INTEGER,
  "place" INTEGER,
  "elected" INTEGER
)
COPY "politics"."fact_election_results" ("year","election_id","region_id","circunscription_id","district_id","comuna_datachile_id","candidate_id","party_id","votes","place","elected") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Update empty values

In [6]:
# Columns whose missing values were filled with -1 before ingestion so that
# they could be cast to integers; turn that placeholder back into NULL.
sentinel_columns = [
    'region_id',
    'circunscription_id',
    'district_id',
    'comuna_datachile_id',
]

# Run every UPDATE inside one transaction: either all placeholders are
# replaced or none are, so a failure half-way cannot leave the table with a
# mix of -1 and NULL. engine.begin() commits on success and rolls back on error.
with engine.begin() as connection:
    for column in sentinel_columns:
        # The column names come from the hard-coded list above (never from
        # user input), so formatting them into the statement is safe.
        connection.execute(text(
            'UPDATE politics.fact_election_results '
            'SET {col} = NULL WHERE {col} = -1'.format(col=column)
        ))

<sqlalchemy.engine.result.ResultProxy at 0x10a8c44a8>

### Indexes & FK

In [7]:
# Add the date key and the lookup indexes in a single transaction.
# Every DDL statement uses IF NOT EXISTS so this cell can be re-run without
# failing on "column already exists" / "relation already exists".
# NOTE(review): ADD COLUMN IF NOT EXISTS needs PostgreSQL >= 9.6 and
# CREATE INDEX IF NOT EXISTS needs >= 9.5 - confirm the server version.
with engine.begin() as connection:
    connection.execute(text("""
    ALTER TABLE politics.fact_election_results
      ADD COLUMN IF NOT EXISTS date_id INTEGER
    """))

    # Point each row at the dim_date entry for January 1st of its election
    # year. Rows whose year has no such entry in dim_date keep date_id NULL.
    connection.execute(text("""
    UPDATE politics.fact_election_results
    SET date_id = dim_date.id
    FROM public.dim_date
    WHERE dim_date.the_year = politics.fact_election_results.year
          AND dim_date.month_of_year = 1
          AND dim_date.day_of_month = 1
    """))

    connection.execute(text("""
    CREATE INDEX IF NOT EXISTS fact_election_results_comuna_datachile_id
    ON politics.fact_election_results (comuna_datachile_id)
    """))

    connection.execute(text("""
    CREATE INDEX IF NOT EXISTS fact_election_results_region_id
    ON politics.fact_election_results (region_id)
    """))

<sqlalchemy.engine.result.ResultProxy at 0x10a8c4630>

### Related dim

In [8]:
# Fetch the election-type lookup table (election_id.csv) from the remote folder.
d1 = download_file(remote_path,local_path,'election_id.csv')
# Print the lookup as an <InlineTable alias="election"> XML snippet with
# id / description / es_description columns, as shown in the recorded output.
# NOTE(review): presumably meant to be pasted into an OLAP schema file - confirm.
print (inline_table_xml(d1, 'election', 'election_id', 'election'))

Downloading... http://pacha.datawheel.us/datachile/polls/04_todos/election_id.csv

<InlineTable alias="election">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">Presidente - Primera vuelta</Value>
      <Value column="es_description">Presidente - Primera vuelta</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Presidente - Segunda vuelta</Value>
      <Value column="es_description">Presidente - Segunda vuelta</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">Senador</Value>
      <Value column="es_description">Senador</Value>
    </Row>
  <Row>
      <Value column="id">4</Value>
      <Value column="description">Diputado</Value>
      <Value column="es_description">Diputado</Valu

In [9]:
# Fetch the political-party lookup table (partidos_id.csv) from the remote folder.
d2 = download_file(remote_path,local_path,'partidos_id.csv')
# Print the lookup as an <InlineTable alias="partido"> XML snippet with
# id / description / es_description columns, as shown in the recorded output.
# NOTE(review): presumably meant to be pasted into an OLAP schema file - confirm.
print (inline_table_xml(d2, 'partido', 'partido_id', 'partido'))

Downloading... http://pacha.datawheel.us/datachile/polls/04_todos/partidos_id.csv

<InlineTable alias="partido">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">1</Value>
      <Value column="description">Partido Comunista de Chile (PCCh)</Value>
      <Value column="es_description">Partido Comunista de Chile (PCCh)</Value>
    </Row>
  <Row>
      <Value column="id">2</Value>
      <Value column="description">Partido Radical Socialdemocrata (PRSD)</Value>
      <Value column="es_description">Partido Radical Socialdemocrata (PRSD)</Value>
    </Row>
  <Row>
      <Value column="id">3</Value>
      <Value column="description">Partido Regionalista de los Independientes (PRI)</Value>
      <Value column="es_description">Partido Regionalista de los Independientes (PRI)</Value>
    </Row>
  <Row>
      <Value column="i