# Mayors data

### Config

In [1]:
import configparser
# Read the project settings file and pull out the database connection string.
config = configparser.ConfigParser()
# Open through a context manager so the file handle is closed as soon as
# parsing is done instead of being left for the garbage collector.
with open('../../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Connection string that is later handed to create_engine.
engine_path = config.get('DATABASE','engine_path')

### Params

In [2]:
# Base URL that loadFile prepends to a file name when it has to download
# (reassigned further down, before the party dimension is fetched).
remote_path = 'http://pacha.datawheel.us/municipios/'
# Directory prefix under which loadFile caches downloads; the trailing slash
# matters because the file name is appended by plain string concatenation.
local_path = '../data/'

### Imports

In [3]:
from urllib import request
import shutil
import os.path

import json
import pandas as pd
from sqlalchemy import create_engine

### Load function

In [4]:
def loadFile(file_name):
    """Return `file_name` as a DataFrame, downloading it on first use.

    The file is looked up at `local_path + file_name`; when it is not there
    it is fetched from `remote_path + file_name` and cached locally. Both
    prefixes are read from the module-level variables at call time.

    The download is written to a sibling ``.part`` file and only moved into
    place once it has completed, so an interrupted transfer can never leave
    a truncated file that later calls would mistake for a valid cache entry.

    Parameters
    ----------
    file_name : str
        Name of the CSV file, relative to both path prefixes.

    Returns
    -------
    pandas.DataFrame
        The parsed, comma-delimited CSV.

    Raises
    ------
    Whatever `urllib.request.urlopen`, the file system calls or
    `pandas.read_csv` raise; a failed download is cleaned up and re-raised.
    """
    remote_file = remote_path + file_name
    local_file = local_path + file_name

    if not os.path.isfile(local_file):
        # Make sure the cache directory exists before writing into it.
        target_dir = os.path.dirname(local_file)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        partial_file = local_file + '.part'
        try:
            with request.urlopen(remote_file) as remote_csv, open(partial_file, 'wb') as local_csv:
                shutil.copyfileobj(remote_csv, local_csv)
            # Publish the finished download in one step.
            os.replace(partial_file, local_file)
        except BaseException:
            # Also covers a kernel interrupt: drop the incomplete download.
            if os.path.exists(partial_file):
                os.remove(partial_file)
            raise

    return pd.read_csv(local_file, delimiter=",")

### Load, select & rename

In [5]:
# Fetch the mayors CSV (or reuse the cached copy) and show the columns it
# provides.
df = loadFile('alcaldes_municipales_2016.csv')
df.columns.tolist()

['comuna_name',
 'candidato',
 'partido',
 'region_id',
 'region_name',
 'comuna_datachile_id',
 'comuna_customs_id',
 'comuna_tax_office_id',
 'partido_id']

In [6]:
# Keep the key columns plus the candidate name, rename the two Spanish
# columns to English, and stamp every row with the same term years.
df = (
    df[['region_id', 'comuna_datachile_id', 'partido_id', 'candidato']]
    .rename(columns={'partido_id': 'party_id', 'candidato': 'candidate'})
    # Two separate assigns keep start_year ahead of end_year regardless of
    # how the installed pandas orders keyword arguments.
    .assign(start_year=2017)
    .assign(end_year=2020)
)
df.columns.tolist()

['region_id',
 'comuna_datachile_id',
 'party_id',
 'candidate',
 'start_year',
 'end_year']

In [7]:
# Force the three key columns to integers before the frame is written out.
df = df.astype(dict.fromkeys(['region_id', 'comuna_datachile_id', 'party_id'], 'int'))

# Make sure the target schema exists, then (re)create politics.fact_mayors
# from the frame; if_exists='replace' drops any previous version of the table.
engine = create_engine(engine_path)
engine.execute("""CREATE SCHEMA IF NOT EXISTS politics""")
df.to_sql('fact_mayors', engine, schema='politics', if_exists='replace', index=False)

### Date keys & indexes

In [8]:
# NOTE: Engine.execute is the SQLAlchemy 1.x API (it was removed in 2.0);
# this cell assumes the 1.x series.

# Add the two date key columns. IF NOT EXISTS lets this cell be re-run
# without first rebuilding the table (assumes PostgreSQL 9.6+ — TODO confirm).
engine.execute("""
ALTER TABLE politics.fact_mayors
  ADD COLUMN IF NOT EXISTS start_date_id INTEGER,
  ADD COLUMN IF NOT EXISTS end_date_id INTEGER
""")

# Resolve start_year to the dim_date row for 6 December of that year.
# NOTE(review): start_year is set to 2017 for every row earlier in the
# notebook, so this picks 6 Dec 2017 — confirm that is the intended start of
# the term (presumably the mayors listed in the 2016 file took office in
# December 2016).
engine.execute("""
UPDATE politics.fact_mayors
SET start_date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = politics.fact_mayors.start_year
      AND dim_date.month_of_year = 12
      AND dim_date.day_of_month = 6
""")

# Same lookup for the end of the term, keyed on end_year.
engine.execute("""
UPDATE politics.fact_mayors
SET end_date_id = dim_date.id
FROM public.dim_date
WHERE dim_date.the_year = politics.fact_mayors.end_year
      AND dim_date.month_of_year = 12
      AND dim_date.day_of_month = 6
""")

# One index per key column; IF NOT EXISTS keeps a re-run from failing on an
# index that is already there.
engine.execute("""
CREATE INDEX IF NOT EXISTS fact_mayors_region_id
ON politics.fact_mayors (region_id)
""")

engine.execute("""
CREATE INDEX IF NOT EXISTS fact_mayors_comuna_datachile_id
ON politics.fact_mayors (comuna_datachile_id)
""")

engine.execute("""
CREATE INDEX IF NOT EXISTS fact_mayors_party_id
ON politics.fact_mayors (party_id)
""")

<sqlalchemy.engine.result.ResultProxy at 0x1181a1748>

### Related dimensions

In [9]:
# Point the loader at the official-ids folder, then fetch the party lookup
# table and give its columns English names.
# NOTE(review): the displayed frame carries an 'Unnamed: 0' column from the
# CSV; drop it if it should not travel further.
remote_path = 'http://pacha.datawheel.us/ids_oficiales/'
dim1 = loadFile('partidos_id.csv').rename(
    columns={'partido_id': 'party_id', 'partido': 'party'}
)
dim1

Unnamed: 0,party,party_id
0,PDC,1
1,RN,2
2,Amplitud,3
3,IND,4
4,UDI,5
5,PRSD,6
6,PPD,7
7,PCCH,8
8,PSCH,9
9,MAS Región,10
