In [11]:
import re
import time

import numpy as np
from matplotlib import pyplot as plt
import requests
import plotly.graph_objects as go
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select

# URL prefixes on camara.cl. The scraping functions append the deputy id to
# each one, as the value of the trailing query parameter.
BASE_INFORMATION_LINK = "https://www.camara.cl/camara/diputado_detalle.aspx?prmid="
TRANSPARENCY_LINK = "https://www.camara.cl/camara/transparencia_diputado.aspx?prmId="

# Chrome switches shared by every browser session this notebook starts.
chrome_options = Options()
for _switch in (
    "no-sandbox",
    "headless",
    "start-maximized",
    "--disable-notifications",
):
    chrome_options.add_argument(_switch)
    

# Spanish month names in calendar order: months[n - 1] is the name of month n.
months = [
    "enero", "febrero", "marzo", "abril",
    "mayo", "junio", "julio", "agosto",
    "septiembre", "octubre", "noviembre", "diciembre",
]

# Information gathering functions

In [12]:
def _optional_link_href(driver, selector):
    """Return the ``href`` of the first element matching ``selector``, or None.

    The plural lookup returns an empty list when nothing matches, so an absent
    link needs no exception handling and genuine driver errors still propagate.
    """
    matches = driver.find_elements_by_css_selector(selector)
    return matches[0].get_attribute("href") if matches else None


def get_basic_info(driver, id):
    """Scrape a deputy's profile page into a flat dictionary.

    Parameters
    ----------
    driver : selenium WebDriver
        Open browser session; it is navigated to the profile page.
    id : int or str
        Deputy id, appended to ``BASE_INFORMATION_LINK``.

    Returns
    -------
    dict
        Keys ``id``, ``nombre``, ``email``, ``comunas``, ``distrito``,
        ``region``, ``comite``, ``facebook`` and ``twitter``. The two social
        links are ``None`` when the page has no matching anchor.

    Raises
    ------
    IndexError
        If the summary block has fewer than four paragraphs.

    Errors raised by the driver itself are not caught here.
    """
    driver.get(BASE_INFORMATION_LINK + str(id))
    raw_name = driver.find_element_by_tag_name("h3").text
    summary = driver.find_elements_by_css_selector("div[class=summary] > p")

    diputado = {"id": id}
    # The first token of the heading is dropped (presumably a title such as
    # "Sr."/"Sra." -- TODO confirm); the remaining words are re-capitalised.
    name_parts = [part.lower().capitalize() for part in raw_name.split(" ")]
    diputado["nombre"] = " ".join(name_parts[1:])
    diputado["email"] = driver.find_element_by_css_selector("li[class=email] > a").text

    # The summary paragraphs are read positionally, in this fixed order.
    for position, field in enumerate(("comunas", "distrito", "region", "comite")):
        diputado[field] = summary[position].text

    # NOTE: not every deputy lists a Facebook or Twitter account.
    diputado["facebook"] = _optional_link_href(driver, "li[class=facebook] > a")
    diputado["twitter"] = _optional_link_href(driver, "li[class=twitter] > a")

    print("Finished getting biographical data")
    return diputado


def get_expenditure_data(driver, id, years=(2018, 2019), delay=0.25):
    """Collect a deputy's monthly expense rows from the transparency page.

    For every year in ``years`` and every entry of ``months`` the year and
    month dropdowns are set and the rows of the ``table_gasop`` table are read.

    Parameters
    ----------
    driver : selenium WebDriver
        Open browser session; it is navigated to the transparency page.
    id : int or str
        Deputy id, appended to ``TRANSPARENCY_LINK``.
    years : iterable of int, optional
        Years to visit, in order. Defaults to 2018 and 2019.
    delay : float, optional
        Seconds to pause after loading the page and around each month
        selection. Defaults to 0.25.

    Returns
    -------
    list of dict
        One dict per visited (year, month), in visiting order. Each holds
        ``year``, ``month`` (capitalised month name), one integer entry per
        table row keyed by the row's capitalised label, and ``total`` with the
        sum of those amounts.

    Raises
    ------
    ValueError
        If an amount cell is not an integer once its "." characters are removed.
    """
    driver.get(TRANSPARENCY_LINK + str(id))
    time.sleep(delay)

    gastos_mensuales = []
    for year in years:
        # The dropdown is looked up again for every selection, as before;
        # presumably the page re-renders after a change -- TODO confirm.
        year_dropdown = Select(
            driver.find_element_by_xpath(
                "//select[@name='ctl00$mainPlaceHolder$ddlAno']"
            )
        )
        year_dropdown.select_by_value(str(year))
        for month_number, month_name in enumerate(months, start=1):
            time.sleep(delay)
            month_dropdown = Select(
                driver.find_element_by_xpath(
                    "//select[@name='ctl00$mainPlaceHolder$ddlMes']"
                )
            )
            month_dropdown.select_by_value(str(month_number))
            time.sleep(delay)
            rows = driver.find_elements_by_css_selector(
                "table[id=table_gasop] > tbody > tr"
            )
            costos = {"year": year, "month": month_name.capitalize()}
            gasto_total = 0
            for row in rows:
                cells = row.find_elements_by_tag_name("td")
                if len(cells) < 2:
                    # A row without a label cell and an amount cell cannot be
                    # parsed; skip it instead of failing on the index.
                    continue
                # Amounts use "." as thousands separator, e.g. "1.234.567".
                amount = int(cells[1].text.replace(".", ""))
                costos[cells[0].text.lower().capitalize()] = amount
                gasto_total += amount
            costos["total"] = gasto_total
            gastos_mensuales.append(costos)
    print("Finished getting expenditure")
    return gastos_mensuales




# Main Function Call

In [31]:
def _expense_label(gasto):
    """Build an x-axis label such as '18-enero' from one monthly record."""
    return "%s-%s" % (str(gasto["year"])[-2:], gasto["month"].lower())


def _series_by_category(gastos):
    """Map each expense category to its amounts, one per record, 0 when absent."""
    reserved = ("year", "month", "total")
    categories = {}
    for gasto in gastos:
        for key in gasto:
            if key not in reserved:
                # Keeps categories in the order they are first seen.
                categories.setdefault(key, None)
    return {
        category: [gasto.get(category, 0) for gasto in gastos]
        for category in categories
    }


def get_all_info(id):
    """Scrape one deputy's profile and expenses and show them as a stacked bar chart.

    A Chrome session is started from ``./chromedriver`` with the shared
    ``chrome_options`` and is always closed, even when scraping fails.

    Parameters
    ----------
    id : int or str
        Deputy id passed to ``get_basic_info`` and ``get_expenditure_data``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    driver = webdriver.Chrome("./chromedriver", options=chrome_options)
    try:
        diputado = get_basic_info(driver, id)
        gastos = get_expenditure_data(driver, id)
    finally:
        # Without this a scraping error would leave the browser process running.
        driver.quit()

    # Labels come from the scraped records so they always line up with the bars.
    formatted_labels = [_expense_label(gasto) for gasto in gastos]
    all_data = _series_by_category(gastos)

    fig = go.Figure()
    for category, amounts in all_data.items():
        fig.add_trace(go.Bar(name=category, x=formatted_labels, y=amounts))
    graph_title = 'Gastos de %s ' % diputado["nombre"]
    fig.update_layout(
        barmode='stack',
        showlegend=False,
        autosize=True,
        title=go.layout.Title(text=graph_title),
    )
    fig.show()

In [32]:
# Scrape profile and expenses for deputy id 1009 and show the stacked bar chart.
get_all_info(1009)

Finished getting biographical data
Finished getting expenditure


In [15]:
# Scrape profile and expenses for deputy id 974 and show the stacked bar chart.
get_all_info(974)

Finished getting biographical data
Finished getting expenditure


In [16]:
# Scrape profile and expenses for deputy id 1030 and show the stacked bar chart.
get_all_info(1030)

Finished getting biographical data
Finished getting expenditure


In [17]:
# Scrape profile and expenses for deputy id 926 and show the stacked bar chart.
get_all_info(926)

Finished getting biographical data
Finished getting expenditure


In [18]:
# Scrape profile and expenses for deputy id 1011 and show the stacked bar chart.
get_all_info(1011)

Finished getting biographical data
Finished getting expenditure


In [33]:
# Scrape profile and expenses for deputy id 1081 and show the stacked bar chart.
get_all_info(1081)

Finished getting biographical data
Finished getting expenditure
