This notebook downloads roll-call voting data from the Chilean Chamber of Deputies. These data were analyzed in the paper "Parliamentary roll-call voting as a complex dynamical system: The case of Chile" by Diego Morales-Bader, Ramón D. Castillo, Ralf F. A. Cox, and Carlos Ascencio-Garrido.

In [1]:
from xml.dom import minidom
import pandas as pd
from urllib.request import urlopen
import datetime

In [2]:
# Download every vote of the Chamber of Deputies, one request per year, and
# collect the fields of interest into a single dataframe of strings.

# Range of years to download (both ends inclusive)
from_year = 2002
to_year = 2021

# Web-service endpoint; the requested year is appended as the prmAnno value
BASE_URL = "http://opendata.camara.cl/camaradiputados/WServices/WSLegislativo.asmx/retornarVotacionesXAnno?prmAnno="

# Dataframe columns, in output order
COLUMNS = ['ID', "Fecha", 'Descripción', "Si", "No", "Abstención", "Dispensado", "Resultado", "Tipo", "Quorum"]

# Dataframe column -> XML tag for the fields read unconditionally from each <Votacion>
REQUIRED_TAGS = {
    "ID": "Id",
    "Fecha": "Fecha",
    "Descripción": "Descripcion",
    "Si": "TotalSi",
    "No": "TotalNo",
    "Abstención": "TotalAbstencion",
    "Dispensado": "TotalDispensado",
    "Tipo": "Tipo",
    "Quorum": "Quorum",
}


def _first_text(node, tag):
    """Return, as a str, the text held by the first <tag> element under node.

    Raises IndexError when no such element exists and AttributeError when the
    element is empty (it has no child node to read the text from).
    """
    return str(node.getElementsByTagName(tag)[0].firstChild.data)


# One dict per vote; building whole records keeps all fields of a vote together,
# so a failure while reading one vote cannot leave the columns out of step.
records = []

for year in range(from_year, to_year + 1):
    # The context manager closes the HTTP response even if parsing fails
    with urlopen(BASE_URL + str(year)) as response:
        document = minidom.parse(response)

    for votacion in document.getElementsByTagName("Votacion"):
        record = {column: _first_text(votacion, tag) for column, tag in REQUIRED_TAGS.items()}

        # 'Resultado' is the only optional field: a missing or empty element is
        # stored as "" (the first rows of the downloaded data have no outcome).
        try:
            record["Resultado"] = _first_text(votacion, "Resultado")
        except (IndexError, AttributeError):
            record["Resultado"] = ""

        records.append(record)

    # Progress indicator: one line per downloaded year
    print(year)

# Build the dataframe once; passing columns fixes the column order and keeps the
# expected columns even when nothing was downloaded.
df = pd.DataFrame(records, columns=COLUMNS)

2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021


In [3]:
# The download stores every field as text: cast the four vote counts to
# integers, then parse 'Fecha' into datetimes.
count_columns = ["Si", "No", "Abstención", "Dispensado"]
df = df.astype({column: "int" for column in count_columns})
df['Fecha'] = pd.to_datetime(df['Fecha'])

In [4]:
# Preview the first rows to check the integer and datetime conversions
df.head()

Unnamed: 0,ID,Fecha,Descripción,Si,No,Abstención,Dispensado,Resultado,Tipo,Quorum
0,1990,2002-12-19 12:06:00,Boletín N° 2625-07,65,0,0,0,,Proyecto de Ley,Quórum Simple
1,1986,2002-12-19 10:50:00,Boletín N° 3107-05,44,1,1,0,,Proyecto de Ley,Quórum Simple
2,1984,2002-12-18 13:57:48,INFORME COMISION INVESTIGADORA-Solicitar a S.E...,56,0,0,0,,Otros,Quórum Simple
3,1983,2002-12-18 13:47:09,Proyecto de Acuerdo N° 171,74,0,1,0,,Proyecto de Acuerdo,Quórum Simple
4,1982,2002-12-18 13:35:36,Boletín N° 2727-11,85,0,0,0,,Proyecto de Ley,Quórum Simple


In [5]:
# Agreement ratio: share of the cast votes held by the larger of the Si / No sides
def agreement_ratio(row):
    """Return the fraction of cast votes obtained by the larger of the Si / No sides.

    Parameters
    ----------
    row : mapping with the keys 'Si', 'No' and 'Abstención'
        Vote counts of one vote (a dataframe row when used with ``df.apply``).

    Returns
    -------
    float or None
        ``Si / (Si + No + Abstención)`` when Si is strictly larger than No,
        otherwise ``No / (Si + No + Abstención)`` (ties therefore use No).
        None when the number of cast votes is not positive.
    """
    yes_votes, no_votes = row['Si'], row['No']
    votes_cast = yes_votes + no_votes + row['Abstención']

    # Nothing to divide by: leave the ratio undefined
    if not votes_cast > 0:
        return None

    larger_side = yes_votes if yes_votes > no_votes else no_votes
    return larger_side / votes_cast

# Row-wise application; the function already takes a row, so no wrapper is needed
df['agreement_ratio'] = df.apply(agreement_ratio, axis=1)

In [10]:
# Fill in the outcome of the votes whose 'Resultado' was not reported

# Quorums decided on the share of seats voting Si:
#   quorum label -> (required share, whether reaching the share exactly is enough)
_SEAT_SHARE_THRESHOLDS = {
    'Quórum Calificado': (0.5, False),            # needs strictly more than half
    'Ley Orgánica Constitucional': (4/7, True),
    'Reforma Constitucional 2/3': (2/3, True),
    '2/3': (2/3, True),
    'Reforma Constitucional 3/5': (3/5, True),
    '3/5': (3/5, True),
    '1/3': (1/3, True),
}


def missing_voting_outcome(row, total_seats=120):
    """Return the outcome of a vote, inferring it when 'Resultado' is empty.

    Parameters
    ----------
    row : mapping with the keys 'Resultado', 'Quorum', 'Si', 'No',
        'Abstención' and 'Dispensado'
        One vote (a dataframe row when used with ``df.apply``).
    total_seats : int, default 120
        Number of seats in the chamber, used by the seat-based quorums.
        NOTE(review): the Chamber has had 155 seats since the term that began
        in 2018; confirm that no vote from that term onwards lacks 'Resultado',
        or pass the appropriate value.

    Returns
    -------
    The reported 'Resultado' when it is not the empty string. Otherwise
    'Aprobado' when the quorum is met and 'Unánime' when it is not:

    * 'Quórum Simple': Si must exceed 50 % of Si + No + Abstención.
    * seat-based quorums: ``Si / (total_seats - Dispensado)`` must reach the
      share listed in ``_SEAT_SHARE_THRESHOLDS``.

    None when 'Resultado' is empty and the quorum label is not recognised.

    NOTE(review): 'Unánime' is the label this notebook uses for inferred votes
    that did not pass (it is later grouped with 'Empate' and 'Sin Quórum');
    confirm this matches the labels delivered by the data source.
    """
    reported = row["Resultado"]
    if reported != "":
        return reported

    quorum = row["Quorum"]

    if quorum == 'Quórum Simple':
        votes_cast = row['Si'] + row['No'] + row['Abstención']
        # With no votes cast the percentage is taken as 0, i.e. not approved
        percentage_yes = row['Si'] / votes_cast * 100 if votes_cast > 0 else 0
        return 'Aprobado' if percentage_yes > 50 else "Unánime"

    if quorum not in _SEAT_SHARE_THRESHOLDS:
        return None

    required_share, inclusive = _SEAT_SHARE_THRESHOLDS[quorum]

    # The share is taken over the seats left after removing dispensed deputies.
    # The subtraction must happen before the division: without parentheses the
    # expression is (Si / seats) - Dispensado, which is negative as soon as one
    # deputy is dispensed.
    seats_counted = total_seats - row['Dispensado']
    if seats_counted <= 0:
        # No seat left to count against: the quorum cannot be met
        return "Unánime"

    share_yes = row['Si'] / seats_counted
    quorum_met = share_yes >= required_share if inclusive else share_yes > required_share
    return 'Aprobado' if quorum_met else "Unánime"

# 'Resultados2' holds the reported outcome, or the inferred one where it was empty
df['Resultados2'] = df.apply(missing_voting_outcome, axis=1)

In [11]:
# Encode the completed outcome (df["Resultados2"]) as a numeric category
def voting_outcome_cat(row):
    """Return the numeric category of the outcome stored in 'Resultados2'.

    Parameters
    ----------
    row : mapping with the key 'Resultados2'

    Returns
    -------
    int or None
        1 for "Aprobado"; 2 for "Unánime", "Empate" or "Sin Quórum";
        None for any other value.
    """
    outcome = row["Resultados2"]
    if outcome == "Aprobado":
        return 1
    if outcome in ("Unánime", "Empate", "Sin Quórum"):
        return 2
    return None
        
# Row-wise application; the function already takes a row, so no wrapper is needed
df['voting_outcome'] = df.apply(voting_outcome_cat, axis=1)

In [12]:
# Assign the legislative term

# (start, end, label) of each four-year term from 2002 to 2026. Terms change on
# 11 March; datetime takes (year, month, day), so that date is (year, 3, 11).
_LEGISLATIVE_TERMS = [
    (
        datetime.datetime(start_year, 3, 11),
        datetime.datetime(start_year + 4, 3, 11),
        f"{start_year}-{start_year + 4}",
    )
    for start_year in range(2002, 2026, 4)
]


def legislative_term(row):
    """Return the label of the legislative term in which a vote took place.

    Parameters
    ----------
    row : mapping with the key 'Fecha'
        'Fecha' must be comparable with ``datetime.datetime`` (for example a
        pandas Timestamp).

    Returns
    -------
    str or None
        "2002-2006", "2006-2010", "2010-2014", "2014-2018", "2018-2022" or
        "2022-2026". Each term runs from 11 March of its first year
        (inclusive) to 11 March of its last year (exclusive). None when the
        date falls outside 11 March 2002 - 11 March 2026.
    """
    fecha = row["Fecha"]
    for term_start, term_end, label in _LEGISLATIVE_TERMS:
        if term_start <= fecha < term_end:
            return label
    return None

# Row-wise application; the function already takes a row, so no wrapper is needed
df['legislative_term'] = df.apply(legislative_term, axis=1)

In [13]:
# Preview the first rows with the derived columns added
df.head()

Unnamed: 0,ID,Fecha,Descripción,Si,No,Abstención,Dispensado,Resultado,Tipo,Quorum,agreement_ratio,Resultados2,voting_outcome,legislative_term
0,1990,2002-12-19 12:06:00,Boletín N° 2625-07,65,0,0,0,,Proyecto de Ley,Quórum Simple,1.0,Aprobado,1,2002-2006
1,1986,2002-12-19 10:50:00,Boletín N° 3107-05,44,1,1,0,,Proyecto de Ley,Quórum Simple,0.956522,Aprobado,1,2002-2006
2,1984,2002-12-18 13:57:48,INFORME COMISION INVESTIGADORA-Solicitar a S.E...,56,0,0,0,,Otros,Quórum Simple,1.0,Aprobado,1,2002-2006
3,1983,2002-12-18 13:47:09,Proyecto de Acuerdo N° 171,74,0,1,0,,Proyecto de Acuerdo,Quórum Simple,0.986667,Aprobado,1,2002-2006
4,1982,2002-12-18 13:35:36,Boletín N° 2727-11,85,0,0,0,,Proyecto de Ley,Quórum Simple,1.0,Aprobado,1,2002-2006


In [14]:
# Write the enriched dataframe to disk; the file name records the year range
output_path = f"votes_{from_year}-{to_year}.csv"
df.to_csv(output_path, encoding="utf-8")