# Carga de datos

Se utilizan los archivos Excel con los resultados preliminares de las
elecciones parlamentarias de 2025 en Chile, entregados por el
[Servel](https://elecciones.servel.cl/).

A cada candidato se le agrega el pacto al cual pertenece y el distrito por el
cual está compitiendo, guardando la información en `datos_2025.csv`.

In [26]:
import numpy as np
import pandas as pd
from openpyxl import load_workbook
import re

from data_load_utils import add_percentage

In [20]:
def get_pact(row, df):
    """
    Return the pact a candidate row belongs to.

    The pact is read from the "first_pact" column of the closest row at or
    above the candidate whose "candidate" cell is empty (a pact header row).

    row: a row of ``df`` (as passed by ``df.apply(..., axis=1)``); its
         ``name`` is used as the starting index label.
    df:  the full district table, with "candidate" and "first_pact" columns.
         NOTE(review): the upward scan counts labels down to 0, so it assumes
         the default 0..n-1 integer index.

    Returns None when the row itself has no candidate, or when no pact header
    row is found above it.
    """
    # pact header rows have no pact of their own
    if pd.isna(row["candidate"]):
        return None

    # walk upwards until a row without candidate name shows up
    label = row.name
    while label >= 0:
        if pd.isna(df.loc[label, "candidate"]):
            return df.loc[label, "first_pact"]
        label -= 1

    return None

In [24]:
def clean_party(party: str):
    """
    Remove the distinction between independents running on a party's slot
    and actual party members (both are treated the same in the election).

    Every occurrence of the "IND - " marker is dropped from the party label.
    """
    independent_marker = "IND - "
    pieces = party.split(independent_marker)
    return "".join(pieces)

def clean_candidate(candidate: str):
    """
    Remove the ballot number that precedes the candidate's name.

    Everything up to the first space is dropped, together with the whitespace
    that separates it from the name, so "68 ANA PEREZ" becomes "ANA PEREZ"
    (without a leading space). A string with no space at all becomes "".
    """
    # `[^ ]*` is the leading token (the number); `\s*` also consumes the
    # separator so the cleaned name does not start with a blank
    return re.sub(r"^[^ ]*\s*", "", candidate)

In [27]:
# Build a single table with one row per candidate, across all districts.
# Final columns: candidate, pact, party, district, votes (plus whatever
# `add_percentage` appends).

# per-district tables, concatenated once after the loop (concatenating inside
# the loop copies the accumulated data on every iteration)
district_frames = []

# for each district (files distrito_1.xlsx ... distrito_28.xlsx)
for n in range(1, 29):

    # load excel: only columns A-D are used, renamed to fixed names
    # NOTE(review): header=10 assumes the table header sits on the 11th row
    # of every workbook — confirm if the Servel layout changes
    district_df = pd.read_excel(
        f"datos_2025/distrito_{n}.xlsx",
        header=10,
        names=["first_pact", "candidate", "party", "votes"],
        usecols="A:D"
    )

    # assign pact and district to each candidate
    district_df["pact"] = district_df.apply(
        get_pact, args=(district_df,), axis=1
    )
    district_df["district"] = n

    # remove pact rows (those without candidate) and reorder columns
    district_df = district_df[~district_df["candidate"].isna()][
        ["candidate", "pact", "party", "district", "votes"]
    ]

    district_frames.append(district_df)

# stack all districts under a fresh 0..n-1 index
results = pd.concat(district_frames, axis=0, ignore_index=True)

# drop null candidates (edge case)
results = results.dropna(subset=["candidate"])

# give independents their own pact (workaround): the pact becomes the raw
# candidate label, which is done BEFORE the names are cleaned below
is_independent = results["pact"] == "CANDIDATURA INDEPENDIENTE"
results.loc[is_independent, "pact"] = results.loc[is_independent, "candidate"]

# cast district number and vote tallies to integers
results = results.astype({"district": int, "votes": int})

# clean names
results["party"] = results["party"].apply(clean_party)
results["candidate"] = results["candidate"].apply(clean_candidate)

# add percentage of votes (of their district) to each candidate
results = add_percentage(results)

In [31]:
# abbreviate pact names: full ballot label -> short label
# (only the "pact" column is touched)
pact_short_names = {
    "A - PARTIDO ECOLOGISTA VERDE": "PEV",
    "B - VERDES, REGIONALISTAS Y HUMANISTAS": "FREVS/AH",
    "C - UNIDAD POR CHILE": "UNIDAD",
    "D - IZQUIERDA ECOLOGISTA POPULAR ANIMALISTA Y HUMANISTA": "PH/PI",
    "E - MOVIMIENTO AMARILLOS POR CHILE": "AMARILLOS",
    "F - PARTIDO DE TRABAJADORES REVOLUCIONARIOS": "PTR",
    "G - PARTIDO ALIANZA VERDE POPULAR": "PAVP",
    "H - POPULAR": "POPULAR",
    "I - PARTIDO DE LA GENTE": "PDG",
    "J - CHILE GRANDE Y UNIDO": "CHILEVAMOS",
    "K - CAMBIO POR CHILE": "REP/PNL/PSC",
    "CANDIDATURA INDEPENDIENTE": "INDEP",
}
results = results.replace({"pact": pact_short_names})

In [32]:
# display candidates ranked by vote count, highest first
# (the original table is not modified; this only shows a sorted copy)
results.sort_values(by="votes", ascending=False)

Unnamed: 0,candidate,pact,party,district,votes,percentage
1090,CLAUDIA PAOLA BARRIENTOS SANCHEZ,68 CLAUDIA PAOLA BARRIENTOS SANCHEZ,IND,28,0,
0,RICARDO SANZANA OTEIZA,FREVS/AH,AH,1,0,
1,LEONARDO VALENZUELA ATENAS,FREVS/AH,AH,1,0,
1074,THOMAS LORCA ALMONACID,FREVS/AH,FREVS,28,0,
1073,RODRIGO UTZ CONTRERAS,FREVS/AH,FREVS,28,0,
...,...,...,...,...,...,...
7,SANDRA FLORES CONTRERAS,UNIDAD,PS,1,0,
6,GIOVANNA CALLE CAPUMA,UNIDAD,PPD,1,0,
5,JORGE DIAZ IBARRA,UNIDAD,PDC,1,0,
4,LUIS MALLA VALENZUELA,UNIDAD,PL,1,0,


In [33]:
# write the final table to disk as CSV (the row index is written as well,
# since `index` is left at its default)
results.to_csv(path_or_buf="datos_2025.csv")