In [2]:
import pandas as pd

# Peru

Data extracted from https://resultadoshistorico.onpe.gob.pe/.

In [1]:
# Election year; interpolated into the input workbook names and the output file names.
year = 2021
# Election round label; interpolated into the same input and output file names.
election = "first_round"

def create_polling_id(data, columns=None):
    """Build a hyphen-separated identifier from several columns of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the columns to combine.
    columns : sequence of str
        Column names, in the order they should appear in the identifier.

    Returns
    -------
    pandas.Series
        One string per row of ``data`` (for example ``"A-B-C"``), built
        from the string form of every listed column.

    Raises
    ------
    ValueError
        If ``columns`` is omitted or empty.
    """
    # A None default avoids sharing one mutable list between calls.
    columns = [] if columns is None else list(columns)
    if len(columns) == 0:
        raise ValueError("create_polling_id needs at least one column name")

    # Cast the first column as well, so numeric columns can lead the
    # identifier; astype yields a new Series, leaving ``data`` untouched.
    output = data[columns[0]].astype(str)

    for column in columns[1:]:
        output += "-" + data[column].astype(str)
    return output

In [3]:
# Load the "Nivel_Distrital" sheet of the presidential results workbook.
df = pd.read_excel(
    io=f"../data/Peru/EG{year}_Resultados_Presidencial_{election}.xlsx",
    sheet_name="Nivel_Distrital",
)

# Attach one identifier per row: region, province and district joined by hyphens.
df = df.assign(
    polling_id=create_polling_id(df, ["Region", "Provincia", "Distrito"])
)
df.head()

Unnamed: 0,Region,Provincia,Distrito,Electores,Participación,% Participación,Ausentismo,% Ausentismo,Votos emitidos,% Votos emitidos,Votos válidos,% Votos válidos,Organización Política,Tipo Organización Política,Votos,% Votos,polling_id
0,AMAZONAS,CHACHAPOYAS,CHACHAPOYAS,20819,13619,0.6542,7200,0.3458,13619,0.6542,10948,0.5259,ACCION POPULAR,PARTIDO POLÍTICO,1338.0,0.1222,AMAZONAS-CHACHAPOYAS-CHACHAPOYAS
1,AMAZONAS,CHACHAPOYAS,CHACHAPOYAS,20819,13619,0.6542,7200,0.3458,13619,0.6542,10948,0.5259,ALIANZA PARA EL PROGRESO,PARTIDO POLÍTICO,1009.0,0.0922,AMAZONAS-CHACHAPOYAS-CHACHAPOYAS
2,AMAZONAS,CHACHAPOYAS,CHACHAPOYAS,20819,13619,0.6542,7200,0.3458,13619,0.6542,10948,0.5259,AVANZA PAIS - PARTIDO DE INTEGRACION SOCIAL,PARTIDO POLÍTICO,1132.0,0.1034,AMAZONAS-CHACHAPOYAS-CHACHAPOYAS
3,AMAZONAS,CHACHAPOYAS,CHACHAPOYAS,20819,13619,0.6542,7200,0.3458,13619,0.6542,10948,0.5259,DEMOCRACIA DIRECTA,PARTIDO POLÍTICO,36.0,0.0033,AMAZONAS-CHACHAPOYAS-CHACHAPOYAS
4,AMAZONAS,CHACHAPOYAS,CHACHAPOYAS,20819,13619,0.6542,7200,0.3458,13619,0.6542,10948,0.5259,EL FRENTE AMPLIO POR JUSTICIA. VIDA Y LIBERTAD,PARTIDO POLÍTICO,112.0,0.0102,AMAZONAS-CHACHAPOYAS-CHACHAPOYAS


In [25]:
# List the distinct region names present in the results.
df["Region"].unique()

array(['AMAZONAS', 'ANCASH', 'APURIMAC', 'AREQUIPA', 'AYACUCHO',
       'CAJAMARCA', 'CUSCO', 'HUANCAVELICA', 'HUANUCO', 'ICA', 'JUNIN',
       'LA LIBERTAD', 'LAMBAYEQUE', 'LIMA', 'LORETO', 'MADRE DE DIOS',
       'MOQUEGUA', 'PASCO', 'PIURA', 'PUNO', 'SAN MARTIN', 'TACNA',
       'TUMBES', 'CALLAO', 'UCAYALI'], dtype=object)

In [5]:
# Load the candidate list that accompanies the results workbook.
df_candidates = pd.read_excel(
    f"../data/Peru/EG{year}_Candidatos_Presidencial_{election}.xlsx"
)

# Full name = given names followed by both surnames, separated by single spaces.
df_candidates["candidate"] = [
    f"{given} {surname_1} {surname_2}"
    for given, surname_1, surname_2 in zip(
        df_candidates["Prenombres"],
        df_candidates["Primer apellido"],
        df_candidates["Segundo apellido"],
    )
]

# Keep only rows whose office is the presidency, and only the two columns
# needed to attach a candidate name to each party.
df_candidates = df_candidates.loc[
    df_candidates["Cargo"] == "PRESIDENTE DE LA REPÚBLICA",
    ["candidate", "Organización Política"],
]
df_candidates.head()

Unnamed: 0,candidate,Organización Política
0,YONHY LESCANO ANCIETA,ACCION POPULAR
3,CESAR ACUÑA PERALTA,ALIANZA PARA EL PROGRESO
6,HERNANDO DE SOTO POLAR,AVANZA PAIS - PARTIDO DE INTEGRACION SOCIAL
9,ANDRES AVELINO ALCANTARA PAREDES,DEMOCRACIA DIRECTA
12,MARCO ANTONIO ARANA ZEGARRA,"EL FRENTE AMPLIO POR JUSTICIA, VIDA Y LIBERTAD"


In [6]:
def _party_key(names):
    """Return a punctuation-insensitive join key for a Series of party names."""
    return (
        names.str.upper()
        .str.replace(r"[^\w\s]", " ", regex=True)  # punctuation -> space
        .str.replace(r"\s+", " ", regex=True)      # collapse runs of whitespace
        .str.strip()
    )


# The two workbooks do not spell every party identically: the results sheet has
# "EL FRENTE AMPLIO POR JUSTICIA. VIDA Y LIBERTAD" where the candidate list has
# "EL FRENTE AMPLIO POR JUSTICIA, VIDA Y LIBERTAD". An exact-match inner join
# silently drops every row of such a party, so join on a normalized key instead.
# The party name column of the results sheet is kept as-is in the output.
df = (
    df.drop(columns="candidate", errors="ignore")  # lets the cell be re-run safely
    .assign(_party_key=_party_key(df["Organización Política"]))
    .merge(
        df_candidates.assign(
            _party_key=_party_key(df_candidates["Organización Política"])
        ).drop(columns="Organización Política"),
        on="_party_key",
        how="inner",
        # Fail loudly if two candidates map to the same party key, instead of
        # duplicating result rows.
        validate="many_to_one",
    )
    .drop(columns="_party_key")
)

In [7]:
# Preview the merged frame; the candidate column should now be present.
df.head()

Unnamed: 0,Region,Provincia,Distrito,Electores,Participación,% Participación,Ausentismo,% Ausentismo,Votos emitidos,% Votos emitidos,Votos válidos,% Votos válidos,Organización Política,Tipo Organización Política,Votos,% Votos,polling_id,candidate
0,AMAZONAS,CHACHAPOYAS,CHACHAPOYAS,20819,13619,0.6542,7200,0.3458,13619,0.6542,10948,0.5259,ACCION POPULAR,PARTIDO POLÍTICO,1338.0,0.1222,AMAZONAS-CHACHAPOYAS-CHACHAPOYAS,YONHY LESCANO ANCIETA
1,AMAZONAS,CHACHAPOYAS,ASUNCION,285,114,0.4,171,0.6,114,0.4,90,0.3158,ACCION POPULAR,PARTIDO POLÍTICO,17.0,0.1889,AMAZONAS-CHACHAPOYAS-ASUNCION,YONHY LESCANO ANCIETA
2,AMAZONAS,CHACHAPOYAS,BALSAS,1012,504,0.498,508,0.502,504,0.498,356,0.3518,ACCION POPULAR,PARTIDO POLÍTICO,52.0,0.1461,AMAZONAS-CHACHAPOYAS-BALSAS,YONHY LESCANO ANCIETA
3,AMAZONAS,CHACHAPOYAS,CHETO,587,411,0.7002,176,0.2998,411,0.7002,296,0.5043,ACCION POPULAR,PARTIDO POLÍTICO,47.0,0.1588,AMAZONAS-CHACHAPOYAS-CHETO,YONHY LESCANO ANCIETA
4,AMAZONAS,CHACHAPOYAS,CHILIQUIN,732,324,0.4426,408,0.5574,324,0.4426,220,0.3005,ACCION POPULAR,PARTIDO POLÍTICO,28.0,0.1273,AMAZONAS-CHACHAPOYAS-CHILIQUIN,YONHY LESCANO ANCIETA


In [14]:
# One row per polling_id carrying its "Ausentismo" count, shaped like the
# candidate rows: the count goes in "value", the label is "ABSTENTION", and
# flag_candidates=0 marks the row as not being a real candidate.
df_abstentions = (
    df.loc[:, ["polling_id", "Ausentismo"]]
    .drop_duplicates()
    .rename(columns={"Ausentismo": "value"})
    .assign(candidate="ABSTENTION", flag_candidates=0)
)
df_abstentions.head()

Unnamed: 0,polling_id,value,candidate,flag_candidates
0,AMAZONAS-CHACHAPOYAS-CHACHAPOYAS,7200,ABSTENTION,0
1,AMAZONAS-CHACHAPOYAS-ASUNCION,171,ABSTENTION,0
2,AMAZONAS-CHACHAPOYAS-BALSAS,508,ABSTENTION,0
3,AMAZONAS-CHACHAPOYAS-CHETO,176,ABSTENTION,0
4,AMAZONAS-CHACHAPOYAS-CHILIQUIN,408,ABSTENTION,0


In [15]:
# Total votes per (polling_id, candidate) pair.
tt = df.groupby(["polling_id", "candidate"]).agg({"Votos": "sum"})

# Share of each candidate within its polling_id. transform("sum") broadcasts the
# per-polling_id total back onto every row, so the division stays a plain
# Series operation rather than a DataFrame-valued groupby.apply.
# fillna(0) covers polling_ids whose total is zero (0/0 -> NaN).
tt["rate"] = (
    tt["Votos"] / tt.groupby(level=0)["Votos"].transform("sum")
).fillna(0)

tt = tt.reset_index()
tt = tt[["polling_id", "candidate", "rate", "Votos"]]

# Rank 1 = most votes within the polling_id; ties share the lowest rank.
tt["rank"] = (
    tt.groupby("polling_id")["Votos"]
    .rank(method="min", ascending=False)
    .astype(int)
)

In [17]:
# Candidate rows: vote counts move to the shared "value" column, names are
# trimmed of surrounding whitespace, and flag_candidates=1 marks real candidates.
df_filtered = (
    tt.loc[:, ["polling_id", "candidate", "Votos", "rank", "rate"]]
    .rename(columns={"Votos": "value"})
    .assign(
        candidate=lambda frame: frame["candidate"].map(lambda name: name.strip()),
        flag_candidates=1,
    )
)

# Stack candidate rows on top of the abstention rows and export the result.
data = pd.concat([df_filtered, df_abstentions])

data.to_csv(
    f"../data_output/Peru/{year}_{election}.csv.gz",
    compression="gzip",
    index=False,
)

In [23]:
# Overall vote total per candidate (abstention rows excluded), largest first.
(
    data.query("flag_candidates == 1")
    .groupby("candidate")[["value"]]
    .sum()
    .sort_values("value", ascending=False)
)

Unnamed: 0_level_0,value
candidate,Unnamed: 1_level_1
JOSE PEDRO CASTILLO TERRONES,2714152.0
KEIKO SOFIA FUJIMORI HIGUCHI,1907896.0
RAFAEL BERNARDO LOPEZ ALIAGA CAZORLA,1657575.0
HERNANDO DE SOTO POLAR,1652682.0
YONHY LESCANO ANCIETA,1294681.0
VERONIKA FANNY MENDOZA FRISCH,1111407.0
CESAR ACUÑA PERALTA,863955.0
DANIEL BELIZARIO URRESTI ELERA,808559.0
JULIO ARMANDO GUZMAN CACERES,319176.0
ALBERTO ISMAEL BEINGOLEA DELGADO,282007.0


In [29]:
# One row per polling_id with its location names, voter count and
# participation count, under English column names.
df_location = (
    df.loc[
        :,
        ["polling_id", "Region", "Provincia", "Distrito", "Electores", "Participación"],
    ]
    .drop_duplicates()
    .rename(
        columns={
            "Region": "region",
            "Provincia": "province",
            "Distrito": "district",
            "Electores": "voters",
            "Participación": "participation",
        }
    )
)

df_location.to_csv(
    f"../data_output/Peru/{year}_{election}_location.csv.gz",
    compression="gzip",
    index=False,
)