# Study Grouped Catalan Elections Dataset

Load libraries:

In [1]:
import pandas as pd
import pprint
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import textdistance
from tqdm import tqdm
from unidecode import unidecode
import logging

# Pretty-printer with a 2-space indent, for dumping nested Python structures.
pp = pprint.PrettyPrinter(indent=2)

# Configure the root logger: INFO and above, each record prefixed with a
# second-resolution timestamp and the level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

Load the clean dataset:

In [2]:
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source (presumably this project's own processing step — confirm).
df = pd.read_pickle('../../data/processed/catalan-elections-grouped-data.pkl')
# Keep a separate copy of the frame exactly as loaded.
# Note that this holds a second full copy of the data in memory.
df_original = df.copy()

## Dataset Structure 

Visualize the structure of the dataset:

In [3]:
# Print the index range, column names and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12339340 entries, 0 to 12339339
Data columns (total 24 columns):
 #   Column                  Dtype         
---  ------                  -----         
 0   index_autonumeric       int64         
 1   nom_eleccio             object        
 2   id_nivell_territorial   object        
 3   nom_nivell_territorial  object        
 4   territori_codi          object        
 5   territori_nom           object        
 6   seccio                  Int64         
 7   vots                    int32         
 8   escons                  float64       
 9   districte               Int64         
 10  mesa                    object        
 11  party_code              int32         
 12  party_name              object        
 13  party_abbr              object        
 14  party_color             object        
 15  clean_party_name        object        
 16  clean_party_abbr        object        
 17  type                    object        
 18  

First of all, we want to check that, for every section in an election, there aren't any repeated ``party_code`` values:

In [4]:
def check_duplicated_party_codes(df):
    """Find the sections in which a ``party_code`` appears more than once.

    A section is identified by the combination of ``nom_eleccio``, ``seccio``,
    ``districte`` and ``territori_codi``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the four key columns above plus ``party_code`` and
        ``party_name``.

    Returns
    -------
    pandas.DataFrame
        Indexed by the four key columns (sorted by key), with one row per
        section that contains at least one repeated party code. The
        ``party_code`` and ``party_name`` columns hold lists with the distinct
        codes / names found on the repeated rows, in order of first
        appearance. When nothing is repeated the frame is empty but still has
        both columns.

    Notes
    -----
    Rows whose key columns contain missing values are ignored, because
    ``groupby`` drops missing keys by default.
    """
    section_keys = ["nom_eleccio", "seccio", "districte", "territori_codi"]

    # Flag every row whose (section, party_code) combination occurs more than
    # once. This is a single vectorized pass instead of one Python call per
    # section, which matters on a frame with millions of rows.
    repeated_rows = df[
        df.duplicated(subset=section_keys + ["party_code"], keep=False)
    ]

    def distinct_in_order(values):
        """Return the distinct values of a Series as a list, first-seen order."""
        return values.unique().tolist()

    # Only sections that actually contain repeats are grouped, so there is no
    # need to return None for clean sections and drop them afterwards.
    by_section = repeated_rows.groupby(section_keys)
    return pd.DataFrame(
        {
            "party_code": by_section["party_code"].agg(distinct_in_order),
            "party_name": by_section["party_name"].agg(distinct_in_order),
        }
    )


# Run the check on the full dataset and keep the result for the following cells.
duplicated_party_codes = check_duplicated_party_codes(df)

In [5]:
# Display the sections that contain repeated party codes.
# NOTE(review): the recorded output lists both `8019` and `08019` under
# territori_codi for the same election/section/district — possibly inconsistent
# zero-padding upstream that splits one section into two groups; confirm.
duplicated_party_codes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,party_code,party_name
nom_eleccio,seccio,districte,territori_codi,Unnamed: 4_level_1,Unnamed: 5_level_1
Eleccions Municipals 1979,0,1,8019,[3000000],[Altres partits]
Eleccions Municipals 1979,0,2,8019,[3000000],[Altres partits]
Eleccions Municipals 1979,0,3,8019,[3000000],[Altres partits]
Eleccions Municipals 1979,0,4,8019,[3000000],[Altres partits]
Eleccions Municipals 1979,0,5,8019,[3000000],[Altres partits]
...,...,...,...,...,...
Eleccions al Parlament de Catalunya 2021,215,8,8019,"[6, 10, 86, 301, 546, 693, 1034, 1035, 1083, 1...",[Partit dels Socialistes de Catalunya (PSC-PSO...
Eleccions al Parlament de Catalunya 2021,234,10,8019,"[6, 10, 301, 546, 693, 1034, 1035, 1083, 1097,...",[Partit dels Socialistes de Catalunya (PSC-PSO...
Eleccions al Parlament de Catalunya 2021,234,10,08019,[86],[Partit Popular]
Eleccions al Parlament de Catalunya 2021,235,10,8019,"[6, 10, 86, 301, 546, 693, 1034, 1035, 1083, 1...",[Partit dels Socialistes de Catalunya (PSC-PSO...


In [6]:
# Flatten the per-section lists of party names into one flat list.
# `explode` does this in a single pass, whereas summing the lists re-copies the
# growing result for every row. It also yields an empty list (not the integer 0)
# when there are no duplicated sections.
all_party_names = duplicated_party_codes['party_name'].explode().tolist()

# Count in how many flagged sections each party name appears.
party_name_counts = pd.Series(all_party_names).value_counts()

print(party_name_counts)

Partit dels Socialistes de Catalunya (PSC-PSOE)    165004
Convergència i Unió                                154943
Partit Popular                                     151254
Esquerra Republicana de Catalunya                  105437
Falange Española de las J.O.N.S.                    74060
                                                    ...  
Independents per Cava                                   1
Independents per La Vall de Boi                         1
Tria per Vallfogona                                     1
ARA TALARN                                              1
La Llista - Agullana l’ Estrada                         1
Name: count, Length: 870, dtype: int64
