In [None]:
import geopandas as gpd
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

In [None]:
# Read the election results table; the file is parsed with ';' as the field separator.
data_dir = "../data/"
election_csv_path = data_dir + "G20192-Columnes-ME-EN.csv"
election_data = pd.read_csv(election_csv_path, sep=';')

In [None]:
# Read the GeoJSON map of Catalonian municipalities from the data directory
map_catalonia = 'municipality_map.geojson'
map_catalonia_path = data_dir + map_catalonia
geo_data = gpd.read_file(map_catalonia_path)

In [None]:
# Give the map's municipality-name column ('nom_muni') the same name the election
# data uses, so that both tables share the 'Municipality_name' merge key.
geo_data = geo_data.rename(columns={'nom_muni': 'Municipality_name'})
# Order the map rows by municipality name
geo_data_sorted = geo_data.sort_values('Municipality_name')

In [None]:
# Sum the columns at positions 11-20 of the election data for each municipality.
# The rows are grouped by the 'Municipality_name' column of the full table, so the
# group labels initially end up in the index of the aggregated frame.
# reset_index() returns a new DataFrame (it does not modify in place), so it is
# chained into the assignment to turn 'Municipality_name' into a regular column.
participation_by_municipality = (
    election_data.iloc[:, 11:21]
    .groupby(by=election_data["Municipality_name"])
    .sum()
    .reset_index()
)
participation_by_municipality

In [None]:
# Participation rate per municipality: the summed 'Part_20:00' column divided by the
# summed 'Electoral_census' column.
# NOTE(review): presumably 'Part_20:00' is the turnout recorded at 20:00 and
# 'Electoral_census' the number of eligible voters -- confirm against the dataset docs.
votes_at_2000 = participation_by_municipality['Part_20:00']
census_size = participation_by_municipality['Electoral_census']
participation_by_municipality['Participation_rate'] = votes_at_2000.div(census_size)
participation_by_municipality

In [None]:
# Attach the participation figures to the municipality geometries.
# This is an inner join on 'Municipality_name' (the pandas default, spelled out here):
# only municipalities whose name appears in both tables are kept.
merged_data = geo_data_sorted.merge(
    right=participation_by_municipality,
    how='inner',
    on='Municipality_name',
)
merged_data

In [None]:
# Plot the participation rate on a map of Catalonian municipalities.
# Shapes and values are matched through the DataFrame index: the geometry column is
# passed as the GeoJSON source and `locations` supplies the same index labels.
fig = px.choropleth_mapbox(
    merged_data,
    geojson=merged_data.geometry,
    locations=merged_data.index,
    color='Participation_rate',
    hover_name='Municipality_name',
    mapbox_style="carto-positron",
    center={"lat": 41.8781, "lon": 1.7834},  # Center of Catalonia
    zoom=7,
)

# The visible area is controlled by `center` and `zoom` above. `fig.update_geos(...)`
# is intentionally not called: it only configures `geo` subplots, and this figure
# draws on a mapbox subplot, so it would have no effect here.
fig.update_layout(height=600, width=800)
fig.show()

In [None]:
# Plot participation rate vs municipality electoral census
plt.scatter(participation_by_municipality['Electoral_census'], participation_by_municipality['Participation_rate'])
# The plotted x values are the raw census counts; only the axis is log-scaled,
# so the label names the scale instead of claiming the data were log-transformed.
plt.xlabel('Municipality electoral census (log scale)')
plt.ylabel('Participation rate')
plt.xscale('log')  # Set x-axis to log scale

# Show the plot
plt.show()

# Calculate and print the correlation coefficient.
# Series.corr defaults to Pearson and is computed here on the untransformed census
# values, i.e. it measures the linear relationship, not the log-scale one plotted above.
correlation_coefficient = participation_by_municipality['Electoral_census'].corr(participation_by_municipality['Participation_rate'])
print(f'Correlation Coefficient: {correlation_coefficient}')

In [None]:
# Distribution of the per-municipality participation rate, split into 30 bins
participation_rates = participation_by_municipality['Participation_rate']
plt.hist(participation_rates, bins=30)