In [None]:
import geopandas as gpd
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

# Read the income table from the shared data directory
data_dir = "../data/"
income = pd.read_csv(f"{data_dir}income_data.csv")

# Show the raw table
income

In [None]:
# Coerce the income column to a numeric dtype; entries that cannot be parsed
# become NaN (errors='coerce').
# The column is replaced with plain ``income[col] = ...`` on purpose: since
# pandas 2.0 ``income.loc[:, col] = ...`` writes into the existing column, so a
# column that was read as ``object`` would keep the ``object`` dtype.
# ``pd.to_numeric`` is applied to the whole Series at once instead of being
# called element by element through ``.apply``.
income['Average net income per person (K €)'] = pd.to_numeric(
    income['Average net income per person (K €)'], errors='coerce'
)
income

In [None]:
# Mean net income per person for every municipality (one row per name)
income_municipality = (
    income
    .groupby('Municipalities')['Average net income per person (K €)']
    .mean()
    .reset_index()
)
income_municipality

In [None]:
# Read the municipality boundaries (GeoJSON) into a GeoDataFrame
map_catalonia = 'municipality_map.geojson'
municipalities = gpd.read_file(f"{data_dir}{map_catalonia}")

In [None]:
# Give the map's name column the same name as the income table's merge key,
# then order the rows by that key
municipalities = (
    municipalities
    .rename(columns={'nom_muni': 'Municipalities'})
    .sort_values(by='Municipalities')
)

In [None]:
# Join the shapes with the per-municipality income on the shared name column.
# This is an inner join, so names present on only one side are dropped.
merged_data_municipality = municipalities.merge(
    income_municipality,
    how='inner',
    on='Municipalities',
)
merged_data_municipality

In [None]:
# Choropleth of the mean net income per person for each municipality.
# Rows are linked to shapes through the frame's index (passed as `locations`).
fig = (
    px.choropleth_mapbox(
        data_frame=merged_data_municipality,
        geojson=merged_data_municipality.geometry,
        locations=merged_data_municipality.index,
        color='Average net income per person (K €)',  # column driving the colour scale
        hover_name='Municipalities',
        title='Average net income per person by municipality',
        color_continuous_scale="sunset",
        mapbox_style="carto-positron",
        zoom=7,
        center={"lat": 41.8781, "lon": 1.7834},  # roughly the centre of Catalonia
    )
    .update_layout(height=600, width=800)
    # NOTE(review): update_geos configures the `geo` subplot, while
    # choropleth_mapbox draws on a mapbox subplot, so this call probably has
    # no visible effect. Confirm before removing.
    .update_geos(fitbounds="locations", visible=False)
)

fig.show()

In [None]:
# Read the municipality-level election results and rename their name column
# so it matches the merge key used by the income table
deriv_dir = "../derivatives/"
elections_municipality = (
    pd.read_csv(f"{deriv_dir}election_results_municipality.csv")
    .rename(columns={'Municipality_name': 'Municipalities'})
)
elections_municipality

In [None]:
# Attach each municipality's mean income to its election results; the left
# join keeps every election row even when no income figure matches.
merged_df = elections_municipality.merge(
    income_municipality,
    how='left',
    on='Municipalities',
)

# Correlation between the integer category codes of 'winner' and the income.
# NOTE(review): 'winner' looks nominal. Its codes are just positions in the
# sorted list of labels (missing values become -1), so this coefficient depends
# on how the labels sort. Treat it with caution.
winner_codes = merged_df['winner'].astype('category').cat.codes
correlation = winner_codes.corr(merged_df['Average net income per person (K €)'])
print("Correlation between winner and Average net income per person (K €):", correlation)

# Income distribution for each value of 'winner'
plt.figure(figsize=(12, 8))
sns.boxplot(data=merged_df, x='winner', y='Average net income per person (K €)')
plt.title('Boxplot of Average net income per person (K €) by Winner')
plt.show()

In [None]:
# Correlation between each party's votes column and the mean income
income_values = merged_df['Average net income per person (K €)']
correlation_junts_votes = merged_df['JUNTS_votes'].corr(income_values)
correlation_psc_votes = merged_df['PSC_votes'].corr(income_values)
correlation_erc_votes = merged_df['ERC_votes'].corr(income_values)

# Report the three coefficients
print("Correlation between JUNTS votes and Average net income per person (K €):", correlation_junts_votes)
print("Correlation between PSC votes and Average net income per person (K €):", correlation_psc_votes)
print("Correlation between ERC votes and Average net income per person (K €):", correlation_erc_votes)

# JUNTS votes against income; the x axis is logarithmic, so rows with
# non-positive votes cannot be placed on it
plt.figure(figsize=(10, 6))
plt.scatter(x=merged_df['JUNTS_votes'], y=income_values, alpha=0.5)
plt.xscale('log')
plt.xlabel('log(JUNTS_votes)')
plt.ylabel('Average net income per person (K €)')
plt.title('JUNTS_votes vs Average net income per person (K €)')
plt.show()

In [None]:
# Lookup table relating every municipality to its county and province
munic_county_prov_inh = pd.read_csv(f"{data_dir}municipality_county_province_cat.csv")

# Add county and province names to the per-municipality income (left join on
# the municipality name), then drop the lookup's now-redundant key column
result_df = (
    income_municipality
    .merge(
        munic_county_prov_inh[['Municipality_name', 'County_name', 'Province_name']],
        how='left',
        left_on='Municipalities',
        right_on='Municipality_name',
    )
    .drop(columns='Municipality_name')
)
result_df

In [None]:
# County-level income: mean of the municipality averages within each county
average_income_by_county = (
    result_df
    .groupby('County_name')['Average net income per person (K €)']
    .mean()
    .reset_index()
)

In [None]:
# Lowest and highest county-level average income
income_min = average_income_by_county['Average net income per person (K €)'].min()
income_max = average_income_by_county['Average net income per person (K €)'].max()

# Class boundaries expressed as fractions of the [income_min, income_max] span
alpha = 0.6  # where the 'High Income' class starts
beta = 0.8   # where the 'Very High Income' class starts

# Width of the observed income range
delta_income = income_max - income_min

# Absolute income thresholds separating the three classes
income_1 = income_min + alpha * delta_income
income_2 = income_min + beta * delta_income


def assign_wealth_label(x):
    """Return the wealth-class label for one average-income value.

    The classes are [income_min, income_1), [income_1, income_2) and
    [income_2, income_max]; each label embeds its own bounds rounded to one
    decimal.

    Parameters
    ----------
    x : float
        Average net income per person, in K €.

    Returns
    -------
    str or None
        The class label, or None when ``x`` lies outside
        [income_min, income_max]. NaN also yields None, because every
        comparison against NaN is false.
    """
    if income_min <= x < income_1:
        return 'Low to Medium Income ({:.1f} to {:.1f} K €)'.format(round(income_min, 1), round(income_1, 1))
    elif income_1 <= x < income_2:
        return 'High Income ({:.1f} to {:.1f} K €)'.format(round(income_1, 1), round(income_2, 1))
    elif income_2 <= x <= income_max:
        return 'Very High Income ({:.1f} to {:.1f} K €)'.format(round(income_2, 1), round(income_max, 1))
    else:
        return None


# Label every county once (the original cell repeated this assignment twice)
average_income_by_county['wealth_label'] = average_income_by_county['Average net income per person (K €)'].apply(assign_wealth_label)
average_income_by_county['wealth_label']

In [None]:
# Minimum, 25 %, 50 %, 75 % quantiles and maximum of the county-level income
quartiles = average_income_by_county['Average net income per person (K €)'].quantile([0, 0.25, 0.5, 0.75, 1])


def assign_wealth_label_quartiles(x):
    """Return the quantile-based wealth label for one average-income value.

    Values up to the median (``quartiles.iloc[2]``) are 'Low/Medium Income',
    values above the median and up to the 75 % quantile (``quartiles.iloc[3]``)
    are 'High Income', and anything larger is 'Very High Income'.

    NOTE: the label text tags the median as 'Q3' and the 75 % quantile as
    'Q4', and the minimum as 'Q1'. The strings are deliberately left unchanged
    because the county map cell further down keys its colours on them.

    Parameters
    ----------
    x : float
        Average net income per person, in K €.

    Returns
    -------
    str or None
        The class label, or None when ``x`` is missing.
    """
    # Without this guard a NaN fails both comparisons below and would be
    # reported as 'Very High Income'.
    if pd.isna(x):
        return None
    if x <= quartiles.iloc[2]:  # Low and Medium income are merged into one class
        return 'Low/Medium Income (Q1: {:.1f} to Q3: {:.1f} K €)'.format(round(quartiles.iloc[0], 1), round(quartiles.iloc[2], 1))
    elif quartiles.iloc[2] < x <= quartiles.iloc[3]:
        return 'High Income (Q3: {:.1f} to Q4: {:.1f} K €)'.format(round(quartiles.iloc[2], 1), round(quartiles.iloc[3], 1))
    else:
        return 'Very High Income (Q4: >{:.1f} K €)'.format(round(quartiles.iloc[3], 1))


# Label every county with its quantile-based class
average_income_by_county['wealth_label_quartiles'] = average_income_by_county['Average net income per person (K €)'].apply(assign_wealth_label_quartiles)

# Distribution of the county averages (each row of the frame is one county)
plt.hist(average_income_by_county['Average net income per person (K €)'])
plt.xlabel('Average net income per person (K €)')
plt.ylabel('Number of counties')
plt.show()

# Only the last expression of a cell is displayed, so the new column is shown
# here rather than in the middle of the cell
average_income_by_county['wealth_label_quartiles']

In [None]:
# Read the county boundaries (GeoJSON) into a GeoDataFrame
map_catalonia = 'county_map.geojson'
counties = gpd.read_file(f"{data_dir}{map_catalonia}")

# Expose the county name under the merge key used by the income table,
# and keep a copy ordered by that key
counties = counties.rename(columns={'nomcomar': 'County_name'})
counties_sorted = counties.sort_values(by='County_name')

In [None]:
# Join the county shapes with the county-level income (inner join on the name)
merged_data_county = counties_sorted.merge(
    average_income_by_county,
    how='inner',
    on='County_name',
)

In [None]:
# Choropleth of the mean net income per person for each county.
# Rows are linked to shapes through the frame's index (passed as `locations`).
fig = (
    px.choropleth_mapbox(
        data_frame=merged_data_county,
        geojson=merged_data_county.geometry,
        locations=merged_data_county.index,
        color='Average net income per person (K €)',  # column driving the colour scale
        hover_name='County_name',
        title='Average net income per person by county',
        color_continuous_scale="sunset",
        mapbox_style="carto-positron",
        zoom=7,
        center={"lat": 41.8781, "lon": 1.7834},  # roughly the centre of Catalonia
    )
    .update_layout(height=600, width=800)
    # NOTE(review): update_geos configures the `geo` subplot, while
    # choropleth_mapbox draws on a mapbox subplot, so this call probably has
    # no visible effect. Confirm before removing.
    .update_geos(fitbounds="locations", visible=False)
)

fig.show()

In [None]:
# Choropleth of the quantile-based wealth class of each county.

# Derive the class order (poorest to richest) from the data, ranking each label
# by the lowest income it contains. The label text embeds quantile values that
# change with the data, so hard-coded label strings would stop matching and the
# colours would silently fall back to plotly's defaults.
wealth_label_order = (
    merged_data_county
    .groupby('wealth_label_quartiles')['Average net income per person (K €)']
    .min()
    .sort_values()
    .index
    .tolist()
)
# Light to dark green from the poorest to the richest class; zip() simply stops
# early if fewer than three classes are present.
wealth_label_colors = dict(zip(wealth_label_order, ['lightgreen', 'green', 'darkgreen']))

fig = px.choropleth_mapbox(
    merged_data_county,
    geojson=merged_data_county.geometry,
    locations=merged_data_county.index,
    color='wealth_label_quartiles',  # categorical column driving the colours
    hover_name='County_name',
    mapbox_style="carto-positron",
    center={"lat": 41.8781, "lon": 1.7834},  # roughly the centre of Catalonia
    zoom=7,
    color_discrete_map=wealth_label_colors,
    category_orders={'wealth_label_quartiles': wealth_label_order},  # legend from poorest to richest
    title='Average net income per person by county'
)

fig.update_layout(height=600, width=800)
# NOTE(review): update_geos configures the `geo` subplot, while
# choropleth_mapbox draws on a mapbox subplot, so this call probably has no
# visible effect. Confirm before removing.
fig.update_geos(fitbounds="locations", visible=False)

fig.show()

In [None]:
# Read the county-level election results
elections_county = pd.read_csv(f"{deriv_dir}election_results_county.csv")

# Vote columns to express relative to the size of the electoral census
votes_columns = [
    'ERC_votes', 'PSC_votes', 'ECP_votes', 'JUNTS_votes', 'PP_votes',
    'PACMA_votes', 'MASPAIS_votes', 'RECORTES0_votes', 'IFEM_votes',
    'PUM+J_votes', 'PCTC_votes', 'PCPC_votes', 'IZQP_votes',
]

# Replace each vote column by its ratio to the county's 'Electoral census'
electoral_census = elections_county['Electoral census']
for column in votes_columns:
    elections_county[column] = elections_county[column].div(electoral_census)

# Show the rescaled table
elections_county

In [None]:
# Attach each county's mean income (and wealth labels) to its election
# results; the left join keeps every election row.
merged_df = elections_county.merge(
    average_income_by_county,
    how='left',
    on='County_name',
)

# Correlation between the integer category codes of 'winner' and the income.
# NOTE(review): 'winner' looks nominal. Its codes are just positions in the
# sorted list of labels (missing values become -1), so this coefficient depends
# on how the labels sort. Treat it with caution.
winner_codes = merged_df['winner'].astype('category').cat.codes
correlation = winner_codes.corr(merged_df['Average net income per person (K €)'])
print("Correlation between winner and Average net income per person (K €):", correlation)

# Income distribution for each value of 'winner'
plt.figure(figsize=(12, 8))
sns.boxplot(data=merged_df, x='winner', y='Average net income per person (K €)')
plt.title('Boxplot of Average net income per person (K €) by Winner')
plt.show()

In [None]:
# Correlation between each party's votes column and the county mean income
income_values = merged_df['Average net income per person (K €)']
correlation_junts_votes = merged_df['JUNTS_votes'].corr(income_values)
correlation_psc_votes = merged_df['PSC_votes'].corr(income_values)
correlation_erc_votes = merged_df['ERC_votes'].corr(income_values)

# Report the three coefficients
print("Correlation between JUNTS votes and Average net income per person (K €):", correlation_junts_votes)
print("Correlation between PSC votes and Average net income per person (K €):", correlation_psc_votes)
print("Correlation between ERC votes and Average net income per person (K €):", correlation_erc_votes)

# JUNTS votes against income; the x axis is logarithmic, so rows with
# non-positive values cannot be placed on it
plt.figure(figsize=(10, 6))
plt.scatter(x=merged_df['JUNTS_votes'], y=income_values, alpha=0.5)
plt.xscale('log')
plt.xlabel('log(JUNTS_votes)')
plt.ylabel('Average net income per person (K €)')
plt.title('JUNTS_votes vs Average net income per person (K €)')
plt.show()

In [None]:
# Read the province boundaries (GeoJSON) into a GeoDataFrame
map_catalonia = 'province_map.geojson'
provinces = gpd.read_file(f"{data_dir}{map_catalonia}")

# Expose the province name under the merge key used by the income table,
# and keep a copy ordered by that key
provinces = provinces.rename(columns={'nomprov': 'Province_name'})
provinces_sorted = provinces.sort_values(by='Province_name')


In [None]:
# Province-level income: mean of the municipality averages within each province
average_income_by_province = (
    result_df
    .groupby('Province_name')['Average net income per person (K €)']
    .mean()
    .reset_index()
)

In [None]:
# Join the province shapes with the province-level income (inner join on the name)
merged_data_prov = provinces_sorted.merge(
    average_income_by_province,
    how='inner',
    on='Province_name',
)

In [None]:
# Choropleth of the mean net income per person for each province.
# Rows are linked to shapes through the frame's index (passed as `locations`).
fig = (
    px.choropleth_mapbox(
        data_frame=merged_data_prov,
        geojson=merged_data_prov.geometry,
        locations=merged_data_prov.index,
        color='Average net income per person (K €)',  # column driving the colour scale
        hover_name='Province_name',
        title='Average net income per person by province',
        color_continuous_scale="sunset",
        mapbox_style="carto-positron",
        zoom=7,
        center={"lat": 41.8781, "lon": 1.7834},  # roughly the centre of Catalonia
    )
    .update_layout(height=600, width=800)
    # NOTE(review): update_geos configures the `geo` subplot, while
    # choropleth_mapbox draws on a mapbox subplot, so this call probably has
    # no visible effect. Confirm before removing.
    .update_geos(fitbounds="locations", visible=False)
)
fig.show()

In [None]:
# Read the province-level election results from the derivatives directory
elections_province = pd.read_csv(f"{deriv_dir}election_results_province.csv")

In [None]:
# Attach each province's mean income to its election results; the left join
# on 'Province_name' keeps every election row.
merged_df = elections_province.merge(
    average_income_by_province,
    how='left',
    on='Province_name',
)

# Correlation between the integer category codes of 'winner' and the income.
# NOTE(review): 'winner' looks nominal. Its codes are just positions in the
# sorted list of labels (missing values become -1), so this coefficient depends
# on how the labels sort. Treat it with caution.
winner_codes = merged_df['winner'].astype('category').cat.codes
correlation = winner_codes.corr(merged_df['Average net income per person (K €)'])
print("Correlation between winner and Average net income per person (K €):", correlation)

# Income distribution for each value of 'winner'
plt.figure(figsize=(12, 8))
sns.boxplot(data=merged_df, x='winner', y='Average net income per person (K €)')
plt.title('Boxplot of Average net income per person (K €) by Winner')
plt.show()

In [None]:
# Correlation between each party's votes column and the province mean income
income_values = merged_df['Average net income per person (K €)']
correlation_junts_votes = merged_df['JUNTS_votes'].corr(income_values)
correlation_psc_votes = merged_df['PSC_votes'].corr(income_values)
correlation_erc_votes = merged_df['ERC_votes'].corr(income_values)

# Report the three coefficients
print("Correlation between JUNTS votes and Average net income per person (K €):", correlation_junts_votes)
print("Correlation between PSC votes and Average net income per person (K €):", correlation_psc_votes)
print("Correlation between ERC votes and Average net income per person (K €):", correlation_erc_votes)

# JUNTS votes against income; the x axis is logarithmic, so rows with
# non-positive values cannot be placed on it
plt.figure(figsize=(10, 6))
plt.scatter(x=merged_df['JUNTS_votes'], y=income_values, alpha=0.5)
plt.xscale('log')
plt.xlabel('log(JUNTS_votes)')
plt.ylabel('Average net income per person (K €)')
plt.title('JUNTS_votes vs Average net income per person (K €)')
plt.show()