In [None]:
import geopandas as gpd
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

# Read the income table from the shared data directory.
# `data_dir` is reused further down to locate the districts GeoJSON.
data_dir = "../data/"
income_csv_path = data_dir + "income_data.csv"
income = pd.read_csv(income_csv_path)
income

In [None]:
# Keep only the rows whose municipality is Barcelona.
# An explicit .copy() makes `income_bcn` an independent frame, so that later
# column assignments on it neither raise SettingWithCopyWarning nor depend on
# whether pandas returned a view or a copy of `income`.
is_barcelona = income['Municipalities'] == 'Barcelona'
income_bcn = income.loc[is_barcelona].copy()
income_bcn

In [None]:
# Cast the income and district columns to float.
# `df.loc[:, col] = values` writes *into* the existing column (pandas >= 2.0),
# so an object-dtype column would stay object and the cast would be lost.
# Rebinding the result of DataFrame.astype really changes the dtypes, works on
# a filtered slice without SettingWithCopyWarning, and is safe to re-run.
income_bcn = income_bcn.astype({
    'Average net income per person (K €)': float,
    'Districts': float,
})

In [None]:
# Average the per-person income within every Barcelona district and return a
# flat two-column frame (district id, mean income) ordered by district id.
district_groups = income_bcn.groupby('Districts')
district_mean_income = district_groups['Average net income per person (K €)'].mean()
income_bcn_district = district_mean_income.reset_index().sort_values(by='Districts')
income_bcn_district

In [None]:
# Read Barcelona's districts map.
bcn_raw = gpd.read_file(data_dir + "districts_BCN.geojson")

# Build the district-number key used to join the map with the income table.
# The previous version dropped 'DISTRICTE' before renaming it (the rename was a
# silent no-op) and then numbered the rows 1..10 by position, which mislabels
# districts whenever the file is not stored in district order.
if 'DISTRICTE' in bcn_raw.columns:
    # Prefer the district code shipped with the geometry itself.
    bcn = bcn_raw[['NOM', 'DISTRICTE', 'geometry']].rename(columns={'DISTRICTE': 'Districts'})
    # NOTE(review): assumes 'DISTRICTE' holds numeric codes (e.g. "01".."10") — confirm.
    bcn['Districts'] = pd.to_numeric(bcn['Districts'])
else:
    # Fallback: positional numbering, sized from the data instead of a fixed list.
    # NOTE(review): only correct if the features are stored in district order.
    bcn = bcn_raw[['NOM', 'geometry']].copy()
    bcn['Districts'] = range(1, len(bcn) + 1)

# Keep the `bcn_gpd` name that the merge cell below relies on.
bcn_gpd = gpd.GeoDataFrame(bcn)
bcn_gpd

In [None]:
# Thresholds used to bucket districts by income level.
# All statistics are taken over the per-district mean income column.
district_income = income_bcn_district['Average net income per person (K €)']

# 25th / 50th / 75th percentiles (the median is only reported, not used as a cut).
quartiles = district_income.quantile([0.25, 0.5, 0.75])

# Band edges: observed extremes plus the lower and upper quartiles.
income_min, income_max = district_income.min(), district_income.max()
income_1, income_3 = quartiles[0.25], quartiles[0.75]

# Map an income value to the text label of the band it falls into.
def assign_income_label(x):
    """Return the income-band label for the income value ``x``.

    Uses the module-level thresholds ``income_min``, ``income_1``,
    ``income_3`` and ``income_max``:

    * ``income_min <= x < income_1``  -> "Low Income" label
    * ``income_1 <= x < income_3``    -> "Medium/High Income" label
    * anything else                   -> "Very High Income" label

    Each label embeds the two edges of its band, rounded to one decimal.
    """
    if income_min <= x < income_1:
        template = 'Low Income (Q1: {:.1f} - Q2: {:.1f} K €)'
        lower_edge, upper_edge = income_min, income_1
    elif income_1 <= x < income_3:
        template = 'Medium/High Income (Q2: {:.1f} - Q3: {:.1f} K €)'
        lower_edge, upper_edge = income_1, income_3
    else:
        # Catch-all branch: every value not matched by the two bands above.
        template = 'Very High Income (Q3: {:.1f} - Q4: {:.1f} K €)'
        lower_edge, upper_edge = income_3, income_max
    return template.format(round(lower_edge, 1), round(upper_edge, 1))

# Label every district with its income band (adds the "income_label" column).
district_income_values = income_bcn_district['Average net income per person (K €)']
income_bcn_district['income_label'] = district_income_values.apply(assign_income_label)

# Show the labelled table and the quartiles the bands were derived from.
print(income_bcn_district)
print(quartiles)

# Histogram of the per-district mean income.
hist_fig, hist_ax = plt.subplots()
hist_ax.hist(district_income_values)
hist_ax.set_xlabel('Average net income per person (K €)')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Income Distribution in Barcelona Districts')
plt.show()


In [None]:
# Attach the per-district income figures to the district geometries.
# Calling .merge on the GeoDataFrame (left side) keeps the result geographic;
# the join is an inner join on the shared 'Districts' key.
merged_data = bcn_gpd.merge(right=income_bcn_district, how='inner', on='Districts')
merged_data

In [None]:
# Continuous choropleth: shade each district by its mean net income per person.
income_map_center = {"lat": 41.3874, "lon": 2.1686}  # initial map centre (lat/lon)

fig = px.choropleth_mapbox(
    merged_data,
    # Geometries come straight from the merged frame; features are matched to
    # rows through the frame's index.
    geojson=merged_data.geometry,
    locations=merged_data.index,
    color='Average net income per person (K €)',  # column that drives the colour scale
    color_continuous_scale="sunset",
    hover_name='Districts',
    title='Income by district',
    mapbox_style="carto-positron",
    center=income_map_center,
    zoom=10,
)

# Fixed canvas size.
# NOTE(review): update_geos configures `layout.geo`, while this figure is
# mapbox-based — the call probably has no visible effect; confirm before removing.
fig.update_layout(height=600, width=800).update_geos(fitbounds="locations", visible=False)
fig.show()

In [None]:
# Categorical choropleth: colour each district by its income band.
# Sorting by income makes the bands appear from lowest to highest in the data.
merged_data_sort = merged_data.sort_values(by='Average net income per person (K €)')

# Colour per band, keyed by the fixed label prefix. The full labels embed the
# quartile values computed from the data, so hard-coding them (as before) made
# the colours silently fall back to plotly defaults as soon as the data changed.
band_colors = {
    'Low Income': 'lightgreen',
    'Medium/High Income': 'green',
    'Very High Income': 'darkgreen',
}
income_label_colors = {
    label: color
    for label in merged_data_sort['income_label'].unique()
    for prefix, color in band_colors.items()
    if label.startswith(prefix)
}

fig = px.choropleth_mapbox(
    merged_data_sort,
    geojson=merged_data_sort.geometry,
    locations=merged_data_sort.index,
    color='income_label',  # discrete income band assigned to each district
    hover_name='Districts',
    mapbox_style="carto-positron",
    center={"lat": 41.3874, "lon": 2.1686},  # initial map centre (lat/lon)
    color_discrete_map=income_label_colors,
    title='Wealth level by district',
    zoom=10
)

fig.update_layout(height=600, width=800)
# NOTE(review): update_geos configures `layout.geo`, while this figure is
# mapbox-based — the call probably has no visible effect; confirm before removing.
fig.update_geos(fitbounds="locations", visible=False)
fig.show()

In [None]:
# Read the per-district election results and align the key column name with
# the income table ('District' -> 'Districts') so both can be joined on it.
deriv_dir = "../derivatives/"
elections_csv_path = deriv_dir + "election_results_bcn.csv"
elections_bcn = pd.read_csv(elections_csv_path).rename(columns={'District': 'Districts'})

In [None]:
# Attach the district income figures to every election row (left join keeps
# all election rows, even if a district has no income data).
merged_df = pd.merge(elections_bcn, income_bcn_district, on='Districts', how='left')

# Strength of association between the winning party and income.
# 'winner' is a nominal variable: its category codes are just alphabetical
# positions, so a Pearson correlation on them is arbitrary (renaming a party
# changes the result). The correlation ratio (eta) is used instead: the share
# of income variance explained by the winner groups, square-rooted, in [0, 1].
winner_income = merged_df[['winner', 'Average net income per person (K €)']].dropna()
income_values = winner_income['Average net income per person (K €)']
overall_mean = income_values.mean()
income_by_winner = income_values.groupby(winner_income['winner'])
between_group_ss = (income_by_winner.count() * (income_by_winner.mean() - overall_mean) ** 2).sum()
total_ss = ((income_values - overall_mean) ** 2).sum()
# Undefined when income does not vary at all (or there is no data).
correlation = (between_group_ss / total_ss) ** 0.5 if total_ss > 0 else float('nan')

# Display the association measure
print("Correlation ratio (eta) between winner and Average net income per person (K €):", correlation)

# Plot a boxplot based on 'winner'
plt.figure(figsize=(12, 8))
sns.boxplot(x='winner', y='Average net income per person (K €)', data=merged_df)
plt.title('Boxplot of Average net income per person (K €) by Winner')
plt.show()


In [None]:
# Pearson correlation between each party's vote column and the average income.
income_for_votes = merged_df['Average net income per person (K €)']
party_vote_columns = ['JUNTS_votes', 'PSC_votes', 'ERC_votes']
correlation_junts_votes, correlation_psc_votes, correlation_erc_votes = (
    merged_df[vote_column].corr(income_for_votes) for vote_column in party_vote_columns
)

# Report the three coefficients.
# NOTE(review): if the *_votes columns are raw counts they may mostly track
# district size rather than party preference — confirm against the data.
print("Correlation between JUNTS votes and Average net income per person (K €):", correlation_junts_votes)
print("Correlation between PSC votes and Average net income per person (K €):", correlation_psc_votes)
print("Correlation between ERC votes and Average net income per person (K €):", correlation_erc_votes)

# Scatter plot of JUNTS votes (x) against income (y).
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_ax.scatter(merged_df['JUNTS_votes'], income_for_votes, alpha=0.5)
scatter_ax.set_title('Scatter Plot of JUNTS_votes vs Average net income per person (K €)')
scatter_ax.set_xlabel('JUNTS_votes')
scatter_ax.set_ylabel('Average net income per person (K €)')
plt.show()