# Assignment Starter Code

# Read structural data per district

In [None]:
!pip install openpyxl

In [None]:
import pandas as pd

# Read "District" as a string (presumably so that district codes keep any
# leading zeros instead of being parsed as numbers -- confirm against the file).
df_structure = pd.read_excel('ew24_structure_data.xlsx', dtype={'District': str})

# `df` stays bound for backward compatibility, but the historical-data cell
# rebinds `df` to a different table; `df_structure` keeps this one reachable.
df = df_structure

print(df_structure.head())


# Read historical data on European Elections

In [None]:
import pandas as pd

# Historical European election data; the file uses ';' as field separator.
df_history = pd.read_csv('ew_79_24.csv', sep=";")

# Keep the generic name `df` bound as before, for any cell that still uses it.
df = df_history

# Show the first few rows as a quick sanity check of the parsed columns.
print(df_history.head())

# Read 2024 EU Election Results per District (Number of votes per party and district)

In [None]:
import pandas as pd

# Votes per party and district; the file is ';'-separated and "District" is
# read as a string so it can later be matched against string district keys.
votes_csv_options = {'sep': ";", 'dtype': {'District': str}}
df_votes = pd.read_csv('ewkr24_umrechnung_ew19.csv', **votes_csv_options)

# Quick look at the first rows of the parsed table.
print(df_votes.head())

# Visualize Geographic Data

To display election data, you don't need postal codes but rather the area numbers. You can find these geographic data here:

https://gdz.bkg.bund.de/index.php/default/verwaltungsgebiete-1-2-500-000-stand-31-12-vg2500-12-31.html

Download the data in the GK3 format. The download also contains the documentation. All required files are also provided in the materials folder:

VG2500_KRS.shp / .dbf / .cpg / .prj / .shx

In [None]:
!pip install geopandas

In [None]:
!pip install matplotlib

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Load the district geometries; the `AGS` column is used as the label below.
shapefile_path = 'VG2500_KRS.shp'
districts = gpd.read_file(shapefile_path)

# Outline map: white fill, black borders.
fig, ax = plt.subplots(figsize=(10, 15))
districts.plot(ax=ax, color='white', edgecolor='black')

# Compute the centroids once; accessing `.centroid` separately for x and y
# would recompute every centroid twice.
centroids = districts.geometry.centroid

# Write each district's AGS key at the centroid of its geometry.
for x, y, label in zip(centroids.x, centroids.y, districts['AGS']):
    ax.text(x, y, label, fontsize=8, ha='center', va='center')

ax.set_title('Districts in Germany')
plt.show()

# Join Geodata and votes

In [None]:
# Preview the first five district rows to see which columns the shapefile provides.
districts.head(5)

In [None]:
# Display the votes table as the cell output, to compare its "District"
# column with the keys of the geodata before joining the two.
df_votes

In [None]:
# Only the join key and the SPD column are needed from the votes table.
spd_votes = df_votes[['District', 'SPD']]

# Left join on AGS == District: every row of `districts` is kept, and rows
# without a matching "District" get NaN in the columns coming from the votes.
df_district_votes_spd = pd.merge(
    left=districts,
    right=spd_votes,
    how='left',
    left_on='AGS',
    right_on='District',
)

# Print the joined table.
print(df_district_votes_spd)

In [None]:
# Show the two join keys next to the SPD column to check how the rows matched.
join_check_columns = ["AGS", "District", "SPD"]
df_district_votes_spd[join_check_columns]


In [None]:
# Plotting the GeoDataFrame with a color map, but grayscale doesn't work
ax = df_district_votes_spd.plot(column='SPD', cmap='Greys', linewidth=0.8, edgecolor='0.8', legend=True)

ax.set_title("Map of SPD Voters")
ax.set_axis_off()
plt.show()

## Scale the numbers and use a different colormap

In [None]:
!pip install scikit-learn

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale the SPD column (MinMaxScaler's default target range is [0, 1])
# and store the result as a new column next to the original values.
scaler = MinMaxScaler()
spd_scaled = scaler.fit_transform(df_district_votes_spd[['SPD']])
df_district_votes_spd['SPD_normalized'] = spd_scaled

# Print the keys together with the raw and the rescaled SPD values.
preview_columns = ['AGS', 'District', 'SPD', 'SPD_normalized']
print(df_district_votes_spd[preview_columns])

In [None]:
from matplotlib.colors import LinearSegmentedColormap, Normalize
import matplotlib.colors as mcolors
import numpy as np

# Colormap running from white to '#e3000f', built with 1000 colour steps.
color_spd = LinearSegmentedColormap.from_list('color_spd', ['#fff', '#e3000f'], N=1000)

# Map the colour scale onto the observed range of the rescaled column.
spd_normalized = df_district_votes_spd['SPD_normalized']
norm = Normalize(vmin=spd_normalized.min(), vmax=spd_normalized.max())

ax = df_district_votes_spd.plot(
    column='SPD_normalized',
    cmap=color_spd,
    norm=norm,
    linewidth=0.8,
    edgecolor='0.8',
    legend=True,
)

# Title only; the axes are hidden.
ax.set_title("Normalized SPD Values")
ax.set_axis_off()
plt.show()

... or maybe percentages of voters, rather than absolute numbers, are better to visualize?