Connecting data

In [None]:
%pip install pandas geopandas


1. Parlamentarni izbori

In [2]:
import pandas as pd
import geopandas as gpd

# Input locations, given relative to the notebook's working directory
election_results_csv = "data/election_results/kombinirani_rezultati.csv"
admin_units_geojson = "data/administrative_units/gradovi_opcine_zupanije.geojson"

# Election results table (pandas DataFrame)
csv_data = pd.read_csv(election_results_csv)

# Administrative-unit features (geopandas GeoDataFrame)
geojson_data = gpd.read_file(admin_units_geojson)


In [None]:
# Preview both inputs (as loaded, before the join keys are added)
print(csv_data.head(), geojson_data.head(), sep='\n')

# Give both tables identically named, whitespace-trimmed join keys.
# The election CSV already has 'Županija'; its municipality name lives in
# 'Grad/općina/država'.
csv_data = csv_data.assign(**{
    'Županija': csv_data['Županija'].str.strip(),
    'Grad/općina': csv_data['Grad/općina/država'].str.strip(),
})

# In the GeoJSON the county comes from 'text_right' and the municipality from 'text_left'.
geojson_data = geojson_data.assign(**{
    'Županija': geojson_data['text_right'].str.strip(),
    'Grad/općina': geojson_data['text_left'].str.strip(),
})

# Inner join: only units present in both tables survive
parlamentarni_izbori = geojson_data.merge(
    csv_data,
    how='inner',
    on=['Županija', 'Grad/općina'],
)

In [None]:
%pip install folium
%pip install mapclassify
%pip install geopandas
%pip install selenium


In [None]:
import folium
from IPython.display import display

# Expects `parlamentarni_izbori`, the merged GeoDataFrame built in the previous
# cell, with the 'Važeći listići' and 'Ukupno birača' columns.

# Fill colour used for features whose percentage cannot be computed
NO_DATA_COLOR = '#808080'

# Valid ballots ('Važeći listići') as a percentage of total voters ('Ukupno birača').
# A zero denominator is masked to NaN first: dividing by it would produce
# inf/NaN, and an inf would propagate into max_value and the colour scale.
total_voters = parlamentarni_izbori['Ukupno birača']
parlamentarni_izbori['percentage'] = (
    parlamentarni_izbori['Važeći listići'] / total_voters.where(total_voters != 0)
) * 100

# min()/max() skip NaN, so the scale is based on computable rows only
min_value = parlamentarni_izbori['percentage'].min()
max_value = parlamentarni_izbori['percentage'].max()

# Create a color map for the percentage (from blue to yellow to red)
colormap = folium.LinearColormap(
    colors=['blue', 'yellow', 'red'],
    vmin=min_value,  # Minimum value for color map
    vmax=max_value,  # Maximum value for color map
    caption='Percentage of Važeći listići to Ukupno birača'
)

# One fill colour per feature; NaN never reaches the colormap and gets the
# neutral fallback instead.
parlamentarni_izbori['color'] = parlamentarni_izbori['percentage'].apply(
    lambda value: colormap(value) if pd.notna(value) else NO_DATA_COLOR
)

# Timestamps are turned into strings before the frame is handed to folium.
# 'datetime' / 'datetimetz' select every naive and tz-aware datetime column,
# not only ns-resolution naive/UTC ones.
for col in parlamentarni_izbori.select_dtypes(include=['datetime', 'datetimetz']).columns:
    parlamentarni_izbori[col] = parlamentarni_izbori[col].astype(str)

# Columns whose name ends in '%' are appended to the tooltip; the list does not
# change from row to row, so it is built once instead of inside the row callback.
percent_columns = [col for col in parlamentarni_izbori.columns if col.endswith('%')]

# Prepare hover information (HTML, one line per value)
parlamentarni_izbori['hover_info'] = parlamentarni_izbori.apply(
    lambda row: (
        f"Županija: {row['Županija']}<br>Grad/Općina: {row['Grad/općina']}"
        f"<br>Ukupno birača: {row['Ukupno birača']}"
        f"<br>Važeći listići: {row['Važeći listići']}"
        f"<br>Percentage: {row['percentage']:.2f}%"
        + ''.join(f"<br>{col}: {row[col]}" for col in percent_columns)
    ),
    axis=1
)

# Create a folium map centered on Croatia
m = folium.Map(location=[45.1, 15.2], zoom_start=7)

# Style callback: each feature is filled with its precomputed 'color' property
def style_function(feature):
    return {
        'fillColor': feature['properties']['color'],
        'color': 'black',
        'weight': 1,
        'fillOpacity': 0.5
    }

# Add GeoJSON layer with hover functionality
folium.GeoJson(
    parlamentarni_izbori,
    style_function=style_function,
    tooltip=folium.GeoJsonTooltip(
        fields=['hover_info'],
        aliases=['Info: '],
        localize=True
    )
).add_to(m)

# Add the colormap legend to the map
colormap.add_to(m)

# Display the map directly in the notebook
display(m)


2. Popis stanovništva

In [33]:
def create_interactive_population_map(df, color_column_name):
    """Show an interactive folium choropleth for one column of a census table.

    The table is inner-joined onto the notebook-level ``geojson_data`` on
    'Županija' and 'Grad/općina'. Each feature is filled with a
    blue-yellow-red colour derived from ``color_column_name`` and gets a
    tooltip listing the county, municipality, 'Ukupno' and every column whose
    name ends in '%'. The map is rendered with ``display``; nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Census table with the columns 'Županija', 'Grad/općina', 'Ukupno'
        and ``color_column_name``.
    color_column_name : str
        Column whose values drive the fill colour (its min/max set the scale).

    Raises
    ------
    ValueError
        If a join key, 'Ukupno' or ``color_column_name`` is missing.
    """
    merge_keys = ['Županija', 'Grad/općina']
    missing_columns_message = "The CSV must contain 'Županija', 'Grad/općina', and 'Ukupno' columns."

    # The join keys must be checked before merging: without them merge() itself
    # raises a KeyError and the ValueError below would never be reached.
    if any(key not in df.columns for key in merge_keys):
        raise ValueError(missing_columns_message)

    # Merge dataframes based on the matching columns
    df = geojson_data.merge(df, on=merge_keys, how='inner')

    # Ensure the necessary columns are present in the merged frame
    if any(col not in df.columns for col in merge_keys + ['Ukupno']):
        raise ValueError(missing_columns_message)

    # Fail fast on a wrong colour column, before the per-row tooltip work
    if color_column_name not in df.columns:
        raise ValueError(f"The specified color column '{color_column_name}' does not exist in the CSV file.")

    # Timestamps are turned into strings before the frame is handed to folium.
    # 'datetime' / 'datetimetz' cover all naive and tz-aware datetime columns.
    for col in df.select_dtypes(include=['datetime', 'datetimetz']).columns:
        df[col] = df[col].astype(str)

    # The '%' columns are the same for every row, so list them once
    percent_columns = [col for col in df.columns if col.endswith('%')]

    # Prepare hover information with columns that end with '%'
    df['hover_info'] = df.apply(
        lambda row: f"Županija: {row['Županija']}<br>Grad/Općina: {row['Grad/općina']}<br>Ukupno: {row['Ukupno']}<br>" +
                    ''.join(f"<br>{col}: {row[col]}" for col in percent_columns),
        axis=1
    )

    # Colour scale spans the observed range of the chosen column (NaN is skipped)
    min_value = df[color_column_name].min()
    max_value = df[color_column_name].max()

    colormap = folium.LinearColormap(
        colors=['blue', 'yellow', 'red'],
        vmin=min_value,
        vmax=max_value,
        caption=color_column_name
    )

    # Missing values never reach the colormap; they get a neutral grey fill
    no_data_color = '#808080'
    df['color'] = df[color_column_name].apply(
        lambda value: colormap(value) if pd.notna(value) else no_data_color
    )

    # Create a folium map centered on Croatia
    m = folium.Map(location=[45.1, 15.2], zoom_start=7)

    # Style callback: each feature is filled with its precomputed 'color' property
    def style_function(feature):
        return {
            'fillColor': feature['properties']['color'],
            'color': 'black',
            'weight': 1,
            'fillOpacity': 0.5
        }

    # Filter out rows with missing geometries
    df = df.dropna(subset=['geometry'])

    # Add GeoJSON layer with hover functionality
    folium.GeoJson(
        df,
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(
            fields=['hover_info'],
            aliases=['Info: '],
            localize=True
        )
    ).add_to(m)

    # Add the colormap legend to the map
    colormap.add_to(m)

    # Display the map directly in the notebook
    display(m)


In [12]:
def create_static_population_map(df, color_column_name):
    """Draw a static choropleth of one column of a census table.

    The table is inner-joined onto the notebook-level ``geojson_data`` on
    'Županija' and 'Grad/općina', rows without a geometry are dropped, and the
    result is plotted with the 'viridis' colormap and a legend. Nothing is
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Census table with the columns 'Županija', 'Grad/općina', 'Ukupno'
        and ``color_column_name``. 'Ukupno' is required by the validation
        although it is not plotted.
    color_column_name : str
        Column whose values colour the map; also used as the figure title.

    Raises
    ------
    ValueError
        If a join key, 'Ukupno' or ``color_column_name`` is missing.
    """
    merge_keys = ['Županija', 'Grad/općina']
    missing_columns_message = "The CSV must contain 'Županija', 'Grad/općina', and 'Ukupno' columns."

    # The join keys must be checked before merging: without them merge() itself
    # raises a KeyError and the ValueError below would never be reached.
    if any(key not in df.columns for key in merge_keys):
        raise ValueError(missing_columns_message)

    # Merge dataframes based on the matching columns
    df = geojson_data.merge(df, on=merge_keys, how='inner')

    # Ensure the necessary columns are present in the merged frame
    if any(col not in df.columns for col in merge_keys + ['Ukupno']):
        raise ValueError(missing_columns_message)

    # Convert Timestamp columns to strings ('datetime' / 'datetimetz' cover all
    # naive and tz-aware datetime columns)
    for col in df.select_dtypes(include=['datetime', 'datetimetz']).columns:
        df[col] = df[col].astype(str)

    # Check if the color_column_name exists in the DataFrame
    if color_column_name not in df.columns:
        raise ValueError(f"The specified color column '{color_column_name}' does not exist in the CSV file.")

    # Drop rows with missing geometries before plotting; doing it after the
    # plot call (as before) had no effect on the figure.
    df = df.dropna(subset=['geometry'])

    ax = df.plot(
        column=color_column_name,
        cmap='viridis',
        legend=True,
        figsize=(15, 15)
    )
    # Label the figure with the plotted column so it can be read on its own
    ax.set_title(color_column_name)

2.1. STANOVNIŠTVO PREMA NARODNOSTI PO GRADOVIMA/OPĆINAMA, POPIS 2021.

In [4]:
narodnost = pd.read_csv('data/census/narodnost.csv')

In [None]:
create_interactive_population_map(narodnost, 'Hrvati, %')

In [None]:
create_static_population_map(narodnost, 'Slovenci, %')

2.2. STANOVNIŠTVO PREMA VJERI PO GRADOVIMA/OPĆINAMA, POPIS 2021.

In [19]:
vjera = pd.read_csv('data/census/vjera.csv')

In [None]:
create_interactive_population_map(vjera, 'Katolici, %')

In [None]:
create_static_population_map(vjera, 'Katolici, %')

2.3. STANOVNIŠTVO PREMA DRŽAVLJANSTVU PO GRADOVIMA/OPĆINAMA, POPIS 2021.

In [28]:
državljanstvo = pd.read_csv('data/census/državljanstvo.csv')

In [None]:
create_interactive_population_map(državljanstvo, 'Hrvatsko (ukupno), %')

In [None]:
create_static_population_map(državljanstvo, 'Hrvatsko (ukupno), %')

2.4. STANOVNIŠTVO PREMA MATERINSKOM JEZIKU PO GRADOVIMA/OPĆINAMA, POPIS 2021.

In [37]:
materinski_jezik = pd.read_csv('data/census/materinski_jezik.csv')

In [None]:
create_interactive_population_map(materinski_jezik, 'Hrvatski, %')

In [None]:
create_static_population_map(materinski_jezik, 'Hrvatski, %')