Connecting census data to administrative-unit geometries

In [1]:
%pip install pandas
%pip install folium
%pip install mapclassify
%pip install geopandas
%pip install selenium


In [2]:
import pandas as pd
import geopandas as gpd
import folium
from IPython.display import display
import matplotlib.pyplot as plt

In [3]:
# Read the GeoJSON file of administrative units into `geojson_data`
GEOJSON_PATH = "data/administrative_units/gradovi_opcine_zupanije.geojson"
geojson_data = gpd.read_file(GEOJSON_PATH)

In [4]:
# Derive the two merge-key columns from the whitespace-stripped label columns
# so they carry the same names the map functions merge on.
geojson_data['Županija'], geojson_data['Grad/općina'] = (
    geojson_data['text_right'].str.strip(),
    geojson_data['text_left'].str.strip(),
)

In [5]:
def create_interactive_population_map(df, color_column_name):
    """Display an interactive folium choropleth of a census table.

    The table is inner-joined to the module-level ``geojson_data`` on
    ('Županija', 'Grad/općina'). Each polygon is filled according to
    ``color_column_name`` and gets a hover tooltip showing the two key
    columns, 'Ukupno', and every column whose name ends with '%'.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing 'Županija', 'Grad/općina', 'Ukupno' and the column
        named by ``color_column_name``.
    color_column_name : str
        Name of the numeric column that drives the fill colour.

    Returns
    -------
    None
        The map is rendered with ``display``; nothing is returned.

    Raises
    ------
    ValueError
        If a required column is missing, if the join produces no drawable
        rows, or if the colour column contains no values.
    """
    key_columns = ['Županija', 'Grad/općina']
    missing_color = '#808080'  # neutral grey for rows without a value

    # Check the merge keys on the input first: merge() would otherwise raise a
    # bare KeyError before the friendlier message below could be shown.
    if any(key not in df.columns for key in key_columns):
        raise ValueError("The CSV must contain 'Županija', 'Grad/općina', and 'Ukupno' columns.")

    # Merge dataframes based on the matching columns
    merged = geojson_data.merge(df, on=key_columns, how='inner')

    if 'Ukupno' not in merged.columns:
        raise ValueError("The CSV must contain 'Županija', 'Grad/općina', and 'Ukupno' columns.")

    # Check if the color_column_name exists in the DataFrame
    if color_column_name not in merged.columns:
        raise ValueError(f"The specified color column '{color_column_name}' does not exist in the CSV file.")

    # Drop rows without geometry *before* deriving the colour scale, so rows
    # that cannot be drawn do not influence vmin/vmax. copy() gives us a frame
    # we can safely add columns to.
    merged = merged.dropna(subset=['geometry']).copy()
    if merged.empty:
        raise ValueError("No rows matched between the census data and the administrative units.")

    color_values = merged[color_column_name]
    if color_values.isna().all():
        raise ValueError(f"The color column '{color_column_name}' contains no values to plot.")

    # Convert Timestamp columns to strings so the frame can be serialised to GeoJSON
    for col in merged.select_dtypes(include=['datetime64[ns, UTC]', 'datetime64[ns]']).columns:
        merged[col] = merged[col].astype(str)

    # Prepare hover information with columns that end with '%'.
    # The column list is computed once instead of once per row.
    percent_columns = [col for col in merged.columns if isinstance(col, str) and col.endswith('%')]

    def build_hover(row):
        header = f"Županija: {row['Županija']}<br>Grad/Općina: {row['Grad/općina']}<br>Ukupno: {row['Ukupno']}<br>"
        return header + ''.join(f"<br>{col}: {row[col]}" for col in percent_columns)

    merged['hover_info'] = merged.apply(build_hover, axis=1)

    # Create a color map based on the specified color column
    colormap = folium.LinearColormap(
        colors=['blue', 'yellow', 'red'],
        vmin=color_values.min(),
        vmax=color_values.max(),
        caption=color_column_name
    )

    # Missing values get a neutral colour instead of being passed to the colormap
    merged['color'] = color_values.apply(
        lambda value: missing_color if pd.isna(value) else colormap(value)
    )

    # Create a folium map centered on Croatia
    m = folium.Map(location=[45.1, 15.2], zoom_start=7)

    # Function to style each feature based on the color column
    def style_function(feature):
        return {
            'fillColor': feature['properties']['color'],
            'color': 'black',
            'weight': 1,
            'fillOpacity': 0.5
        }

    # Add GeoJSON layer with hover functionality
    folium.GeoJson(
        merged,
        style_function=style_function,
        tooltip=folium.GeoJsonTooltip(
            fields=['hover_info'],
            aliases=['Info: '],
            localize=True
        )
    ).add_to(m)

    # Add the colormap legend to the map
    colormap.add_to(m)

    # Display the map directly in the notebook
    display(m)


In [6]:
def create_static_population_map(df, color_column_name):
    """Draw a static choropleth of a census table.

    The table is inner-joined to the module-level ``geojson_data`` on
    ('Županija', 'Grad/općina') and plotted with ``color_column_name`` as the
    colour-driving column, with a legend and the column name as the title.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing 'Županija', 'Grad/općina', 'Ukupno' and the column
        named by ``color_column_name``.
    color_column_name : str
        Name of the column used to colour the polygons.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a required column is missing.
    """
    key_columns = ['Županija', 'Grad/općina']

    # Check the merge keys on the input first: merge() would otherwise raise a
    # bare KeyError before the friendlier message below could be shown.
    if any(key not in df.columns for key in key_columns):
        raise ValueError("The CSV must contain 'Županija', 'Grad/općina', and 'Ukupno' columns.")

    # Merge dataframes based on the matching columns
    merged = geojson_data.merge(df, on=key_columns, how='inner')

    if 'Ukupno' not in merged.columns:
        raise ValueError("The CSV must contain 'Županija', 'Grad/općina', and 'Ukupno' columns.")

    # Check if the color_column_name exists in the DataFrame
    if color_column_name not in merged.columns:
        raise ValueError(f"The specified color column '{color_column_name}' does not exist in the CSV file.")

    # Filter out rows with missing geometries *before* plotting; previously
    # this ran after the plot call and its result was never used.
    merged = merged.dropna(subset=['geometry']).copy()

    # Convert Timestamp columns to strings
    for col in merged.select_dtypes(include=['datetime64[ns, UTC]', 'datetime64[ns]']).columns:
        merged[col] = merged[col].astype(str)

    ax = merged.plot(
        column=color_column_name,
        legend=True,
        figsize=(15, 15)
    )
    ax.set_title(color_column_name, fontsize=14)

In [7]:
def create_multiple_static_population_maps(df):
    """Draw one static map for each column of ``df`` whose name ends with '%'.

    The two total-percentage columns ('Ukupno, %' and 'Ukupno %') are skipped.
    Each remaining column is passed to ``create_static_population_map``.
    """
    skipped_totals = ('Ukupno, %', 'Ukupno %')
    for column_name in df.columns:
        if not column_name.endswith('%'):
            continue
        if column_name in skipped_totals:
            continue
        create_static_population_map(df, column_name)

STANOVNIŠTVO PREMA NARODNOSTI PO GRADOVIMA/OPĆINAMA, POPIS 2021.

In [8]:
# Load this section's census table from CSV
narodnost = pd.read_csv('data/census/narodnost.csv')

In [9]:
# Interactive map with polygons coloured by the 'Hrvati, %' column
create_interactive_population_map(narodnost, 'Hrvati, %')

In [10]:
# Static map coloured by the 'Hrvati, %' column
create_static_population_map(narodnost, 'Hrvati, %')

In [11]:
# One static map per column ending in '%' (the 'Ukupno' percentage columns are skipped)
create_multiple_static_population_maps(narodnost)

STANOVNIŠTVO PREMA VJERI PO GRADOVIMA/OPĆINAMA, POPIS 2021.

In [12]:
# Load this section's census table from CSV
vjera = pd.read_csv('data/census/vjera.csv')

In [13]:
# Interactive map with polygons coloured by the 'Katolici, %' column
create_interactive_population_map(vjera, 'Katolici, %')

In [14]:
# Static map coloured by the 'Katolici, %' column
create_static_population_map(vjera, 'Katolici, %')

In [15]:
# One static map per column ending in '%' (the 'Ukupno' percentage columns are skipped)
create_multiple_static_population_maps(vjera)

STANOVNIŠTVO PREMA DRŽAVLJANSTVU PO GRADOVIMA/OPĆINAMA, POPIS 2021.

In [16]:
# Load this section's census table from CSV
državljanstvo = pd.read_csv('data/census/državljanstvo.csv')

In [17]:
# Interactive map with polygons coloured by the 'Hrvatsko (ukupno), %' column
create_interactive_population_map(državljanstvo, 'Hrvatsko (ukupno), %')

In [18]:
# Static map coloured by the 'Hrvatsko (ukupno), %' column
create_static_population_map(državljanstvo, 'Hrvatsko (ukupno), %')

In [19]:
# One static map per column ending in '%' (the 'Ukupno' percentage columns are skipped)
create_multiple_static_population_maps(državljanstvo)

STANOVNIŠTVO PREMA MATERINSKOM JEZIKU PO GRADOVIMA/OPĆINAMA, POPIS 2021.

In [20]:
# Load this section's census table from CSV
materinski_jezik = pd.read_csv('data/census/materinski_jezik.csv')

In [21]:
# Interactive map with polygons coloured by the 'Hrvatski, %' column
create_interactive_population_map(materinski_jezik, 'Hrvatski, %')

In [22]:
# Static map coloured by the 'Hrvatski, %' column
create_static_population_map(materinski_jezik, 'Hrvatski, %')

In [23]:
# One static map per column ending in '%' (the 'Ukupno' percentage columns are skipped)
create_multiple_static_population_maps(materinski_jezik)

STANOVNIŠTVO PREMA BRAČNOM STATUSU PO GRADOVIMA/OPĆINAMA, POPIS 2021.

In [24]:
# Load this section's census table and keep only the rows whose
# 'Starost' value is 'Ukupno'.
bračni_status = (
    pd.read_csv('data/census/bračni_status.csv')
    .loc[lambda table: table['Starost'] == 'Ukupno']
)

In [25]:
# Interactive map with polygons coloured by the 'Neoženjen %' column
create_interactive_population_map(bračni_status, 'Neoženjen %')

In [26]:
# Static map coloured by the 'Neoženjen %' column
create_static_population_map(bračni_status, 'Neoženjen %')

In [27]:
# One static map per column ending in '%' (the 'Ukupno' percentage columns are skipped)
create_multiple_static_population_maps(bračni_status)