Prikazivanje ovisnosti između različitih varijabli iz popisa stanovništva i izbornih rezultata

In [None]:
%pip install plotly nbformat
#restart kernel after installing nbformat!

In [2]:
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

In [None]:
# Combined election results table used by every analysis cell below.
election_results = pd.read_csv("data/election_results/kombinirani_rezultati.csv")

# Show all columns at positions 34-60 (presumably every per-party percentage column — confirm).
party_percentages = list(election_results.columns[34:61])
print(party_percentages)

# Keep only the parties with more than 1% nationally, to reduce the number of plots.
party_percentages = [
    'HDZ %',
    'SDP %',
    'DOMOVINSKI POKRET %',
    'MOŽEMO %',
    'MOST %',
    'FOKUS %',
    'SOCIJALDEMOKRATI, REFORMISTI, IDS, PGS %',
    'UMIROVLJENICI ZAJEDNO %',
    'ODLUČNOST I PRAVEDNOST - OIP %',
    'NEZAVISNA PLATFORMA SJEVERA - NPS %',
]
print(party_percentages)

In [16]:
def correlation(df, x, y):
    """Print the plain and the population-weighted Pearson correlation of two columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain columns ``x`` and ``y`` plus a population column named
        either 'Ukupno' or 'Ukupan broj stanovnika' (checked in that order),
        which supplies the weights.
    x, y : str
        Names of the two columns to correlate. They may be the same column.

    Returns
    -------
    None
        Both coefficients are printed with five decimals.

    Raises
    ------
    KeyError
        If ``df`` has neither population column.

    Notes
    -----
    Rows with a missing value in ``x``, ``y`` or the population column are
    left out of the weighted computation, mirroring how ``Series.corr`` skips
    incomplete pairs. If no usable rows remain, the total weight is zero, or
    one of the variables has zero weighted variance, the weighted coefficient
    is reported as ``nan``.
    """
    # Resolve the weight column up front so a missing column fails with a clear message.
    population_column = next(
        (name for name in ('Ukupno', 'Ukupan broj stanovnika') if name in df.columns),
        None,
    )
    if population_column is None:
        raise KeyError(
            "DataFrame needs a population column named 'Ukupno' or 'Ukupan broj stanovnika'"
        )

    pearson = df[x].corr(df[y])
    print(f"Correlation between {x} and {y}: {pearson:.5f}")

    # Work on Series (not a column subset) so x == y or x == population column is fine.
    x_values, y_values, weights = df[x], df[y], df[population_column]
    complete = x_values.notna() & y_values.notna() & weights.notna()
    x_values, y_values, weights = x_values[complete], y_values[complete], weights[complete]

    total_weight = np.sum(weights)
    if len(weights) == 0 or total_weight == 0:
        weighted_corr = float('nan')
    else:
        x_mean_w = np.average(x_values, weights=weights)
        y_mean_w = np.average(y_values, weights=weights)

        # Weighted covariance
        cov_xy = np.sum(weights * (x_values - x_mean_w) * (y_values - y_mean_w)) / total_weight

        # Weighted variances
        var_x = np.sum(weights * (x_values - x_mean_w) ** 2) / total_weight
        var_y = np.sum(weights * (y_values - y_mean_w) ** 2) / total_weight

        # Weighted correlation; undefined when either variable is constant.
        denominator = np.sqrt(var_x * var_y)
        weighted_corr = cov_xy / denominator if denominator > 0 else float('nan')

    print(f"Population-Weighted Correlation between {x} and {y}: {weighted_corr:.5f}")

def assign_region(row):
    """Return the region name for one row of municipality data.

    ``row`` is indexed by 'Grad/općina' (municipality) and 'Županija' (county).
    A few municipalities are assigned individually and take precedence over
    their county; every other row is assigned by county. Rows matching no rule
    get 'UNKNOWN'. The county is only looked up when no municipality rule hits.
    """
    municipality_rules = (
        (('SENJ', 'KARLOBAG'), 'ISTRA I KVARNER'),
        (('NOVALJA',), 'DALMACIJA'),
        (('GRAČAC',), 'SREDIŠNJA HRVATSKA'),
    )
    county_rules = (
        (
            (
                'OSJEČKO-BARANJSKA ŽUPANIJA',
                'VUKOVARSKO-SRIJEMSKA ŽUPANIJA',
                'BRODSKO-POSAVSKA ŽUPANIJA',
                'POŽEŠKO-SLAVONSKA ŽUPANIJA',
                'VIROVITIČKO-PODRAVSKA ŽUPANIJA',
            ),
            'SLAVONIJA',
        ),
        (
            (
                'KRAPINSKO-ZAGORSKA ŽUPANIJA',
                'VARAŽDINSKA ŽUPANIJA',
                'MEĐIMURSKA ŽUPANIJA',
                'KOPRIVNIČKO-KRIŽEVAČKA ŽUPANIJA',
            ),
            'SJEVERNA HRVATSKA',
        ),
        (
            (
                'ZAGREBAČKA ŽUPANIJA',
                'BJELOVARSKO-BILOGORSKA ŽUPANIJA',
                'SISAČKO-MOSLAVAČKA ŽUPANIJA',
                'KARLOVAČKA ŽUPANIJA',
                'LIČKO-SENJSKA ŽUPANIJA',
            ),
            'SREDIŠNJA HRVATSKA',
        ),
        (('GRAD ZAGREB',), 'ZAGREB'),
        (('ISTARSKA ŽUPANIJA', 'PRIMORSKO-GORANSKA ŽUPANIJA'), 'ISTRA I KVARNER'),
        (
            (
                'ZADARSKA ŽUPANIJA',
                'ŠIBENSKO-KNINSKA ŽUPANIJA',
                'SPLITSKO-DALMATINSKA ŽUPANIJA',
                'DUBROVAČKO-NERETVANSKA ŽUPANIJA',
            ),
            'DALMACIJA',
        ),
    )

    # Municipality-level exceptions win over the county-level assignment.
    municipality = row['Grad/općina']
    for names, region in municipality_rules:
        if municipality in names:
            return region

    county = row['Županija']
    for names, region in county_rules:
        if county in names:
            return region

    return 'UNKNOWN'

def _hover_template(x, y, extra_labels=()):
    """Build the Plotly hovertemplate shared by all scatter layers.

    The first two lines show the axis values, the next two show customdata
    columns 0 and 1 (municipality and county). Each label in ``extra_labels``
    adds one more line bound to the next customdata column (2, 3, ...).
    """
    parts = [
        f"<b>{x}:</b> %{{x}}<br>",
        f"<b>{y}:</b> %{{y}}<br>",
        "<b>Grad/općina:</b> %{customdata[0]}<br>",
        "<b>Županija:</b> %{customdata[1]}<br>",
    ]
    for position, label in enumerate(extra_labels, start=2):
        parts.append(f"<b>{label}:</b> %{{customdata[{position}]}}<br>")
    return "".join(parts)


def create_plot(df, x, y):
    """Show an interactive scatter plot of ``df[y]`` against ``df[x]``.

    The figure holds three alternative marker layers, switched with buttons
    above the plot: a single-colour default layer, a layer coloured by the
    natural log of the population column, and a layer coloured by the region
    returned by ``assign_region``. Only the default layer is visible at first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``x``, ``y``, 'Grad/općina', 'Županija' and a population
        column named 'Ukupno' or 'Ukupan broj stanovnika' (checked in that
        order). The frame is not modified.
    x, y : str
        Column names for the horizontal and vertical axis.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Raises
    ------
    KeyError
        If ``df`` has neither population column.
    """
    population_column = next(
        (name for name in ('Ukupno', 'Ukupan broj stanovnika') if name in df.columns),
        None,
    )
    if population_column is None:
        raise KeyError(
            "DataFrame needs a population column named 'Ukupno' or 'Ukupan broj stanovnika'"
        )

    # Region labels and colours are kept in local Series instead of being
    # written into `df`: callers pass filtered slices, and adding columns to
    # them both mutates caller state and triggers SettingWithCopyWarning.
    color_map = {
        'SLAVONIJA': 'red',
        'SJEVERNA HRVATSKA': 'orange',
        'SREDIŠNJA HRVATSKA': 'green',
        'ZAGREB': 'magenta',
        'ISTRA I KVARNER': 'teal',
        'DALMACIJA': 'darkblue',
        'UNKNOWN': 'gray'
    }
    regions = df.apply(assign_region, axis=1)
    region_colors = regions.map(color_map)

    place_columns = ['Grad/općina', 'Županija']

    fig = go.Figure()

    # Default scatter layer
    fig.add_trace(go.Scatter(
        x=df[x],
        y=df[y],
        mode='markers',
        marker=dict(size=6, color='navy'),
        name='Default Layer',
        hovertemplate=_hover_template(x, y),
        customdata=df[place_columns].to_numpy()
    ))

    # Second layer: markers coloured by log(population)
    fig.add_trace(go.Scatter(
        x=df[x],
        y=df[y],
        mode='markers',
        marker=dict(size=6, color=np.log(df[population_column]), colorscale='Viridis'),
        name='Population Layer',
        hovertemplate=_hover_template(x, y, (population_column,)),
        customdata=df[place_columns + [population_column]].to_numpy(),
        visible=False  # Initially hidden
    ))

    # Third layer: markers coloured by region
    fig.add_trace(go.Scatter(
        x=df[x],
        y=df[y],
        mode='markers',
        marker=dict(size=6, color=region_colors),
        name='Region Layer',
        hovertemplate=_hover_template(x, y, ('Regija',)),
        customdata=df[place_columns].assign(Regija=regions).to_numpy(),
        visible=False  # Initially hidden
    ))

    # Buttons toggle which one of the three traces is visible.
    fig.update_layout(
        updatemenus=[
            dict(
                type="buttons",
                direction="right",
                x=0.5,
                xanchor="center",
                y=1.15,
                buttons=[
                    dict(label="Default Layer", method="update", args=[{"visible": [True, False, False]}]),
                    dict(label="Population Layer", method="update", args=[{"visible": [False, True, False]}]),
                    dict(label="Region Layer", method="update", args=[{"visible": [False, False, True]}]),
                ],
            )
        ],
        xaxis_title=x,
        yaxis_title=y,
    )

    fig.update_layout(
        margin=dict(l=40, r=40, t=60, b=40),
        height=600,
        width=800
    )

    fig.show()

BRAČNI STATUS

In [None]:
# Marital-status census table; columns at positions 16-25 are the variables to correlate.
census_df = pd.read_csv("data/census/bračni_status.csv")
census_percentages = list(census_df.columns[16:26])
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows whose 'Starost' value is 'Ukupno'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: merged['Starost'] == 'Ukupno']
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

BROJ ŽIVOROĐENE DJECE

In [None]:
# Number-of-live-born-children census table; columns at positions 17-29 are the variables to correlate.
census_df = pd.read_csv("data/census/broj_živorođene_djece.csv")
census_percentages = list(census_df.columns[17:30])
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows whose 'Starost' value is 'Ukupno'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: merged['Starost'] == 'Ukupno']
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

DOB

In [None]:
# Age census table; take columns at positions 14-26 except positions 21 and 22.
census_df = pd.read_csv("data/census/dob.csv")
census_percentages = [
    name
    for position, name in enumerate(census_df.columns[14:27], start=14)
    if position not in (21, 22)
]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows whose 'Spol' value is 'sv.'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: merged['Spol'] == 'sv.']
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

DRŽAVLJANSTVO

In [None]:
# Citizenship census table; take every second column starting at position 5 (positions 5, 7, 9, 11).
census_df = pd.read_csv("data/census/državljanstvo.csv")
census_percentages = list(census_df.columns[5:12:2])
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name.
df = election_results.merge(
    census_df,
    how='inner',
    left_on=['Županija', 'Grad/općina/država'],
    right_on=['Županija', 'Grad/općina'],
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

EKONOMSKA AKTIVNOST

In [None]:
# Economic-activity census table; columns at positions 15-24 are the variables to correlate.
census_df = pd.read_csv("data/census/ekonomska_aktivnost.csv")
census_percentages = list(census_df.columns[15:25])
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows with 'Starost' == 'Ukupno' and 'Spol' == 'sv.'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: (merged['Starost'] == 'Ukupno') & (merged['Spol'] == 'sv.')]
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

MATERINSKI JEZIK

In [None]:
# Mother-tongue census table; among columns at positions 5-55 keep those whose name ends with '%'.
census_df = pd.read_csv("data/census/materinski_jezik.csv")
census_percentages = [name for name in census_df.columns[5:56] if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name.
df = election_results.merge(
    census_df,
    how='inner',
    left_on=['Županija', 'Grad/općina/država'],
    right_on=['Županija', 'Grad/općina'],
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

MIGRACIJSKA OBILJEŽJA

In [None]:
# Migration-characteristics census table; keep every column whose name ends with '%'.
census_df = pd.read_csv("data/census/migracijska_obilježja.csv")
census_percentages = [name for name in census_df.columns if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows whose 'Spol' value is 'sv.'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: merged['Spol'] == 'sv.']
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

MJESTO ROĐENJA

In [None]:
# Place-of-birth/residence census table; keep every column whose name ends with '%'.
census_df = pd.read_csv("data/census/mjesto_rođenja_stanovanja.csv")
census_percentages = [name for name in census_df.columns if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows whose 'Spol' value is 'sv.'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: merged['Spol'] == 'sv.']
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

NARODNOST

In [None]:
# Ethnicity census table; from position 5 onward keep the columns whose name ends with '%'.
census_df = pd.read_csv("data/census/narodnost.csv")
census_percentages = [name for name in census_df.columns[5:] if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name.
df = election_results.merge(
    census_df,
    how='inner',
    left_on=['Županija', 'Grad/općina/država'],
    right_on=['Županija', 'Grad/općina'],
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

PODRUČJE DJELATNOSTI

In [None]:
# Sector-of-activity census table; keep every column whose name ends with '%'.
census_df = pd.read_csv("data/census/područje_djelatnosti.csv")
census_percentages = [name for name in census_df.columns if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows with 'Starost' == 'Ukupno' and 'Spol' == 'sv.'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: (merged['Starost'] == 'Ukupno') & (merged['Spol'] == 'sv.')]
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

POHAĐANJE ŠKOLE

In [None]:
# School-attendance census table; keep every column whose name ends with '%'.
census_df = pd.read_csv("data/census/pohađanje_škole.csv")
census_percentages = [name for name in census_df.columns if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows whose 'Spol' value is 'sv.'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: merged['Spol'] == 'sv.']
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

POLOŽAJ U ZAPOSLENJU

In [None]:
# Employment-status census table; keep every column whose name ends with '%'.
census_df = pd.read_csv("data/census/položaj_u_zaposlenju.csv")
census_percentages = [name for name in census_df.columns if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows with 'Starost' == 'Ukupno' and 'Spol' == 'sv.'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: (merged['Starost'] == 'Ukupno') & (merged['Spol'] == 'sv.')]
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

VJERA

In [None]:
# Religion census table; from position 5 onward keep the columns whose name ends with '%'.
census_df = pd.read_csv("data/census/vjera.csv")
census_percentages = [name for name in census_df.columns[5:] if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name.
df = election_results.merge(
    census_df,
    how='inner',
    left_on=['Županija', 'Grad/općina/država'],
    right_on=['Županija', 'Grad/općina'],
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

ZAPOSLENOST PREMA ZANIMANJU

In [None]:
# Employment-by-occupation census table; keep every column whose name ends with '%'.
census_df = pd.read_csv("data/census/zaposlenost_prema_zanimanju.csv")
census_percentages = [name for name in census_df.columns if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows whose 'Starost' value is 'Ukupno'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: merged['Starost'] == 'Ukupno']
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

ZAVRŠENA ŠKOLA

In [None]:
# Completed-education census table; keep every column whose name ends with '%'.
census_df = pd.read_csv("data/census/završena_škola.csv")
census_percentages = [name for name in census_df.columns if name.endswith('%')]
print(census_percentages)

In [None]:
# Join election results with the census table on county + municipality name,
# then keep only rows with 'Starost' == 'Ukupno' and 'Spol' == 'sv.'.
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: (merged['Starost'] == 'Ukupno') & (merged['Spol'] == 'sv.')]
)

# One correlation printout and one scatter plot per (census column, party column) pair.
for x in census_percentages:
    for y in party_percentages:
        correlation(df, x, y)
        create_plot(df, x, y)

KORELACIJA REZULTATA STRANAKA S REZULTATIMA DRUGIH STRANAKA

In [None]:
# The age census table is merged in so that the population column needed by
# correlation() and create_plot() is available; only 'Spol' == 'sv.' rows are kept.
census_df = pd.read_csv("data/census/dob.csv")
df = pd.merge(election_results, census_df, left_on=['Županija', 'Grad/općina/država'], right_on=['Županija', 'Grad/općina'], how='inner')
df = df[df['Spol'] == 'sv.']

# Pearson correlation is symmetric and a column's correlation with itself is
# always 1, so visit each unordered pair of parties exactly once instead of
# the full Cartesian product (which repeated every pair mirrored and added
# ten trivial self-comparisons).
for first_index, x in enumerate(party_percentages):
    for y in party_percentages[first_index + 1:]:
        correlation(df, x, y)
        create_plot(df, x, y)

KORELACIJA REZULTATA STRANAKA I BROJA STANOVNIKA

In [None]:
# Merge the age census table to obtain the population column, keeping only
# rows whose 'Spol' value is 'sv.'.
census_df = pd.read_csv("data/census/dob.csv")
df = (
    election_results
    .merge(
        census_df,
        how='inner',
        left_on=['Županija', 'Grad/općina/država'],
        right_on=['Županija', 'Grad/općina'],
    )
    .loc[lambda merged: merged['Spol'] == 'sv.']
)

# Correlate every party's share with the raw population count.
x = 'Ukupno'
print(x)
for y in party_percentages:
    correlation(df, x, y)
    create_plot(df, x, y)