In [35]:
import pandas as pd
import plotly.express as px
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Trendlijn voor het gebruik over alle jaren (zonder opsplitsing)

In [54]:
# Load the yearly vehicle counts and give the Dutch column headers their
# English names in one chained step (no in-place mutation).
df_origineel = pd.read_excel('Totaal_gebruik.xlsx').rename(
    columns={
        "Snorfiets": "Moped 25 km/h",
        "Bromfiets": "Moped 45 km/h",
        "Totaal": "Total",
    }
)

# Preview the first rows.
df_origineel.head()

Unnamed: 0,Year,Moped 25 km/h,Moped 45 km/h,Total
0,2019,716917,387331,1104248
1,2020,728644,388345,1116989
2,2021,763342,393991,1157333
3,2022,781353,404282,1185635
4,2023,761219,436996,1198215


In [61]:
# Keep only the year and the two moped series. The previous identity rename
# (every column mapped to its own name) was a no-op and has been dropped.
df = df_origineel[["Year", "Moped 25 km/h", "Moped 45 km/h"]].copy()

# Wide -> long: one row per (year, moped type) so Plotly can colour by series.
long = df.melt(
    id_vars="Year",
    value_vars=["Moped 25 km/h", "Moped 45 km/h"],
    var_name="Serie",
    value_name="Number",
)

# Scatter plot with one OLS trendline per moped type.
fig = px.scatter(
    long, x="Year", y="Number", color="Serie", trendline="ols",
    labels={"Year": "Year", "Number": "Number", "Serie": "Moped type"},
)
fig.update_traces(hovertemplate="Year %{x}<br> Number %{y:.0f}")
# dtick=1 puts one tick on every year.
fig.update_layout(title="Number of moped vehicles", xaxis=dict(dtick=1))
fig.show()


## Info over de trendlijn

In [38]:
def summarize_trendlines(figure):
    """Collect intercept, slope and R² of every OLS trendline in a figure.

    Parameters
    ----------
    figure
        A Plotly Express figure that was created with ``trendline="ols"``.

    Returns
    -------
    pandas.DataFrame
        One row per trendline with the columns ``Serie`` (group label),
        ``intercept``, ``slope_per_jaar`` and ``R2``. A group whose model
        cannot be read is reported on stdout and left out of the result.
    """
    # Fetch the regression results that Plotly stored for the figure.
    res = px.get_trendline_results(figure)

    # Name of the column that holds the fitted model objects.
    col_model = "px_fit_results" if "px_fit_results" in res.columns else "results"

    # Name of the column that holds the group label; falls back to the
    # first column when neither "Serie" nor "color" is present.
    if "Serie" in res.columns:
        col_group = "Serie"
    elif "color" in res.columns:
        col_group = "color"
    else:
        col_group = res.columns[0]

    rows = []
    for i, row in res.iterrows():
        model = row[col_model]
        try:
            params = model.params
            if isinstance(params, (list, tuple, np.ndarray)):
                # Positional parameters: (intercept, slope).
                a, b = params
            else:
                # Labelled parameters: look up by name, falling back to the
                # first / last entry when the label is missing.
                a = params.get("const", list(params)[0] if len(params) > 0 else np.nan)
                b = params.get("Year", list(params)[-1] if len(params) > 1 else np.nan)
            rows.append({
                "Serie": row.get(col_group, f"groep_{i}"),
                "intercept": float(a),
                "slope_per_jaar": float(b),
                "R2": float(model.rsquared)
            })
        except Exception as e:
            # Best effort: report the problem and continue with the other groups.
            print(f"⚠️ Skipping group {i}: {e}")

    return pd.DataFrame(rows)


summary = summarize_trendlines(fig)
print(summary)

       Serie     intercept  slope_per_jaar        R2
0  Snorfiets  3.671116e+07   -17801.857143  0.337696
1  Bromfiets -4.748444e+07    23697.928571  0.854380


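R² geeft aan welk deel van de variatie in de data door de trendlijn wordt verklaard (0 = geen verband, 1 = perfecte fit). De intercept is de constante $a$ in de regressielijn:

$$y = a + b \cdot x$$

Hierbij is `slope_per_jaar` de helling $b$ (de verandering in het aantal per jaar) en is $x$ het jaartal.

Op basis van deze gegevens kun je dus voorspellingen voor de volgende jaren maken.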

# Trendlijn gesplitst in voor en na de invoering van de helmplicht

In [94]:
# 1) Reshape the wide frame to long format: one row per (year, moped type).
long = df.melt(id_vars="Year",
               value_vars=["Moped 25 km/h", "Moped 45 km/h"],
               var_name="Serie", value_name="Number")

# 2) Make sure Year and Number are numeric (unparseable values become NaN).
long["Year"] = pd.to_numeric(long["Year"], errors="coerce")
long["Number"] = pd.to_numeric(long["Number"], errors="coerce")

# 3) Period label plus a combined "series — period" group.
#    Years up to and including 2022 count as "before"; 2023 onwards as "after".
long["Periode"] = np.where(long["Year"] <= 2022, "Before helmet law", "After helmet law")
long["Groep"] = long["Serie"] + " — " + long["Periode"]

# 4) Scatter plot with a separate OLS trendline for each group.
fig = px.scatter(
    long,
    x="Year", y="Number",
    color="Groep",               # 4 groups => 4 trendlines
    trendline="ols",
    trendline_scope="trace",     # one fit per colour trace
    labels={"Year": "Year", "Number": "Number", "Groep": "Moped type"},
    title="Number of users of moped vehicles (before and after the implementation of the helmet law)"
)
# Mark the year in which the helmet law took effect.
fig.add_vline(x=2023, line_dash="dash", line_color="red",
              annotation_text="Helmet law implemented ", annotation_position="top right")


fig.update_layout(xaxis=dict(dtick=1))
fig.show()

## Info over de trendlijn

In [95]:
# Pull the fitted OLS models out of the figure.
res = px.get_trendline_results(fig)

# Name of the column that stores the model objects.
if "px_fit_results" in res.columns:
    col_model = "px_fit_results"
else:
    col_model = "results"

# Name of the column that stores the group label.
if "Serie" in res.columns:
    col_group = "Serie"
elif "color" in res.columns:
    col_group = "color"
else:
    col_group = res.columns[0]

# One record per trendline; a group whose parameters cannot be read is
# reported on stdout and skipped.
rows = []
for i, row in res.iterrows():
    model = row[col_model]
    try:
        params = model.params
        if not isinstance(params, (list, tuple, np.ndarray)):
            # Labelled parameters: look up by name, falling back to the
            # first / last entry when the label is missing.
            fallback_a = list(params)[0] if len(params) > 0 else np.nan
            a = params.get("const", fallback_a)
            fallback_b = list(params)[-1] if len(params) > 1 else np.nan
            b = params.get("Year", fallback_b)
        else:
            # Positional parameters: (intercept, slope).
            a, b = params
        record = {
            "Serie": row.get(col_group, f"groep_{i}"),
            "intercept": float(a),
            "slope_per_jaar": float(b),
            "R2": float(model.rsquared),
        }
        rows.append(record)
    except Exception as e:
        print(f"⚠️ Skipping group {i}: {e}")

summary = pd.DataFrame(rows)
print(summary)

                               Serie     intercept  slope_per_jaar        R2
0  Moped 25 km/h — Before helmet law -4.532105e+07         22800.6  0.967070
1   Moped 25 km/h — After helmet law  1.705427e+08        -83927.5  0.993799
2  Moped 45 km/h — Before helmet law -1.102214e+07          5649.9  0.881209
3   Moped 45 km/h — After helmet law -8.591537e+07         42688.0  0.953282


# Voorspelling voor jaren 2026 - 2030

In [96]:
# Years to forecast.
years = [2026, 2027, 2028, 2029, 2030]

# Pick the post-helmet-law fits by their label instead of by row position,
# so a changed group order or a skipped group cannot silently select the
# wrong model (a missing label now raises a KeyError).
fits = summary.set_index("Serie")
fit_snor = fits.loc["Moped 25 km/h — After helmet law"]
fit_brom = fits.loc["Moped 45 km/h — After helmet law"]

# Linear model: y = a + b * x, with x the calendar year.
a_snor = float(fit_snor["intercept"])
a_brom = float(fit_brom["intercept"])
b_snor = float(fit_snor["slope_per_jaar"])
b_brom = float(fit_brom["slope_per_jaar"])

y_snor = [a_snor + b_snor * year for year in years]
y_brom = [a_brom + b_brom * year for year in years]

# NOTE: the name df_future is reassigned further down by the deaths forecast.
df_future = pd.DataFrame({
    "Year": years,
    "Moped 25 km/h": y_snor,
    "Moped 45 km/h": y_brom
})

df_future["Total"] = df_future["Moped 25 km/h"] + df_future["Moped 45 km/h"]

# Observed years followed by the forecast years.
df_combined_number_vehicles = pd.concat([df_origineel, df_future], ignore_index=True)

print(df_combined_number_vehicles)



    Year  Moped 25 km/h  Moped 45 km/h      Total
0   2019       716917.0       387331.0  1104248.0
1   2020       728644.0       388345.0  1116989.0
2   2021       763342.0       393991.0  1157333.0
3   2022       781353.0       404282.0  1185635.0
4   2023       761219.0       436996.0  1198215.0
5   2024       665809.0       496052.0  1161861.0
6   2025       593364.0       522372.0  1115736.0
7   2026       505609.0       570516.0  1076125.0
8   2027       421681.5       613204.0  1034885.5
9   2028       337754.0       655892.0   993646.0
10  2029       253826.5       698580.0   952406.5
11  2030       169899.0       741268.0   911167.0


In [133]:
# Line chart of observed and forecast vehicle counts for both moped types.
fig = px.line(df_combined_number_vehicles, x="Year", y=["Moped 25 km/h", "Moped 45 km/h"], markers=True,
              labels={"value": "Number", "variable": "Moped type", "Year": "Year"},
              title="Predicted number of moped vehicles per year")

# Mark the year in which the helmet law took effect.
fig.add_vline(x=2023, line_dash="dash", line_color="red",
              annotation_text="Helmet law implemented ", annotation_position="top right")
fig.update_layout(
    width=1200,
    height=600
)
fig.show()


# Voorspellingen voor het aantal doden

import pandas as pd
import plotly.express as px
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [99]:
# Load the yearly death counts and give the Dutch column headers their
# English names in one chained step (no in-place mutation).
df2_origineel = pd.read_excel('Doden.xlsx').rename(
    columns={
        "Brom/Snorfiets": "MopedDeaths",
        "Totaal": "Total deaths",
        "Perioden": "Year",
    }
)

# Preview the first rows.
df2_origineel.head()

Unnamed: 0,Year,Total deaths,MopedDeaths
0,2010,640,43
1,2011,661,45
2,2012,650,50
3,2013,570,53
4,2014,570,47


In [None]:
# Wide -> long: one row per (year, death series).
long = df2_origineel.melt(id_vars="Year",
                value_vars=["Total deaths", "MopedDeaths"],
                var_name="Serie", value_name="Number")

# Make sure Year and Number are numeric (unparseable values become NaN).
long["Year"] = pd.to_numeric(long["Year"], errors="coerce")
long["Number"] = pd.to_numeric(long["Number"], errors="coerce")

# Period label plus a combined "series - period" group used for colouring.
# Years before 2023 count as "before"; 2023 onwards as "after".
long["Periode"] = np.where(long["Year"] < 2023, "Before helmet law", "After helmet law")
long["Moped type"] = long["Serie"] + " - " + long["Periode"]

# Scatter plot with a separate OLS trendline for each group.
fig = px.scatter(
    long,
    x="Year", y="Number",
    color="Moped type",
    trendline="ols",
    trendline_scope="trace",
    # The colour column also contains the total-deaths series, so the legend
    # is titled "Series"; the old "Groep" key matched no column in this frame.
    labels={"Year": "Year", "Number": "Number of deaths", "Moped type": "Series"},
    title="Number of deaths (before and after the implementation of the helmet law)"
)
# Mark the year in which the helmet law took effect.
fig.add_vline(x=2023, line_dash="dash", line_color="red",
              annotation_text="Helmet law implemented ", annotation_position="top right")


fig.update_layout(xaxis=dict(dtick=1))
fig.show()

In [118]:
# Regression results that Plotly stored for the trendlines of `fig`.
res = px.get_trendline_results(fig)

# Column with the fitted model objects.
col_model = "results"
if "px_fit_results" in res.columns:
    col_model = "px_fit_results"

# Column with the group label: "Serie", else "color", else the first column.
col_group = next(
    (name for name in ("Serie", "color") if name in res.columns),
    res.columns[0],
)

# Extract intercept, slope and R² per group; unreadable groups are reported
# on stdout and skipped.
rows = []
for idx, entry in res.iterrows():
    fitted = entry[col_model]
    try:
        coefs = fitted.params
        is_positional = isinstance(coefs, (list, tuple, np.ndarray))
        if is_positional:
            # Positional parameters: (intercept, slope).
            intercept, slope = coefs
        else:
            # Labelled parameters: look up by name, falling back to the
            # first / last entry when the label is missing.
            intercept = coefs.get("const", list(coefs)[0] if len(coefs) > 0 else np.nan)
            slope = coefs.get("Year", list(coefs)[-1] if len(coefs) > 1 else np.nan)
        rows.append(
            dict(
                Serie=entry.get(col_group, f"groep_{idx}"),
                intercept=float(intercept),
                slope_per_jaar=float(slope),
                R2=float(fitted.rsquared),
            )
        )
    except Exception as err:
        print(f"⚠️ Skipping group {idx}: {err}")

summary = pd.DataFrame(rows)
print(summary)

                              Serie     intercept  slope_per_jaar        R2
0  Total deaths - Before helmet law  -5525.692308        3.054945  0.060440
1   Total deaths - After helmet law  18891.000000       -9.000000  1.000000
2   MopedDeaths - Before helmet law     90.230769       -0.021978  0.000283
3    MopedDeaths - After helmet law   4078.000000       -2.000000  1.000000


In [119]:
# Years to forecast.
years = [2025, 2026, 2027, 2028, 2029, 2030]

# Pick the post-helmet-law fits by their label instead of by row position,
# so a changed group order or a skipped group cannot silently select the
# wrong model (a missing label now raises a KeyError).
# NOTE(review): the printed summary shows R2 = 1.0 for both post-law fits,
# which suggests they rest on only two observations — confirm before
# relying on this extrapolation.
fits = summary.set_index("Serie")
fit_total = fits.loc["Total deaths - After helmet law"]
fit_moped = fits.loc["MopedDeaths - After helmet law"]

# Linear model: y = a + b * x, with x the calendar year.
Totaal_a = float(fit_total["intercept"])
SnorBrom_a = float(fit_moped["intercept"])
Totaal_b = float(fit_total["slope_per_jaar"])
SnorBrom_b = float(fit_moped["slope_per_jaar"])

Doden_totaal = [Totaal_a + Totaal_b * year for year in years]
Doden_snorbrom = [SnorBrom_a + SnorBrom_b * year for year in years]

df_future = pd.DataFrame({
    "Year": years,
    "Total deaths": Doden_totaal,
    "MopedDeaths": Doden_snorbrom
})

# Observed years followed by the forecast years.
df_combined_deaths = pd.concat([df2_origineel, df_future], ignore_index=True)
df_combined_deaths


Unnamed: 0,Year,Total deaths,MopedDeaths
0,2010,640.0,43.0
1,2011,661.0,45.0
2,2012,650.0,50.0
3,2013,570.0,53.0
4,2014,570.0,47.0
5,2015,621.0,45.0
6,2016,629.0,44.0
7,2017,613.0,46.0
8,2018,678.0,39.0
9,2019,661.0,45.0


In [134]:
# Line chart of observed and forecast deaths (total and moped-related).
fig = px.line(df_combined_deaths, x="Year", y=["Total deaths", "MopedDeaths"], markers=True,
              labels={"value": "Number", "variable": "Vehicle type", "Year": "Year"},
              title="Predicted number of deaths per year")


# Mark the year in which the helmet law took effect.
fig.add_vline(x=2023, line_dash="dash", line_color="red",
              annotation_text="Helmet law implemented ", annotation_position="top right")
fig.update_layout(
    width=1200,
    height=600
)

fig.show()

In [112]:
def _yearly_totals(frame):
    """Return a copy of ``frame`` with a numeric ``Year`` and numeric columns summed per year.

    The input frame is not modified. Values in ``Year`` that cannot be parsed
    become NaN via ``errors="coerce"``.
    """
    return (
        frame.assign(Year=pd.to_numeric(frame["Year"], errors="coerce"))
        .groupby("Year", as_index=False)
        .sum(numeric_only=True)
    )


# Normalise the three inputs. The names are rebound, as before, so later cells
# see the aggregated frames, but nothing is mutated in place and the
# notebook-level name `df` is no longer overwritten by a loop variable.
df2_origineel = _yearly_totals(df2_origineel)
df_future = _yearly_totals(df_future)
df_combined_number_vehicles = _yearly_totals(df_combined_number_vehicles)

# Observed deaths + forecast deaths + vehicle counts, one row per year.
df_combined = (
    df2_origineel
    .merge(df_future, on="Year", how="outer", suffixes=("", "_future"))
    .merge(df_combined_number_vehicles, on="Year", how="outer")
    .sort_values("Year")
    .reset_index(drop=True)
)

# Fold the forecast columns into the observed ones: the forecast value wins
# where present, otherwise the observed value is kept.
df_combined["Total deaths"] = df_combined["Total deaths_future"].combine_first(df_combined["Total deaths"])
df_combined["MopedDeaths"] = df_combined["MopedDeaths_future"].combine_first(df_combined["MopedDeaths"])
df_combined = df_combined.drop(columns=["Total deaths_future", "MopedDeaths_future"])

df_combined

Unnamed: 0,Year,Total deaths,MopedDeaths,Moped 25 km/h,Moped 45 km/h,Total
0,2010,640.0,43.0,,,
1,2011,661.0,45.0,,,
2,2012,650.0,50.0,,,
3,2013,570.0,53.0,,,
4,2014,570.0,47.0,,,
5,2015,621.0,45.0,,,
6,2016,629.0,44.0,,,
7,2017,613.0,46.0,,,
8,2018,678.0,39.0,,,
9,2019,661.0,45.0,716917.0,387331.0,1104248.0


In [124]:
# Keep the rows from 2019 onwards, working on an independent copy.
from_2019 = df_combined["Year"] >= 2019
df = df_combined.loc[from_2019].copy()

# Moped deaths per 1000 mopeds (both speed classes combined), rounded to
# four decimals.
# NOTE(review): the column is called "Accidents ..." but the numerator is
# MopedDeaths; the name is kept because the plotting cell reads it.
fleet_size = df["Moped 25 km/h"] + df["Moped 45 km/h"]
df["Accidents per 1000 vehicles"] = (df["MopedDeaths"] / fleet_size * 1000).round(4)
df


Unnamed: 0,Year,Total deaths,MopedDeaths,Moped 25 km/h,Moped 45 km/h,Total,Accidents per 1000 vehicles
9,2019,661.0,45.0,716917.0,387331.0,1104248.0,0.0408
10,2020,610.0,36.0,728644.0,388345.0,1116989.0,0.0322
11,2021,582.0,50.0,763342.0,393991.0,1157333.0,0.0432
12,2022,745.0,54.0,781353.0,404282.0,1185635.0,0.0455
13,2023,684.0,32.0,761219.0,436996.0,1198215.0,0.0267
14,2024,675.0,30.0,665809.0,496052.0,1161861.0,0.0258
15,2025,666.0,28.0,593364.0,522372.0,1115736.0,0.0251
16,2026,657.0,26.0,505609.0,570516.0,1076125.0,0.0242
17,2027,648.0,24.0,421681.5,613204.0,1034885.5,0.0232
18,2028,639.0,22.0,337754.0,655892.0,993646.0,0.0221


In [137]:
# Line chart of moped deaths per 1000 mopeds. The source column is still named
# "Accidents per 1000 vehicles", but it holds deaths, so the axis label now
# says so and agrees with the title.
fig = px.line(df, x="Year", y="Accidents per 1000 vehicles", markers=True,
              labels={"Accidents per 1000 vehicles": "Number of moped deaths per 1000 moped vehicles", "Year": "Year"},
              title="Predicted number of moped deaths per year per 1000 moped vehicles")

# Mark the year in which the helmet law took effect.
fig.add_vline(x=2023, line_dash="dash", line_color="red",
              annotation_text="Helmet law implemented ", annotation_position="top right")
fig.update_layout(
    width=1200,
    height=600
)

fig.show()