In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [2]:
df2_origineel = pd.read_excel('Doden.xlsx')
df2_origineel.head()


Unnamed: 0,Perioden,Totaal,Brom/Snorfiets
0,2010,640,43
1,2011,661,45
2,2012,650,50
3,2013,570,53
4,2014,570,47


In [3]:

df2 = df2_origineel.rename(columns={
    "Perioden": "Year",
    "Totaal": "Total",
    "Brom/Snorfiets": "MopedDeaths"
})[["Year", "Total", "MopedDeaths"]]

long = df2.melt(id_vars="Year",
                value_vars=["Total", "MopedDeaths"],
                var_name="Serie", value_name="Aantal")



# 2) Zorg dat Year/Aantal numeriek zijn
long["Year"] = pd.to_numeric(long["Year"], errors="coerce")
long["Aantal"] = pd.to_numeric(long["Aantal"], errors="coerce")

# 3) Periode-labels + gecombineerde groep
long["Periode"] = np.where(long["Year"] < 2023, "Voor helmplicht", "Na helmplicht")
long["Groep"] = long["Serie"] + " — " + long["Periode"]

# 4) Plot met OLS-trendlijnen per groep
fig = px.scatter(
    long,
    x="Year", y="Aantal",
    color="Groep",               # 4 groepen => 4 trendlijnen
    trendline="ols",
    trendline_scope="trace",     # per kleur-trace
    labels={"Year": "Jaar", "Aantal": "Aantal gebruikers"},
    title="Aantal doden — Bromfiets vs. Snorfiets (voor en na helmplicht)"
)
fig.add_vline(x=2023, line_dash="dash", line_color="red",
              annotation_text="Helmet law inplemented ", annotation_position="top right")


fig.update_layout(xaxis=dict(dtick=1))
fig.show()

In [4]:
res = px.get_trendline_results(fig)

# bepaal kolomnaam waarin de modellen zitten
col_model = "px_fit_results" if "px_fit_results" in res.columns else "results"

# bepaal kolomnaam voor de groepsnaam
col_group = "Serie" if "Serie" in res.columns else ("color" if "color" in res.columns else res.columns[0])

# veilige extractie van de parameters
rows = []
for i, row in res.iterrows():
    model = row[col_model]
    try:
        params = model.params
        # Zorg dat we altijd labels hebben
        if isinstance(params, (list, tuple, np.ndarray)):
            a, b = params
        else:
            a = params.get("const", list(params)[0] if len(params) > 0 else np.nan)
            b = params.get("Year", list(params)[-1] if len(params) > 1 else np.nan)
        rows.append({
            "Serie": row.get(col_group, f"groep_{i}"),
            "intercept": float(a),
            "slope_per_jaar": float(b),
            "R2": float(model.rsquared)
        })
    except Exception as e:
        print(f"⚠️ Skipping group {i}: {e}")

summary = pd.DataFrame(rows)
print(summary)

                           Serie     intercept  slope_per_jaar        R2
0        Total — Voor helmplicht  -5525.692308        3.054945  0.060440
1          Total — Na helmplicht  18891.000000       -9.000000  1.000000
2  MopedDeaths — Voor helmplicht     90.230769       -0.021978  0.000283
3    MopedDeaths — Na helmplicht   4078.000000       -2.000000  1.000000


In [5]:
years = [2026, 2027, 2028, 2029, 2030]

#y = a + bx 
Totaal_a = int(summary.loc[1, 'intercept'])
SnorBrom_a = int(summary.loc[3, 'intercept'])
Totaal_b = int(summary.loc[1, 'slope_per_jaar'])
SnorBrom_b = int(summary.loc[3, 'slope_per_jaar'])

Doden_totaal = []
Doden_snorbrom = []


for i in years:
    Doden_totaal.append(Totaal_a + Totaal_b*i)
    Doden_snorbrom.append(SnorBrom_a + SnorBrom_b*i)

df_future = pd.DataFrame({
    "Perioden": years,
    "Totaal": Doden_totaal,
    "Brom/Snorfiets": Doden_snorbrom
})

df_combined = pd.concat([df2_origineel, df_future], ignore_index=True)


In [7]:
print("Totaal_a:", Totaal_a, "Totaal_b:", Totaal_b)
print("SnorBrom_a:", SnorBrom_a, "SnorBrom_b:", SnorBrom_b)

Totaal_a: 18890 Totaal_b: -8
SnorBrom_a: 4077 SnorBrom_b: -1


In [6]:
fig = px.line(df_combined, x="Perioden", y=["Totaal", "Brom/Snorfiets"], markers=True,
              labels={"value": "Aantal", "variable": "Type", "Year": "Jaar"},
              title="Aantal voorspeld per jaar")
fig.show()