In [2]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from datetime import datetime



# Last inn datasettet
df = pd.read_csv('Ruter_data.csv', sep=';', encoding='utf-8', on_bad_lines='skip')

# ------ cleaning and filterng the dataset 
# Fjern rader med manglende verdier i nødvendige kolonner
df = df.dropna(subset=['Passasjerer_Ombord', 'Dato', 'Linjenavn'])

# Filtrer for en spesifikk busslinje (f.eks. linje 100)
bus_df = df[df['Linjenavn'] == '100']

def convert_date_to_numeric(date_str):
    base_date = datetime.strptime("01/01/2000", "%d/%m/%Y")  # Referansedato
    current_date = datetime.strptime(date_str, "%d/%m/%Y")  # Konverter dato fra datasettet
    return (current_date - base_date).days  # Forskjell i dager

bus_df['Dato_Numeric'] = bus_df['Dato'].apply(convert_date_to_numeric)

# -- -------- training the model 

X = bus_df[['Dato_Numeric']]  # Funksjon: numerisk dato
y = bus_df['Passasjerer_Ombord']  # Målvariabel: antall passasjerer

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Gjør prediksjoner på testsettet
y_pred = model.predict(X_test)

# Evaluer modellen
r2 = r2_score(y_test, y_pred)  # Prediction Accuracy Score
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Print ytelse
print("Modellens ytelse:")
print(f"Prediction Accuracy Score (R²): {r2:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")

# Test prediksjon for en spesifikk dato
test_date = "01/01/2025"  # Velg en dato
test_date_numeric = convert_date_to_numeric(test_date)

# Konverter testdato til en DataFrame for å unngå advarsel
test_date_df = pd.DataFrame([[test_date_numeric]], columns=['Dato_Numeric'])

predicted_passengers = model.predict(test_date_df)
print(f"Antall passasjerer for datoen {test_date}: {predicted_passengers[0]:.0f}")

Modellens ytelse:
Prediction Accuracy Score (R²): -0.00
Mean Absolute Error (MAE): 6.23
Mean Squared Error (MSE): 64.74
Antall passasjerer for datoen 01/01/2025: 30


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bus_df['Dato_Numeric'] = bus_df['Dato'].apply(convert_date_to_numeric)
