In [1]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import pandas as pd

# Column names used by the model: the prediction target and the weather
# columns that serve as its inputs.
TARGET_COLUMN = 'gesamt'
FEATURE_COLUMNS = ['min-temp', 'max-temp', 'niederschlag', 'sonnenstunden']

# Load the CSV and discard every row that is missing a value in the target
# or in any feature column, so only complete rows reach the model.
db = pd.read_csv('../datasets/bike_munich/rad_tage.csv').dropna(
    subset=[TARGET_COLUMN, *FEATURE_COLUMNS]
)

# Feature matrix and target vector
X = db[FEATURE_COLUMNS]
y = db[TARGET_COLUMN]

# One seed shared by the split and the model so both are repeatable.
SEED = 42

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Fit a gradient-boosting regressor with default hyperparameters.
# fit() returns the estimator itself, so construction and training are chained.
gbr = GradientBoostingRegressor(random_state=SEED).fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = gbr.predict(X_test)

# Score the held-out predictions, then report both metrics, one per line.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
for label, score in (("Mean Squared Error:", mse), ("R-squared:", r2)):
    print(label, score)

# Predict for tomorrow's weather: a one-row frame whose columns carry the
# same names, in the same order, as the features the model was trained on.
# NOTE(review): the units of these values are not visible here — confirm
# against the dataset before changing them.
tomorrow = pd.DataFrame({
    'min-temp': [8.5],
    'max-temp': [17.2],
    'niederschlag': [0.1],
    'sonnenstunden': [6.0],
})
prediction = gbr.predict(tomorrow)  # array holding a single predicted value
print("Tomorrow's Predicted Bike Traffic:", prediction)


Mean Squared Error: 2615114.780194208
R-squared: 0.22287874233898397
Tomorrow's Predicted Bike Traffic: [2497.28939581]
