In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

db = pd.read_csv('datasets/bike_munich/rad_tage.csv')

# Load and clean data (assuming you already loaded db as your DataFrame)
db = db.dropna(subset=['gesamt', 'min-temp', 'max-temp', 'niederschlag', 'sonnenstunden'])

# Convert date to datetime
db['datum'] = pd.to_datetime(db['datum'], format='%Y.%m.%d')

# Feature engineering: Day of week and month
db['wochentag'] = db['datum'].dt.dayofweek  # 0 = Monday
db['monat'] = db['datum'].dt.month

# One-hot encode measuring stations
db = pd.get_dummies(db, columns=['zaehlstelle'], drop_first=True)

# Define features and target
feature_cols = ['min-temp', 'max-temp', 'niederschlag', 'sonnenstunden', 'wochentag', 'monat'] + \
               [col for col in db.columns if col.startswith('zaehlstelle_')]
X = db[feature_cols]
y = db['gesamt']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train RandomForestRegressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Evaluate
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Predict for tomorrow (example values)
tomorrow = pd.DataFrame([{
    'min-temp': 8.5,
    'max-temp': 17.2,
    'niederschlag': 0.1,
    'sonnenstunden': 6.0,
    'wochentag': 2,  # Wednesday
    'monat': 4,
    **{col: 0 for col in X.columns if col.startswith('zaehlstelle_')}  # Default to one station (e.g., all zeros)
}])

# Optional: Set a station manually (e.g., zaehlstelle_Hirsch = 1)
if 'zaehlstelle_Hirsch' in tomorrow.columns:
    tomorrow['zaehlstelle_Hirsch'] = 1

# Predict
prediction = model.predict(tomorrow)
print(f"Tomorrow's Predicted Bike Traffic: {prediction}")


Mean Squared Error: 502019.0050725123
R-squared: 0.8508173929701432
Tomorrow's Predicted Bike Traffic: [1809.49]


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Load dataset
db = pd.read_csv('datasets/bike_munich/rad_tage.csv')

# Clean data
db = db.dropna(subset=['gesamt', 'min-temp', 'max-temp', 'niederschlag', 'sonnenstunden'])

# Convert date to datetime
db['datum'] = pd.to_datetime(db['datum'], format='%Y.%m.%d')

# Feature engineering
db['wochentag'] = db['datum'].dt.dayofweek  # 0 = Monday
db['monat'] = db['datum'].dt.month

# One-hot encode stations
db = pd.get_dummies(db, columns=['zaehlstelle'], drop_first=True)

# Define features and target
feature_cols = ['min-temp', 'max-temp', 'niederschlag', 'sonnenstunden', 'wochentag', 'monat'] + \
               [col for col in db.columns if col.startswith('zaehlstelle_')]
X = db[feature_cols]
y = db['gesamt']

# Save feature columns for prediction reference
joblib.dump(feature_cols, 'feature_columns.pkl')

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"✅ Model trained!")
print(f"📉 Mean Squared Error: {mse:.2f}")
print(f"📈 R² Score: {r2:.2f}")

# Save the model
joblib.dump(model, 'model.pkl')
print("💾 Model saved to model.pkl")


✅ Model trained!
📉 Mean Squared Error: 502019.01
📈 R² Score: 0.85
💾 Model saved to model.pkl


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Load dataset
db = pd.read_csv('datasets/bike_munich/rad_tage.csv')

# Clean data
db = db.dropna(subset=['gesamt', 'min-temp', 'max-temp', 'niederschlag', 'sonnenstunden'])

# Convert date to datetime
db['datum'] = pd.to_datetime(db['datum'], format='%Y.%m.%d')

# Feature engineering
db['wochentag'] = db['datum'].dt.dayofweek  # 0 = Monday
db['monat'] = db['datum'].dt.month

# One-hot encode stations (drop Hirsch station if it exists)
db = pd.get_dummies(db, columns=['zaehlstelle'], drop_first=True)

# Remove 'zaehlstelle_Hirsch' if it exists
if 'zaehlstelle_Hirsch' in db.columns:
    db = db.drop(columns=['zaehlstelle_Hirsch'])

# Define features and target
feature_cols = ['min-temp', 'max-temp', 'niederschlag', 'sonnenstunden', 'wochentag', 'monat'] + \
               [col for col in db.columns if col.startswith('zaehlstelle_')]
X = db[feature_cols]
y = db['gesamt']

# Save feature columns for prediction reference
joblib.dump(feature_cols, 'feature_columns.pkl')

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"✅ Model trained!")
print(f"📉 Mean Squared Error: {mse:.2f}")
print(f"📈 R² Score: {r2:.2f}")

# Save the model
joblib.dump(model, 'model.pkl')
print("💾 Model saved to model.pkl")


✅ Model trained!
📉 Mean Squared Error: 536402.99
📈 R² Score: 0.84
💾 Model saved to model.pkl
