In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from sklearn.metrics import mean_squared_log_error
import joblib
import matplotlib.pyplot as plt

In [2]:
# Load both CSV files from the current working directory, train first, then test.
train_df, test_df = (
    pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv')
)

In [3]:
def preprocess(df):
    """Return a new frame with ``datetime`` parsed and calendar columns added.

    The input frame is not modified, so the cell that calls this can be
    re-run safely and the raw data stays available to the caller.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``datetime`` column that ``pd.to_datetime`` can
        parse (strings or already-parsed timestamps).

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which ``datetime`` holds parsed timestamps and the
        columns ``hour``, ``day``, ``month``, ``year`` and ``weekday`` have
        been appended, in that order. ``weekday`` follows pandas'
        ``dt.weekday`` numbering (Monday = 0).

    Raises
    ------
    KeyError
        If ``df`` has no ``datetime`` column.
    """
    timestamps = pd.to_datetime(df['datetime'])
    # assign() works on a copy: the existing 'datetime' column is replaced in
    # its original position and the new columns are appended in keyword order.
    return df.assign(
        datetime=timestamps,
        hour=timestamps.dt.hour,
        day=timestamps.dt.day,
        month=timestamps.dt.month,
        year=timestamps.dt.year,
        weekday=timestamps.dt.weekday,
    )

# Add the calendar columns to both splits; the two calls are independent.
test_df = preprocess(test_df)
train_df = preprocess(train_df)

# The regression target is log1p(count); the evaluation loop undoes this
# with expm1 before scoring.
train_df['log_count'] = np.log1p(train_df['count'])

features = [
    # columns already present in the loaded data
    'season', 'weather', 'temp', 'atemp', 'humidity', 'windspeed',
    # columns produced by preprocess()
    'hour', 'day', 'month', 'year', 'weekday',
]
X, y = train_df[features], train_df['log_count']

print("Features ready:", features)

# Hold out 20% of the rows for validation; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate regressors. Each is fit on the log1p-transformed target and scored
# with RMSLE on the original count scale.
models = {
    'LinearRegression': LinearRegression(),
    'RandomForest': RandomForestRegressor(n_estimators=100, random_state=42),
    # NOTE(review): fit() below is called without an eval_set, so eval_metric
    # presumably has no effect here; and since y is already log1p(count),
    # 'rmse' may be the intended metric - confirm before relying on it.
    'XGBoost': xgb.XGBRegressor(n_estimators=100, random_state=42, eval_metric='rmsle')
}

# Maps model name -> validation RMSLE (lower is better).
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)  # still on the log1p scale
    # An unconstrained model can predict below 0 on the log1p scale, which
    # expm1 turns into a negative count. mean_squared_log_error raises
    # ValueError on negative inputs, so floor the predicted counts at 0.
    # This is a no-op whenever every prediction is already non-negative.
    pred_counts = np.maximum(np.expm1(y_pred), 0)
    rmsle = np.sqrt(mean_squared_log_error(np.expm1(y_test), pred_counts))
    results[name] = rmsle
    print(f"{name}: RMSLE = {rmsle:.4f}")


# Pick the model with the lowest validation RMSLE; on a tie, min() keeps the
# first one inserted into `results`.
best_model_name = min(results.keys(), key=lambda candidate: results[candidate])
best_model = models[best_model_name]

# Persist the fitted estimator as-is (it was trained on X_train only).
joblib.dump(best_model, 'bike_model.pkl')

print(f"\n Best: {best_model_name} (RMSLE: {results[best_model_name]:.4f})")
print("Model saved: bike_model.pkl")

Features ready: ['season', 'weather', 'temp', 'atemp', 'humidity', 'windspeed', 'hour', 'day', 'month', 'year', 'weekday']
LinearRegression: RMSLE = 1.0246
RandomForest: RMSLE = 0.3291
XGBoost: RMSLE = 0.3087

 Best: XGBoost (RMSLE: 0.3087)
Model saved: bike_model.pkl
