In [38]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    mean_absolute_error,
    r2_score,
    precision_score,
    recall_score
)
import numpy as np
import pandas as pd
import joblib
import pickle
import datetime

In [39]:
# Read the merged CSV into the working frame `data`; the path is relative to
# the notebook's working directory.
MERGED_DATA_CSV = "final_merged_data.csv"
data = pd.read_csv(MERGED_DATA_CSV)

In [40]:
# Show the `capacity` value from the first row belonging to station 1.
# This only works on the freshly loaded frame: the feature-engineering cell
# below rebinds `data` to a column subset that no longer contains `capacity`.
data.loc[data["station_id"] == 1, "capacity"].iloc[0]

31

In [41]:
# Build the modelling table in a single pipeline:
#   1. temperature / humidity / pressure = midpoint of the max and min readings
#   2. average every value per (station_id, year, month, day, hour)
#   3. round the averaged bike count to a whole number
#   4. flag weekdays (1) vs. Saturday/Sunday (0) from the calendar date
#   5. keep only the target, the station id and the model features
# NOTE: `data` is rebound to the reduced frame, so this cell cannot be re-run
# without first re-running the cell that loads the CSV.
data = (
    data.assign(
        temperature=data["max_air_temperature_celsius"]
        .add(data["min_air_temperature_celsius"])
        .div(2),
        humidity=data["max_relative_humidity_percent"]
        .add(data["min_relative_humidity_percent"])
        .div(2),
        pressure=data["max_barometric_pressure_hpa"]
        .add(data["min_barometric_pressure_hpa"])
        .div(2),
    )
    .loc[
        :,
        [
            "num_bikes_available",
            "station_id",
            "year",
            "month",
            "day",
            "hour",
            "temperature",
            "humidity",
            "pressure",
        ],
    ]
    .groupby(["station_id", "year", "month", "day", "hour"])
    .mean()
    .reset_index()
    .assign(
        num_bikes_available=lambda hourly: hourly["num_bikes_available"].round(),
        # Any day name other than Saturday/Sunday counts as a weekday.
        is_weekday=lambda hourly: (
            ~pd.to_datetime(hourly[["year", "month", "day"]])
            .dt.day_name()
            .isin(["Saturday", "Sunday"])
        ).astype("int64"),
    )
    .loc[
        :,
        [
            "num_bikes_available",
            "station_id",
            "is_weekday",
            "hour",
            "temperature",
            "humidity",
            "pressure",
        ],
    ]
)

In [42]:
# Preview the first five rows of the engineered modelling table.
data.head(n=5)

Unnamed: 0,num_bikes_available,station_id,is_weekday,hour,temperature,humidity,pressure
0,20.0,1,0,0,13.89,84.475,1002.4225
1,20.0,1,0,5,12.235,87.95,1000.2
2,22.0,1,0,6,11.263333,89.583333,1000.108333
3,25.0,1,0,7,11.236667,89.633333,1000.151667
4,30.0,1,0,8,11.68,86.3,1000.325


In [43]:
# Candidate regressors, keyed by the label used in the performance report.
# Estimators that accept a seed are pinned to 42 so runs are repeatable.
# NOTE(review): KNN is fed the raw features; pressure values sit around 1000
# while is_weekday is 0/1, so distances may be dominated by the large-valued
# columns -- confirm whether feature scaling was intended.
models = dict(
    LinearRegression=LinearRegression(),
    DecisionTree=DecisionTreeRegressor(random_state=42),
    RandomForest=RandomForestRegressor(n_estimators=50, random_state=42),
    KNN=KNeighborsRegressor(n_neighbors=5),
    GradientBoosting=GradientBoostingRegressor(random_state=42),
)

# Scores recorded for every (model, station) pair.
metrics = ["mae", "r2", "precision", "recall"]

# model label -> metric name -> list with one score per station.
model_performance = {
    model_name: dict((metric_name, []) for metric_name in metrics)
    for model_name in models
}

# Station ids in order of first appearance; one model is fitted per id.
unique_ids = data["station_id"].unique()

In [44]:
# Benchmark every candidate regressor on each station separately.
# For each station: 70/30 train/test split (seeded), fit, predict, then record
# MAE and R2 on the raw predictions plus precision/recall on a binary
# "at least one bike available" view (value > 0).
# NOTE(review): predictions are continuous, so `y_pred > 0` labels any positive
# fraction as "available" -- confirm this is the intended threshold.
feature_columns = ["is_weekday", "hour", "temperature", "humidity", "pressure"]

for station_id in unique_ids:
    station_rows = data.loc[data["station_id"] == station_id]

    X_train, X_test, y_train, y_test = train_test_split(
        station_rows[feature_columns],
        station_rows["num_bikes_available"],
        test_size=0.3,
        random_state=42,
    )

    # The ground-truth labels do not depend on the model, so build them once.
    has_bikes_actual = (y_test > 0).astype(int)

    for name, model in models.items():
        # fit() resets the estimator, so one instance can be reused per station.
        y_pred = model.fit(X_train, y_train).predict(X_test)
        has_bikes_predicted = (y_pred > 0).astype(int)

        station_scores = {
            "mae": mean_absolute_error(y_test, y_pred),
            "r2": r2_score(y_test, y_pred),
            "precision": precision_score(
                has_bikes_actual, has_bikes_predicted, zero_division=0
            ),
            "recall": recall_score(
                has_bikes_actual, has_bikes_predicted, zero_division=0
            ),
        }
        for metric_name, value in station_scores.items():
            model_performance[name][metric_name].append(value)

# Report each metric as the unweighted mean over all stations.
print(" Average Performance Across All Stations:")
for name, per_station_scores in model_performance.items():
    print(f" Model: {name}")
    for metric in metrics:
        print(f"  {metric.upper():<9}: {np.mean(per_station_scores[metric]):.4f}")

 Average Performance Across All Stations:
 Model: LinearRegression
  MAE      : 5.6490
  R2       : 0.1550
  PRECISION: 0.9013
  RECALL   : 0.9793
 Model: DecisionTree
  MAE      : 3.5598
  R2       : 0.3448
  PRECISION: 0.9334
  RECALL   : 0.9368
 Model: RandomForest
  MAE      : 3.2262
  R2       : 0.6248
  PRECISION: 0.9014
  RECALL   : 0.9825
 Model: KNN
  MAE      : 4.0989
  R2       : 0.4495
  PRECISION: 0.9044
  RECALL   : 0.9816
 Model: GradientBoosting
  MAE      : 3.8314
  R2       : 0.5351
  PRECISION: 0.9030
  RECALL   : 0.9792


In [45]:
# List the column labels of the modelling table.
data.columns

Index(['num_bikes_available', 'station_id', 'is_weekday', 'hour',
       'temperature', 'humidity', 'pressure'],
      dtype='object')

In [48]:
# Fit one RandomForest per station and pickle it as avail_station_<id>.pkl in
# the current working directory. RandomForest is used because it had the
# lowest MAE and highest R2 in the model comparison above.
#
# The loop variable is `station_id`, not `id`, so the builtin `id()` is not
# shadowed for the rest of the notebook; this matches the evaluation loop.
for station_id in unique_ids:
    station_rows = data[data["station_id"] == station_id]
    X = station_rows[["is_weekday", "hour", "temperature", "humidity", "pressure"]]
    y = station_rows["num_bikes_available"]

    # Same seeded 70/30 split as the evaluation, so the saved model is fitted
    # on exactly the rows it was benchmarked with. The held-out part is not
    # needed here, so it is discarded.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=42)

    model = RandomForestRegressor(n_estimators=50, random_state=42)
    model.fit(X_train, y_train)

    model_filename = f"avail_station_{station_id}.pkl"
    with open(model_filename, "wb") as file:
        pickle.dump(model, file)

    print(f"Model saved to {model_filename} for station {station_id}")



Model saved to avail_station_1.pkl for station 1
Model saved to avail_station_2.pkl for station 2
Model saved to avail_station_3.pkl for station 3
Model saved to avail_station_4.pkl for station 4
Model saved to avail_station_5.pkl for station 5
Model saved to avail_station_6.pkl for station 6
Model saved to avail_station_7.pkl for station 7
Model saved to avail_station_8.pkl for station 8
Model saved to avail_station_9.pkl for station 9
Model saved to avail_station_10.pkl for station 10
Model saved to avail_station_11.pkl for station 11
Model saved to avail_station_12.pkl for station 12
Model saved to avail_station_13.pkl for station 13
Model saved to avail_station_14.pkl for station 14
Model saved to avail_station_15.pkl for station 15
Model saved to avail_station_16.pkl for station 16
Model saved to avail_station_17.pkl for station 17
Model saved to avail_station_18.pkl for station 18
Model saved to avail_station_19.pkl for station 19
Model saved to avail_station_20.pkl for station 20

In [47]:
# Load the pickled model from the .pkl file


In [83]:
# Example prediction request for three time slots.
# NOTE(review): `bikes_predict` is not defined in any visible cell of this
# notebook (the preceding "load the pickled model" cell is empty), so this call
# fails on a fresh kernel unless the definition is restored.
# Presumably the first argument is the station id and the five lists follow the
# training feature order (is_weekday, hour, temperature, humidity, pressure)
# -- TODO confirm against the function definition.
bikes_predict(1, [1, 0, 1], [8, 14, 17], [15.5, 20.2, 18.3], [80, 65, 75], [1012, 1010, 1013])

Predictions: [15.42923073 19.94882091 17.31518963]
