In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
import pickle
import datetime

In [8]:
# Load the merged dataset from a CSV file; the path is relative, so it is
# resolved against the kernel's current working directory.
data = pd.read_csv("final_merged_data.csv")

In [5]:
# Build the modelling table in one pass:
#   1. derive each weather feature as the midpoint of its max/min columns,
#   2. average every column per (station, year, month, day, hour),
#   3. round the averaged bike count and flag weekdays (1) vs. weekends (0),
#   4. keep only the target, the station id and the model features.
group_keys = ["station_id", "year", "month", "day", "hour"]
weather_features = ["temperature", "humidity", "pressure"]

data = (
    data.assign(
        temperature=lambda df: df["max_air_temperature_celsius"]
        .add(df["min_air_temperature_celsius"])
        .div(2),
        humidity=lambda df: df["max_relative_humidity_percent"]
        .add(df["min_relative_humidity_percent"])
        .div(2),
        pressure=lambda df: df["max_barometric_pressure_hpa"]
        .add(df["min_barometric_pressure_hpa"])
        .div(2),
    )
    .loc[:, ["num_bikes_available", *group_keys, *weather_features]]
    .groupby(group_keys)
    .mean()
    .reset_index()
    .assign(
        # The hourly mean is fractional; round it back to a whole number of bikes.
        num_bikes_available=lambda df: df["num_bikes_available"].round(),
        # Anything that is not Saturday/Sunday counts as a weekday.
        is_weekday=lambda df: (
            ~pd.to_datetime(df[["year", "month", "day"]])
            .dt.day_name()
            .isin(["Saturday", "Sunday"])
        ).astype("int64"),
    )
    .loc[:, ["num_bikes_available", "station_id", "is_weekday", "hour", *weather_features]]
)

In [79]:
# Preview the first rows of `data` (head() shows five rows by default).
data.head()

Unnamed: 0,num_bikes_available,station_id,is_weekday,hour,temperature,humidity,pressure
0,20.0,1,0,0,13.89,84.475,1002.4225
1,20.0,1,0,5,12.235,87.95,1000.2
2,22.0,1,0,6,11.263333,89.583333,1000.108333
3,25.0,1,0,7,11.236667,89.633333,1000.151667
4,30.0,1,0,8,11.68,86.3,1000.325


In [80]:
# Train, evaluate and persist one LinearRegression model per station.
#
# For every station id found in `data`:
#   * split that station's rows 70/30 into train/test (fixed seed for reproducibility),
#   * fit a linear regression of num_bikes_available on FEATURE_COLUMNS,
#   * print MAE and R² on the held-out rows,
#   * pickle the fitted model to "avail_station_<id>.pkl" in the working directory.
FEATURE_COLUMNS = ["is_weekday", "hour", "temperature", "humidity", "pressure"]

# With test_size=0.3, train_test_split raises ValueError when the training
# side would be empty, which happens for fewer than two rows.
MIN_ROWS_PER_STATION = 2

unique_ids = data["station_id"].unique()
# The loop variable is `station_id` rather than `id`, so the builtin id()
# is not shadowed for the rest of the kernel session.
for station_id in unique_ids:
    station_data = data[data["station_id"] == station_id]

    # Skip stations that cannot be split instead of aborting the whole run.
    if len(station_data) < MIN_ROWS_PER_STATION:
        print(
            f"Skipping station {station_id}: only {len(station_data)} row(s), "
            "not enough for a train/test split"
        )
        continue

    X = station_data[FEATURE_COLUMNS]
    y = station_data["num_bikes_available"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    model = LinearRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Mean Absolute Error for station {station_id}: {mae}")
    print(f"R² Score for station {station_id}: {r2}")

    # One pickle file per station, named after the station id.
    model_filename = f"avail_station_{station_id}.pkl"
    with open(model_filename, "wb") as file:
        pickle.dump(model, file)

    print(f"Model saved to {model_filename} for station {station_id}")



Mean Absolute Error for station 1: 6.835697851499696
R² Score for station 1: 0.14122781807488982
Model saved to avail_station_1.pkl for station 1
Mean Absolute Error for station 2: 2.6826989264443184
R² Score for station 2: 0.17451637478800308
Model saved to avail_station_2.pkl for station 2
Mean Absolute Error for station 3: 4.552146429661017
R² Score for station 3: -0.048385234078357486
Model saved to avail_station_3.pkl for station 3
Mean Absolute Error for station 4: 4.575037770395293
R² Score for station 4: 0.07641158589542729
Model saved to avail_station_4.pkl for station 4
Mean Absolute Error for station 5: 10.569715325965632
R² Score for station 5: 0.12800842417898028
Model saved to avail_station_5.pkl for station 5
Mean Absolute Error for station 6: 4.65750284762502
R² Score for station 6: 0.2716266684124863
Model saved to avail_station_6.pkl for station 6
Mean Absolute Error for station 7: 5.96068658937712
R² Score for station 7: 0.055189419785037885
Model saved to avail_stat

In [None]:
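# Load the pickled model from the .pkl file
# NOTE(review): this cell contains no code, yet the next cell calls
# bikes_predict(), which is not defined in any visible cell. Restore its
# definition here so the notebook survives Restart Kernel -> Run All.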


In [83]:
# Example prediction call: first argument 1, followed by five three-element lists.
# NOTE(review): presumably the first argument is the station id and the lists
# line up with the training features (is_weekday, hour, temperature, humidity,
# pressure), one entry per scenario — confirm against bikes_predict's definition.
bikes_predict(1, [1, 0, 1], [8, 14, 17], [15.5, 20.2, 18.3], [80, 65, 75], [1012, 1010, 1013])

Predictions: [15.42923073 19.94882091 17.31518963]
