In [9]:
# Import all the required modules
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.utils import resample
import pickle
import bz2

# Data Preprocessing
## Loading Data

In [3]:
# Read the merged dataset and discard incomplete rows in one chained step.
# dropna() is called without a subset, so a NaN in any column removes the row.
df = pd.read_csv("final_merged_data.csv").dropna()

# Show (rows, columns) of the cleaned frame as the cell output.
df.shape

(298946, 78)

## Feature Conversion

In [5]:
# Parse the timestamp column and derive the weekday from it in one pass.
# assign() evaluates its keyword arguments in order, so the second lambda
# already sees the parsed datetime version of last_reported.
df = df.assign(
    last_reported=lambda frame: pd.to_datetime(frame["last_reported"]),
    day_of_week=lambda frame: frame["last_reported"].dt.dayofweek,
)

Index(['last_reported', 'station_id', 'num_bikes_available',
       'num_docks_available', 'is_installed', 'is_renting', 'is_returning',
       'name', 'address', 'lat', 'lon', 'capacity', 'stno', 'year', 'month',
       'day', 'hour', 'minute', 'max_air_temp_quality_indicator',
       'max_air_temperature_celsius', 'min_air_temp_quality_indicator',
       'min_air_temperature_celsius', 'air_temp_std_quality_indicator',
       'air_temperature_std_deviation', 'max_grass_temp_quality_indicator',
       'max_grass_temperature_celsius', 'min_grass_temp_quality_indicator',
       'min_grass_temperature_celsius', 'grass_temp_std_quality_indicator',
       'grass_temperature_std_deviation',
       'max_soil_temp_5cm_quality_indicator',
       'max_soil_temperature_5cm_celsius',
       'min_soil_temp_5cm_quality_indicator',
       'min_soil_temperature_5cm_celsius',
       'soil_temp_std_5cm_quality_indicator',
       'soil_temperature_std_deviation_5cm',
       'max_soil_temp_10cm_quality_in

# Model Training

In [17]:
# Predictor columns shared by both availability models.
# NOTE(review): station_id is passed to the linear model as a plain number —
# confirm that treating the identifier as a numeric input is intended.
features = [
    "station_id",
    "max_air_temperature_celsius",
    "hour",
    "day_of_week",
]

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# shuffled split repeatable between runs.
df_train, df_test = train_test_split(
    df,
    shuffle=True,
    random_state=42,
    test_size=0.3,
)

def train_model(df, features, target):
    """Fit a LinearRegression that predicts one target column.

    Parameters
    ----------
    df : DataFrame holding both the predictor columns and the target column.
    features : list of column names used as model inputs.
    target : name of the column to predict.

    Returns
    -------
    The fitted LinearRegression estimator.
    """
    # fit() hands back the estimator itself, so it can be returned directly.
    return LinearRegression().fit(df[features], df[target])

# One model per availability target, both fitted on the training split.
linear_model_bike = train_model(
    df=df_train, features=features, target="num_bikes_available"
)
linear_model_stand = train_model(
    df=df_train, features=features, target="num_docks_available"
)

# Model Evaluation

In [18]:
def evaluate_model(df, features, target, model):
    """Print error metrics and fitted parameters for a regression model.

    Parameters
    ----------
    df : DataFrame containing the predictor columns and the target column.
    features : list of predictor column names, in the order the model was fitted.
    target : name of the column holding the true values.
    model : fitted estimator exposing predict(), coef_ and intercept_.

    Returns
    -------
    None. The report is written to standard output.
    """
    inputs, actual = df[features], df[target]

    # Predict first, then announce which target the report is about.
    predicted = model.predict(inputs)
    print(f"===Evaluation of Prediction Model on {target}")

    # Both scores are computed before either one is printed.
    mae, r2 = mean_absolute_error(actual, predicted), r2_score(actual, predicted)
    print(f"Mean Absolute Error: {mae}")
    print(f"R² Score: {r2}")

    # Pair each feature name with its coefficient by position.
    print("\nModel Coefficients:")
    for feature, coef in zip(features, model.coef_):
        print(f"{feature}: {coef}")
    print(f"Intercept: {model.intercept_}")

In [19]:
# Report hold-out performance of the bike-availability model.
evaluate_model(
    df=df_test,
    features=features,
    target="num_bikes_available",
    model=linear_model_bike,
)

===Evaluation of Prediction Model on num_bikes_available
Mean Absolute Error: 8.143381387483206
R² Score: 3.3673023225322396e-05

Model Coefficients:
station_id: -0.00022319831381595329
max_air_temperature_celsius: 0.008241699339950809
hour: -0.007231808966415689
day_of_week: 0.049937746316071945
Intercept: 12.118080026742923


In [20]:
# Report hold-out performance of the dock-availability model.
evaluate_model(
    df=df_test,
    features=features,
    target="num_docks_available",
    model=linear_model_stand,
)

===Evaluation of Prediction Model on num_docks_available
Mean Absolute Error: 8.412798396842275
R² Score: 0.13536223062456687

Model Coefficients:
station_id: 0.11874294153613411
max_air_temperature_celsius: -0.0289455302714171
hour: -0.01932124007603155
day_of_week: -0.06690845025153784
Intercept: 13.083090355141435


# Export Model

In [21]:
def save_model(filename, model):
    """Serialize a model object to disk with pickle.

    Parameters
    ----------
    filename : path of the file to create or overwrite.
    model : the object to pickle.
    """
    # Binary mode is required because pickle writes bytes.
    with open(filename, "wb") as handle:
        pickle.dump(model, handle)

In [22]:
# Persist each fitted model under its own file name.
save_model(filename="bike_availability_model.pkl", model=linear_model_bike)
save_model(filename="stand_availability_model.pkl", model=linear_model_stand)