# Bike Availability Prediction - ML Model for Custom Dataset
This notebook prepares the dataset, creates engineered features, trains a regression model, and evaluates its performance.

## Import Libraries

In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
import pickle
import joblib

## Load Dataset

In [20]:
df = pd.read_csv('final_merged_data.csv')
df.head()

Unnamed: 0,last_reported,station_id,num_bikes_available,num_docks_available,is_installed,is_renting,is_returning,name,address,lat,...,min_humidity_quality_indicator,min_relative_humidity_percent,humidity_std_quality_indicator,relative_humidity_std_deviation,max_pressure_quality_indicator,max_barometric_pressure_hpa,min_pressure_quality_indicator,min_barometric_pressure_hpa,pressure_std_quality_indicator,barometric_pressure_std_deviation
0,2024-12-01 00:10:00,10,15,1,True,True,True,DAME STREET,Dame Street,53.344006,...,0,83.2,0,0.284,0,1002.56,0,1002.26,0,0.083
1,2024-12-01 00:10:00,100,17,8,True,True,True,HEUSTON BRIDGE (SOUTH),Heuston Bridge (South),53.347107,...,0,83.2,0,0.284,0,1002.56,0,1002.26,0,0.083
2,2024-12-01 00:10:00,109,20,9,True,True,True,BUCKINGHAM STREET LOWER,Buckingham Street Lower,53.353333,...,0,83.2,0,0.284,0,1002.56,0,1002.26,0,0.083
3,2024-12-01 00:10:00,11,1,29,True,True,True,EARLSFORT TERRACE,Earlsfort Terrace,53.334293,...,0,83.2,0,0.284,0,1002.56,0,1002.26,0,0.083
4,2024-12-01 00:10:00,114,4,36,True,True,True,WILTON TERRACE (PARK),Wilton Terrace (Park),53.333652,...,0,83.2,0,0.284,0,1002.56,0,1002.26,0,0.083


## Feature Engineering and Mapping

In [21]:
# Compute derived features
df['temperature'] = df[['max_air_temperature_celsius', 'min_air_temperature_celsius']].mean(axis=1)
df['humidity'] = df['max_relative_humidity_percent']
df['wind_speed'] = 5  # Placeholder due to missing wind speed data
df['precipitation'] = 0  # Placeholder

# Create datetime and derive day_of_week
df['datetime'] = pd.to_datetime(df[['year', 'month', 'day', 'hour']])
df['day_of_week'] = df['datetime'].dt.weekday

# Rename target column
df['available_bikes'] = df['num_bikes_available']

## Clean the Data

In [22]:
df.dropna(subset=['station_id', 'temperature', 'humidity', 'wind_speed',
                 'precipitation', 'hour', 'day_of_week', 'available_bikes'], inplace=True)

## Prepare Features and Target

In [23]:
features = ['station_id', 'temperature', 'humidity', 'wind_speed', 'precipitation', 'hour', 'day_of_week']
target = 'available_bikes'
X = df[features]
y = df[target]

## Train/Test Split and Model Training

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)

## Evaluate the Model

In [25]:
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.2f}")
print(f"R² Score: {r2:.2f}")

Mean Absolute Error: 8.14
R² Score: -0.00


## Save the Model for Deployment

In [26]:
with open("bike_availability_model.pkl", "wb") as f:
    pickle.dump(model, f)
joblib.dump(model, "bike_availability_model.joblib")
print("Model saved successfully.")

Model saved successfully.


## Test the Trained Model

In [27]:
# Load the saved model
with open("bike_availability_model.pkl", "rb") as f:
    loaded_model = pickle.load(f)

# Create a sample test input (change values as needed)
sample_input = pd.DataFrame([{
    'station_id': 32,
    'temperature': 16.0,
    'humidity': 75.0,
    'wind_speed': 5.0,
    'precipitation': 0.0,
    'hour': 9,
    'day_of_week': 2  # 0 = Monday
}])

# Make a prediction
prediction = loaded_model.predict(sample_input)
print(f"Predicted available bikes: {prediction[0]:.2f}")

Predicted available bikes: 12.16
