In [3]:
# Rapido bike ride request forecast using ML

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [5]:
# Load dataset
data = pd.read_csv("D://rapido_ride_data.csv", parse_dates=['timestamp'])
print(data)

              timestamp  ride_requests  temperature  humidity
0   2024-01-02 00:00:00            112           26        67
1   2024-01-02 01:00:00            189           20        64
2   2024-01-02 02:00:00            102           23        89
3   2024-01-02 03:00:00             24           32        62
4   2024-01-02 04:00:00            116           23        70
..                  ...            ...          ...       ...
140 2024-06-02 20:00:00             51           26        89
141 2024-06-02 21:00:00            133           21        64
142 2024-06-02 22:00:00            188           27        63
143 2024-06-02 23:00:00             72           20        52
144 2024-07-02 00:00:00            105           30        46

[145 rows x 4 columns]


In [7]:
# Feature Engineering
data['hour'] = data['timestamp'].dt.hour
data['dayofweek'] = data['timestamp'].dt.dayofweek
data['month'] = data['timestamp'].dt.month
data.drop(columns=['timestamp'], inplace=True)

In [9]:
# Define features and target
X = data.drop(columns=['ride_requests'])
y = data['ride_requests']
print(y)

0      112
1      189
2      102
3       24
4      116
      ... 
140     51
141    133
142    188
143     72
144    105
Name: ride_requests, Length: 145, dtype: int64


In [11]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train, X_test, y_train, y_test)

     temperature  humidity  hour  dayofweek  month
9             32        55     9          1      1
4             23        70     4          1      1
26            25        42     2          4      2
120           31        71     0          6      6
125           23        66     5          6      6
..           ...       ...   ...        ...    ...
71            20        77    23          5      3
106           30        60    10          3      5
14            32        40    14          1      1
92            20        69    20          1      4
102           23        85     6          3      5

[116 rows x 5 columns]      temperature  humidity  hour  dayofweek  month
69            20        69    21          5      3
140           26        89    20          6      6
27            31        59     3          4      2
19            31        71    19          1      1
42            20        64    18          4      2
117           34        85    21          3      5
126    

In [13]:
# Train Model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [17]:
# Predictions
y_pred = model.predict(X_test)

In [19]:
# Evaluate Model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"MAE: {mae}, RMSE: {rmse}")

MAE: 50.18965517241381, RMSE: 61.23502001193491


In [23]:
# Predict for a given hour
def predict_ride_requests(hour, temperature, humidity, dayofweek, month):
    input_data = np.array([[hour, temperature, humidity, dayofweek, month]])
    return model.predict(input_data)[0]

In [33]:
# Example Prediction
hour = 16  # 1 PM
temperature = 32  # Celsius
humidity = 54 # Percentage
dayofweek = 1  # monday
month = 4  # April

predicted_rides = predict_ride_requests(hour, temperature, humidity, dayofweek, month)
print(f"Predicted ride requests for {hour}:00 -> {predicted_rides}")

Predicted ride requests for 16:00 -> 87.54


