In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset (path is resolved relative to the current working directory)
DATASET_PATH = 'wait_time_dataset.csv'
data = pd.read_csv(DATASET_PATH)

# Convert the Appointment Start Time and Optimal Consultation Time from HH:MM to minutes
def time_to_minutes(time_str):
    """Convert an ``'HH:MM'`` string into a total number of minutes.

    Args:
        time_str: Text holding two integer fields separated by a single
            colon, e.g. ``'09:30'``.

    Returns:
        ``hours * 60 + minutes`` as an ``int``.

    Raises:
        ValueError: If the text does not split into exactly two fields on
            ``':'``, or if either field is not a valid integer.
    """
    hours_text, minutes_text = time_str.split(':')
    return int(hours_text) * 60 + int(minutes_text)

# Re-encode both clock-time columns (HH:MM text) as integer minutes so the
# regressor can consume them as numeric values.
for time_column in ('Appointment Start Time', 'Optimal Consultation Time'):
    data[time_column] = data[time_column].apply(time_to_minutes)

# Define features and target
feature_columns = [
    'Total Slots',
    'Your Slot',
    'Number of Booked Slots Before Your Slot',
    'Number of Empty Slots Before Your Slot',
    'Average Consulting Time',
    'Appointment Start Time',
]
features = data[feature_columns]
target = data['Optimal Consultation Time']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion (seeded for
# reproducibility).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions (in minutes) for the held-out rows, used for evaluation below.
y_pred = model.predict(X_test)

# Convert predictions and true values back to HH:MM format for better readability
def minutes_to_time(minutes):
    """Format a number of minutes as a zero-padded ``'HH:MM'`` string.

    Fractional inputs (such as regressor predictions) are rounded to the
    nearest whole minute before formatting. Without that step a float like
    ``1089.25`` would be rendered as ``'18.0:9.25'`` because the float itself
    would be passed through the ``02`` format spec.

    Args:
        minutes: Total minutes as an int or float (NumPy scalars work too).

    Returns:
        ``'HH:MM'`` text. The hour field is not wrapped at 24, so inputs of
        1440 or more yield hours >= 24.

    Raises:
        ValueError: If ``minutes`` is NaN (it cannot be converted to an int).
        OverflowError: If ``minutes`` is infinite.
    """
    # int() guards against round() returning a float for some numeric types.
    whole_minutes = int(round(minutes))
    hours, remainder = divmod(whole_minutes, 60)
    return f'{hours:02}:{remainder:02}'

# Human-readable HH:MM versions of the held-out targets and the predictions.
# Model predictions are floats, so round to whole minutes before formatting;
# otherwise the fractional part leaks into the formatted text.
y_test_times = [minutes_to_time(int(round(m))) for m in y_test]
y_pred_times = [minutes_to_time(int(round(m))) for m in y_pred]

# Calculate R-squared score (computed once; r2_score comes from the import
# block at the top of the cell)
r2 = r2_score(y_test, y_pred)

# Output the metrics
print(f'Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}')
print(f'Mean Squared Error: {mean_squared_error(y_test, y_pred)}')
print(f'R-squared Score: {r2}')


# Example prediction with properly formatted input.
# Values follow the training column order; reusing features.columns keeps the
# sample aligned with the columns the model was fitted on. The last value is
# the Appointment Start Time in minutes (1000 -> 16:40), matching the
# HH:MM-to-minutes conversion applied to the training data.
sample_input = pd.DataFrame([[20, 5, 15, 2, 6, 1000]], columns=features.columns)

predicted_time = model.predict(sample_input)
# Round to the nearest minute rather than truncating, which would always bias
# the displayed time downwards.
predicted_minutes = int(round(predicted_time[0]))
print(f'Predicted Optimal Consultation Time: {minutes_to_time(predicted_minutes)}')


Mean Absolute Error: 22.511250000000004
Mean Squared Error: 914.0767124999993
R-squared Score: 0.9779130832554326
Predicted Optimal Consultation Time: 18:09
