In [1]:
!pip install pandas scikit-learn psycopg2-binary

Defaulting to user installation because normal site-packages is not writeable


In [4]:
#to build a model to predict no-shows

import pandas as pd
import psycopg2
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np

DB_HOST = "localhost"
DB_NAME = "salon_db"
DB_USER = "postgres"
DB_PASS = "Aug081978"

In [5]:
#load data from db

try:
  conn = psycopg2.connect(host=DB_HOST, database=DB_NAME, user=DB_USER, password=DB_PASS)
  query = "SELECT * FROM appointments"
  df = pd.read_sql(query, conn)
  conn.close()
  print("Data loaded successfully from database.")
  print("Data shape:, df.shape")
  print("Data preview:")
  print(df.head())
except Exception as e:
  print(f"Error loading data from database: {e}")

Data loaded successfully from database.
Data shape:, df.shape
Data preview:
   appointment_id  client_id           service  staff    appointment_time  \
0               1        113          Manicure    Bea 2025-05-18 15:00:00   
1               2          8      Gel Manicure  Sofia 2024-09-16 17:00:00   
2               3         23      Gel Manicure  Sofia 2024-09-28 17:00:00   
3               4        144   Haircut & Style  Sofia 2024-12-27 10:00:00   
4               5        117  Full Set Acrylic   Anna 2025-07-27 15:00:00   

         booking_time     status   price  
0 2025-05-07 15:00:00  Completed   500.0  
1 2024-08-21 17:00:00  Completed  1200.0  
2 2024-09-16 17:00:00  Completed  1200.0  
3 2024-12-18 10:00:00  Cancelled   800.0  
4 2025-07-15 15:00:00    No-Show  2000.0  


  df = pd.read_sql(query, conn)


In [7]:
# 2. Feature Engineering
print("\nPerforming feature engineering...")
df['appointment_time'] = pd.to_datetime(df['appointment_time'])
df['booking_time'] = pd.to_datetime(df['booking_time'])

# Target variable: 1 for 'No-Show', 0 otherwise
df['is_no_show'] = (df['status'] == 'No-Show').astype(int)

# Feature: Lead time in days
df['lead_time_days'] = (df['appointment_time'] - df['booking_time']).dt.days

# Feature: Day of the week
df['day_of_week'] = df['appointment_time'].dt.dayofweek # Monday=0, Sunday=6

# Feature: Client History (this is a more advanced feature)
df = df.sort_values(by=['client_id', 'appointment_time'])
df['previous_appointments'] = df.groupby('client_id').cumcount()
df['previous_no_shows'] = df[df['status'] == 'No-Show'].groupby('client_id').cumcount().fillna(0)
df['no_show_rate'] = (df['previous_no_shows'] / df['previous_appointments']).fillna(0)

# Select features and target
features = ['lead_time_days', 'day_of_week', 'previous_appointments', 'no_show_rate', 'price']
target = 'is_no_show'

# Handle potential missing values
df_model = df[features + [target]].dropna()

X = df_model[features]
y = df_model[target]

print("Feature engineering complete.")


Performing feature engineering...
Feature engineering complete.


In [9]:
#MODEL TRAINING

print("\nTraining the model...")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

model = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
model.fit(X_train, y_train)

print("Model training complete!")



Training the model...
Model training complete!


In [11]:
#MODEL EVALUATION

print("\nEvaluating the model...")
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))



Evaluating the model...
Model Accuracy: 0.92

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.98      0.95       837
           1       0.86      0.62      0.72       163

    accuracy                           0.92      1000
   macro avg       0.89      0.80      0.84      1000
weighted avg       0.92      0.92      0.92      1000


Confusion Matrix:
[[820  17]
 [ 62 101]]
