In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeClassifier
import joblib

df = pd.read_csv("../data/canteen_data.csv")
df.head()


Unnamed: 0,day_of_week,time_slot,item,historical_orders,item_popularity,is_exam_week,special_event_flag,weather,queue_length,active_counters,staff_count,avg_prep_time,profit_margin,ingredient_availability,prep_time,wastage_history,demand_qty,expected_wait_time,prep_level
0,Mon,Breakfast,Idli,58,0.870429,0,0,Normal,23,3,5,7.13324,0.433596,0.571433,11.508885,0.064103,67,6.7,Medium
1,Mon,Breakfast,Dosa,57,0.409095,0,0,Normal,29,1,5,8.506676,0.297646,0.646072,8.663618,0.164017,48,4.8,Low
2,Mon,Breakfast,Samosa,22,0.664527,0,0,Rainy,18,2,4,5.741524,0.26837,0.842117,9.401525,0.08051,16,1.6,High
3,Mon,Breakfast,Chai,27,0.404019,0,0,Rainy,10,2,4,8.109303,0.221919,0.921142,9.497541,0.148788,31,3.1,Medium
4,Mon,Breakfast,Coffee,50,0.495924,0,0,Rainy,25,1,4,7.857229,0.610726,0.982628,11.070342,0.119,55,5.5,High


In [2]:
# Features and target for Model 1
X1 = df[["day_of_week","time_slot","item","historical_orders",
         "item_popularity","is_exam_week","special_event_flag",
         "weather"]]

y1 = df["demand_qty"]

# Categorical columns
cat1 = ["day_of_week","time_slot","item","weather"]

pre1 = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat1)
    ],
    remainder="passthrough"
)

model1 = Pipeline(steps=[
    ("preprocess", pre1),
    ("rf", RandomForestRegressor(n_estimators=200, random_state=42))
])

X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, test_size=0.2, random_state=42)

model1.fit(X1_train, y1_train)

# Save model
joblib.dump(model1, "../models/model_demand.pkl")

print("MODEL 1 TRAINED & SAVED ✔")


MODEL 1 TRAINED & SAVED ✔


In [3]:
X2 = df[["queue_length","active_counters","staff_count",
         "avg_prep_time","time_slot"]]

y2 = df["expected_wait_time"]

cat2 = ["time_slot"]

pre2 = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat2)
    ],
    remainder="passthrough"
)

model2 = Pipeline([
    ("preprocess", pre2),
    ("gb", GradientBoostingRegressor(random_state=42))
])

X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2, random_state=42)

model2.fit(X2_train, y2_train)

joblib.dump(model2, "../models/model_waittime.pkl")

print("MODEL 2 TRAINED & SAVED ✔")


MODEL 2 TRAINED & SAVED ✔


In [4]:
X3 = df[["historical_orders","profit_margin","ingredient_availability",
         "prep_time","item_popularity","wastage_history"]]

y3 = df["prep_level"]

model3 = Pipeline([
    ("clf", DecisionTreeClassifier(max_depth=4, random_state=42))
])

X3_train, X3_test, y3_train, y3_test = train_test_split(X3, y3, test_size=0.2, random_state=42)

model3.fit(X3_train, y3_train)

joblib.dump(model3, "../models/model_prep_level.pkl")

print("MODEL 3 TRAINED & SAVED ✔")


MODEL 3 TRAINED & SAVED ✔
