In [None]:
import pandas as pd

df = pd.read_csv('drive/MyDrive/datasets/pizza_sales.csv')
df.head()

Unnamed: 0,pizza_id,order_id,pizza_name_id,quantity,order_date,order_time,unit_price,total_price,pizza_size,pizza_category,pizza_ingredients,pizza_name
0,1,1,hawaiian_m,1,01-01-2015,11:38:36,13.25,13.25,M,Classic,"Sliced Ham, Pineapple, Mozzarella Cheese",The Hawaiian Pizza
1,2,2,classic_dlx_m,1,01-01-2015,11:57:40,16.0,16.0,M,Classic,"Pepperoni, Mushrooms, Red Onions, Red Peppers,...",The Classic Deluxe Pizza
2,3,2,five_cheese_l,1,01-01-2015,11:57:40,18.5,18.5,L,Veggie,"Mozzarella Cheese, Provolone Cheese, Smoked Go...",The Five Cheese Pizza
3,4,2,ital_supr_l,1,01-01-2015,11:57:40,20.75,20.75,L,Supreme,"Calabrese Salami, Capocollo, Tomatoes, Red Oni...",The Italian Supreme Pizza
4,5,2,mexicana_m,1,01-01-2015,11:57:40,16.0,16.0,M,Veggie,"Tomatoes, Red Peppers, Jalapeno Peppers, Red O...",The Mexicana Pizza


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [None]:
df = df[['pizza_size', 'unit_price', 'pizza_ingredients', 'pizza_category']]
df.head()

Unnamed: 0,pizza_size,unit_price,pizza_ingredients,pizza_category
0,M,13.25,"Sliced Ham, Pineapple, Mozzarella Cheese",Classic
1,M,16.0,"Pepperoni, Mushrooms, Red Onions, Red Peppers,...",Classic
2,L,18.5,"Mozzarella Cheese, Provolone Cheese, Smoked Go...",Veggie
3,L,20.75,"Calabrese Salami, Capocollo, Tomatoes, Red Oni...",Supreme
4,M,16.0,"Tomatoes, Red Peppers, Jalapeno Peppers, Red O...",Veggie


In [None]:
X = df[['pizza_size', 'unit_price', 'pizza_ingredients']]
y = df['pizza_category']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
preprocessor = ColumnTransformer(transformers=[
    ('size', OneHotEncoder(), ['pizza_size']),
    ('ingredients', TfidfVectorizer(), 'pizza_ingredients')
], remainder='passthrough')

model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42))
])

model.fit(X_train, y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [None]:
from sklearn.metrics import accuracy_score, classification_report

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Accuracy: {accuracy*100:.0f}%")

print("\n📊 Classification Report:")
print(classification_report(y_test, y_pred))

✅ Accuracy: 100%

📊 Classification Report:
              precision    recall  f1-score   support

     Chicken       1.00      1.00      1.00      2155
     Classic       1.00      1.00      1.00      2935
     Supreme       1.00      1.00      1.00      2357
      Veggie       1.00      1.00      1.00      2277

    accuracy                           1.00      9724
   macro avg       1.00      1.00      1.00      9724
weighted avg       1.00      1.00      1.00      9724



In [None]:
print(df)

       pizza_id  order_id  pizza_name_id  quantity  order_date order_time  \
0             1         1     hawaiian_m         1  01-01-2015   11:38:36   
1             2         2  classic_dlx_m         1  01-01-2015   11:57:40   
2             3         2  five_cheese_l         1  01-01-2015   11:57:40   
3             4         2    ital_supr_l         1  01-01-2015   11:57:40   
4             5         2     mexicana_m         1  01-01-2015   11:57:40   
...         ...       ...            ...       ...         ...        ...   
48615     48616     21348  ckn_alfredo_m         1  31-12-2015   21:23:10   
48616     48617     21348  four_cheese_l         1  31-12-2015   21:23:10   
48617     48618     21348   napolitana_s         1  31-12-2015   21:23:10   
48618     48619     21349     mexicana_l         1  31-12-2015   22:09:54   
48619     48620     21350      bbq_ckn_s         1  31-12-2015   23:02:05   

       unit_price  total_price pizza_size pizza_category  \
0           13.