In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix
)

from sklearn.linear_model import  LogisticRegression

In [None]:
# Read the food dataset from its CSV export into a DataFrame, then preview the
# first five rows as this cell's output.
pre_processed_food_data_df = pd.read_csv(
    filepath_or_buffer='/content/preprocessed_food_data.csv'
)
pre_processed_food_data_df.head(n=5)

Unnamed: 0,Calories,Protein,Fat,Carbs,Sugar,Fiber,Sodium,Cholesterol,Glycemic_Index,Water_Content,Serving_Size,Meal_Type,Preparation_Method,Is_Vegan,Is_Gluten_Free,Food_Name
0,0.24352,0.836749,0.317656,0.735682,-0.531377,-0.402021,1.148336,-0.0071,1.14299,-0.092108,1.089942,dinner,fried,False,False,Pizza
1,-0.648228,-0.823692,-0.073232,-0.724253,1.253716,-1.276747,-0.925378,0.536603,0.274922,0.247719,-1.023919,snack,raw,False,True,Ice Cream
2,0.704439,0.834584,1.181751,0.034357,-0.310853,-0.776398,1.204276,0.566685,-0.227055,-0.541627,1.121949,snack,raw,False,False,Burger
3,-0.81499,0.045687,-1.022364,0.351692,-0.817029,-0.086344,-0.093798,0.198478,0.214884,0.862291,-0.304975,lunch,fried,False,True,Sushi
4,0.060587,-0.496977,0.642501,0.067981,1.409472,-0.270756,-0.109681,-0.566907,0.337043,-1.106943,-1.718809,snack,baked,False,False,Donut


In [None]:
# Separate the predictors from the target label ("Food_Name").
X = pre_processed_food_data_df.drop(columns="Food_Name")
y = pre_processed_food_data_df["Food_Name"]

# Partition the predictors by dtype *family* instead of by exact dtype names.
# Listing only int64/float64 and object/bool would leave out columns of any other
# dtype (int32, float32, category, string, ...); a column missing from both lists is
# discarded by the ColumnTransformer these lists feed, whose default is
# remainder="drop". include/exclude "number" assigns every column to exactly one
# group; bool columns are not "number", so they stay on the categorical side.
num_cols = X.select_dtypes(include="number").columns
cat_cols = X.select_dtypes(exclude="number").columns

In [None]:
# Numeric branch: standardize each feature (zero mean, unit variance) using
# statistics learned when the pipeline is fitted.
numeric_pipeline = Pipeline([
    ("scaler", StandardScaler())
])

# Categorical branch: one-hot encode each category. handle_unknown="ignore" makes a
# category that was not seen during fit encode as all zeros instead of raising.
categorical_pipeline = Pipeline([
    ("encoder", OneHotEncoder(handle_unknown="ignore"))
])

In [None]:
# Route each column group through its own branch: numeric columns go to the scaler
# pipeline, categorical columns to the one-hot pipeline. Each entry is
# (name, transformer, columns).
feature_transformers = [
    ("num", numeric_pipeline, num_cols),
    ("cat", categorical_pipeline, cat_cols),
]
preprocessor = ColumnTransformer(transformers=feature_transformers)
preprocessor

In [None]:
# Train/test split: hold out 20% of the rows for evaluation.
# stratify=y keeps every food class at the same proportion in both partitions, so
# the per-class scores of the less frequent classes are measured on a representative
# sample; random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((25109, 15), (6278, 15), (25109,), (6278,))

In [None]:
# End-to-end estimator: the column preprocessing runs first and its output feeds a
# logistic-regression classifier left at its default settings.
pipeline_steps = [
    ('Preprocess_Step', preprocessor),
    ('Classifier', LogisticRegression()),
]
Model = Pipeline(steps=pipeline_steps)
Model

In [None]:
# Fit the preprocessing steps and the classifier on the training partition only.
Model.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for both partitions with the fitted pipeline: train first, then
# test. The train predictions allow a fit-vs-generalization comparison.
y_pred_train, y_pred_test = (
    Model.predict(features) for features in (X_train, X_test)
)

In [None]:
# Print a per-class precision/recall/F1 report for the train partition and then for
# the test partition, each under its own heading.
report_inputs = (
    ('Logistic Regression - Classification Report For Train', y_train, y_pred_train),
    ('Logistic Regression - Classification Report For Test', y_test, y_pred_test),
)
for heading, y_true, y_pred in report_inputs:
    print(heading)
    print(classification_report(y_true, y_pred))

Logistic Regression - Classification Report For Train
              precision    recall  f1-score   support

       Apple       0.98      0.98      0.98      1173
      Banana       0.97      0.98      0.97       971
      Burger       0.99      0.99      0.99      3969
       Donut       1.00      0.99      1.00      3537
   Ice Cream       1.00      1.00      1.00      2362
       Pasta       0.99      1.00      0.99      3150
       Pizza       0.99      0.99      0.99      4763
       Salad       1.00      0.99      0.99       794
       Steak       1.00      0.99      1.00      1599
       Sushi       0.99      1.00      1.00      2791

    accuracy                           0.99     25109
   macro avg       0.99      0.99      0.99     25109
weighted avg       0.99      0.99      0.99     25109

Logistic Regression - Classification Report For Test
              precision    recall  f1-score   support

       Apple       0.98      0.98      0.98       311
      Banana       0.97  