In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix
)

from sklearn.neighbors import KNeighborsClassifier

In [5]:
# Load the preprocessed food dataset from disk and preview its first five rows.
processed_food_df = pd.read_csv(
    filepath_or_buffer='/content/preprocessed_food_data.csv',
)
processed_food_df.head(n=5)

Unnamed: 0,Calories,Protein,Fat,Carbs,Sugar,Fiber,Sodium,Cholesterol,Glycemic_Index,Water_Content,Serving_Size,Meal_Type,Preparation_Method,Is_Vegan,Is_Gluten_Free,Food_Name
0,0.24352,0.836749,0.317656,0.735682,-0.531377,-0.402021,1.148336,-0.0071,1.14299,-0.092108,1.089942,dinner,fried,False,False,Pizza
1,-0.648228,-0.823692,-0.073232,-0.724253,1.253716,-1.276747,-0.925378,0.536603,0.274922,0.247719,-1.023919,snack,raw,False,True,Ice Cream
2,0.704439,0.834584,1.181751,0.034357,-0.310853,-0.776398,1.204276,0.566685,-0.227055,-0.541627,1.121949,snack,raw,False,False,Burger
3,-0.81499,0.045687,-1.022364,0.351692,-0.817029,-0.086344,-0.093798,0.198478,0.214884,0.862291,-0.304975,lunch,fried,False,True,Sushi
4,0.060587,-0.496977,0.642501,0.067981,1.409472,-0.270756,-0.109681,-0.566907,0.337043,-1.106943,-1.718809,snack,baked,False,False,Donut


In [12]:
# Count the missing values in every column (isna is the canonical spelling of isnull).
processed_food_df.isna().sum()

Unnamed: 0,0
Calories,0
Protein,0
Fat,0
Carbs,0
Sugar,1
Fiber,1
Sodium,1
Cholesterol,1
Glycemic_Index,1
Water_Content,1


In [6]:
## Identify Features & Target

# Target: the food label each row should be classified as.
y = processed_food_df["Food_Name"]

# Features: every remaining column once the target has been removed.
X = processed_food_df.drop(columns=["Food_Name"])

In [7]:
## Separate Numerical & Categorical Columns

# "number" selects every numeric dtype (int32, float32, uint8, ...), not only
# int64/float64. Columns that land in neither group are not passed to any
# transformer, so a numeric feature with a different width would otherwise be
# left out of the model without any warning.
num_cols = X.select_dtypes(include="number").columns

# Non-numeric features to be one-hot encoded. "category" is listed alongside
# object/bool so columns stored with pandas' categorical dtype are picked up too.
cat_cols = X.select_dtypes(include=["object", "bool", "category"]).columns

In [8]:
## Preprocessing Pipeline

# Numeric features are passed through a StandardScaler.
numeric_pipeline = Pipeline(steps=[
    ("scaler", StandardScaler()),
])

# Categorical features are one-hot encoded; handle_unknown="ignore" keeps
# transform() from raising on categories that were not seen during fit.
categorical_pipeline = Pipeline(steps=[
    ("encoder", OneHotEncoder(handle_unknown="ignore")),
])

# Only the last expression of a cell is rendered, so the former bare
# `numeric_pipeline` line in the middle of this cell was a no-op and is removed.
categorical_pipeline

In [9]:
## Combine Using ColumnTransformer ##

# Route each group of columns to its own preprocessing pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_pipeline, num_cols),
        ("cat", categorical_pipeline, cat_cols),
    ]
)
preprocessor

In [13]:
## Train-Test Split ##

# Identify rows with NaN in y
nan_y_mask = y.isna()

# Also flag rows with a missing numeric feature: StandardScaler lets NaN pass
# through and KNeighborsClassifier rejects NaN input, so such a row would make
# fit() fail even when its label is present.
nan_X_mask = X.select_dtypes(include="number").isna().any(axis=1)

# Drop the flagged rows from X and y together so they stay aligned.
drop_mask = nan_y_mask | nan_X_mask
X_cleaned = X[~drop_mask]
y_cleaned = y[~drop_mask]

# Stratified 80/20 split keeps the class proportions equal in both partitions;
# the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_cleaned, y_cleaned,
    test_size=0.2,
    stratify=y_cleaned,
    random_state=42
)

In [15]:
# End-to-end estimator: preprocessing followed by a k-nearest-neighbours
# classifier left at its default settings.
Model = Pipeline(
    steps=[
        ("Preprocess_Step", preprocessor),
        ("KNeighbors_classifier", KNeighborsClassifier()),
    ]
)

In [16]:
# Fit the preprocessing steps and the classifier on the training partition only.
Model.fit(X=X_train, y=y_train)

In [17]:
## Model Predict Evaluation ##

# Predict on the held-out rows and on the training rows; the two calls are
# independent, and both sets of predictions are kept for later comparison.
y_pred_test = Model.predict(X_test)
y_pred_train = Model.predict(X_train)

In [18]:
## Accuracy check ##

# Fraction of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_test)
accuracy

0.9861356932153392

In [19]:
# Print per-class precision/recall/F1 for the training and the test partition,
# in that order, each preceded by its heading.
for report_title, true_labels, predicted_labels in (
    ('KNeighbors - Classification Report For Train', y_train, y_pred_train),
    ('KNeighbors - Classification Report For Test', y_test, y_pred_test),
):
    print(report_title)
    print(classification_report(true_labels, predicted_labels))

KNeighbors - Classification Report For Train
              precision    recall  f1-score   support

       Apple       0.98      0.99      0.99       633
      Banana       0.99      0.98      0.99       503
      Burger       0.98      0.98      0.98      2194
       Donut       1.00      1.00      1.00      1947
   Ice Cream       1.00      0.99      1.00      1279
       Pasta       1.00      0.99      1.00      1691
       Pizza       0.99      0.99      0.99      2539
       Salad       1.00      0.99      0.99       435
       Steak       1.00      0.99      1.00       830
       Sushi       0.99      1.00      0.99      1507

    accuracy                           0.99     13558
   macro avg       0.99      0.99      0.99     13558
weighted avg       0.99      0.99      0.99     13558

KNeighbors - Classification Report For Test
              precision    recall  f1-score   support

       Apple       0.98      0.96      0.97       158
      Banana       0.98      0.98      0.98