In [1]:
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
import numpy as np
import pandas as pd
from pathlib import Path
from nltk.tokenize import word_tokenize, sent_tokenize
import plotly.express as px
from plotly.offline import iplot

import os
import cv2
import argparse
import numpy as np
import pandas as pd
import glob
from PIL import Image
import datetime
import matplotlib.pyplot as plt

from collections import Counter

from tensorflow import keras
from keras.callbacks import Callback
from keras.backend import clear_session
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Input, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.applications import resnet50, mobilenet, xception
from tensorflow.keras.optimizers import SGD

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# data_path = "/content/drive/MyDrive/gproject/Food Images/Food Images"
images_fl = '/content/drive/MyDrive/gproject/Food Images/Food Images'

In [4]:
image_files = os.listdir(images_fl)
image_file_set= set(image_files)

In [6]:
df = pd.read_csv('/content/drive/MyDrive/gproject/Food Ingredients and Recipe Dataset with Image Name Mapping.csv')

In [7]:
print(df['Image_Name'].dtype)
print(df['Image_Name'].isnull().sum())
# print(df['Image_Name'].isnull().sum())
missing_extensions = df[~df['Image_Name'].str.endswith(('.jpg'))]
print(missing_extensions)
df['Image_Name'] = df['Image_Name'].str.strip().str.lower()

object
0
       Unnamed: 0                                              Title  \
0               0  Miso-Butter Roast Chicken With Acorn Squash Pa...   
1               1                    Crispy Salt and Pepper Potatoes   
2               2                        Thanksgiving Mac and Cheese   
3               3                 Italian Sausage and Bread Stuffing   
4               4                                       Newton's Law   
...           ...                                                ...   
13496       13496                               Brownie Pudding Cake   
13497       13497  Israeli Couscous with Roasted Butternut Squash...   
13498       13498  Rice with Soy-Glazed Bonito Flakes and Sesame ...   
13499       13499                                        Spanakopita   
13500       13500  Mexican Poblano, Spinach, and Black Bean "Lasa...   

                                             Ingredients  \
0      ['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...   
1     

In [8]:
# fixing mising extensio
df['Image_Name'] = df['Image_Name'].apply(lambda x: x + '.jpg' if not x.endswith(('.jpg', '.png')) else x)

In [9]:
df['Ingredients'] = df['Ingredients'].apply(lambda x: x.split(','))
valid_ingredients_list = sorted(list({Ingredient.strip() for row in df['Ingredients'] for Ingredient in row}))
NUM_INGREDIENTS = len(valid_ingredients_list)

In [10]:
NUM_INGREDIENTS

83374

In [11]:
# Ingredient to index mapping
ingredient_to_index ={Ingredient: idx for idx, Ingredient in enumerate(valid_ingredients_list)}
# ingredient_to_index

In [12]:
#  ingredients into a multi-hot vector
def encode_ingredients(ingredients):

    indices = [ingredient_to_index[ingredient.strip()] for ingredient in ingredients if ingredient.strip() in ingredient_to_index]
    return tf.keras.utils.to_categorical(indices, num_classes=NUM_INGREDIENTS).sum(axis=0)

df['encoded_ingredients'] = df['Ingredients'].apply(encode_ingredients)


In [None]:
# df['encoded_ingredients']

In [13]:
# Preprocessing
IMG_SIZE = 224

def preprocess_image(image_path):

    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])
    image = tf.image.convert_image_dtype(image, tf.float32)  # Normalize to [0, 1]
    # Apply random contrast adjustment for additional variability
    image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    return image

In [14]:
def preprocess_data(row):
    image_name = row['Image_Name']  # Extract the image name from the row
    if image_name in image_file_set:
        # Only process the image if it exists in the directory
        image_path = os.path.join(images_fl, image_name)
        image = preprocess_image(image_path)
    else:
        # Handle missing images gracefully with a placeholder
        print(f"Warning: {image_name} not found in directory.")
        image = tf.zeros((IMG_SIZE, IMG_SIZE, 3))

    # Convert ingredients to a tensor
    ingredients = tf.convert_to_tensor(row['encoded_ingredients'], dtype=tf.float32)
    return image, ingredients


In [None]:
def create_dataset(df):

    data = [preprocess_data(row) for _, row in df.iterrows()]
    images, labels = zip(*data)
    dataset = tf.data.Dataset.from_tensor_slices((list(images), list(labels)))
    return dataset.shuffle(buffer_size=1000).batch(32).prefetch(tf.data.AUTOTUNE)

dataset = create_dataset(df)

In [None]:
# Data Augmentation
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.2),
    layers.RandomBrightness(0.2),
    layers.RandomSaturation(factor=0.5)
])


In [None]:
# Functional Model Definition
input_layer = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

# Augmentation and Rescaling
augmented = data_augmentation(input_layer)

# Convolutional Base with Global Average Pooling
x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(augmented)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
x = layers.GlobalAveragePooling2D()(x)  # Replaces Flatten for better generalization

# Dense Layers with Batch Normalization
x = layers.Dense(128, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)  # Regularization
output_layer = layers.Dense(NUM_INGREDIENTS, activation='sigmoid')(x)

# Compile Model
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Summary of the Model
model.summary()

# Train the Model
history = model.fit(dataset, epochs=10)

In [None]:


# Visualize Results
plt.figure(figsize=(12, 4))

# Training Accuracy
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Accuracy')
plt.title('Training Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

# Training Loss
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Loss')
plt.title('Training Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()