# Milestone Project 1: Food Vision 101

In [1]:
import tensorflow as tf

## Check GPU

We are going to use mixed-precision training, which needs a GPU with a compute capability of 7.0 or higher to be worthwhile. For that reason we are going to use a Tesla T4 (the mixed-precision-compatible GPU offered by Google Colab).

In [2]:
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-51c465c0-2955-0cd1-483d-f1921501f009)


## Get Helper Functions

In [3]:
!wget https://raw.githubusercontent.com/ShataayuM/AIML/refs/heads/main/helper_functions.py

--2025-09-13 14:26:58--  https://raw.githubusercontent.com/ShataayuM/AIML/refs/heads/main/helper_functions.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10246 (10K) [text/plain]
Saving to: ‘helper_functions.py’


2025-09-13 14:26:59 (99.4 MB/s) - ‘helper_functions.py’ saved [10246/10246]



In [4]:
!pip install tensorflow



In [5]:
# Import series of helper functions:
from helper_functions import create_tensorboard_callback, plot_loss_curves, compare_historys

## Downloading the Data and Preparing it

Step 1: Download & extract manually

In [6]:
!wget http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz
!tar -xvzf food-101.tar.gz

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
food-101/images/fish_and_chips/1495386.jpg
food-101/images/fish_and_chips/2286768.jpg
food-101/images/fish_and_chips/1614554.jpg
food-101/images/fish_and_chips/2255974.jpg
food-101/images/fish_and_chips/184654.jpg
food-101/images/fish_and_chips/1547043.jpg
food-101/images/fish_and_chips/1286350.jpg
food-101/images/fish_and_chips/1506981.jpg
food-101/images/fish_and_chips/1668039.jpg
food-101/images/fish_and_chips/3783246.jpg
food-101/images/fish_and_chips/2737696.jpg
food-101/images/fish_and_chips/2415083.jpg
food-101/images/fish_and_chips/3258877.jpg
food-101/images/fish_and_chips/1128671.jpg
food-101/images/fish_and_chips/3584880.jpg
food-101/images/fish_and_chips/1118825.jpg
food-101/images/fish_and_chips/1225146.jpg
food-101/images/fish_and_chips/3196765.jpg
food-101/images/fish_and_chips/2497717.jpg
food-101/images/fish_and_chips/3226554.jpg
food-101/images/fish_and_chips/3655543.jpg
food-101/images/fish_and_chips/40

Step 2: Organize the images into train/test folders using the official file lists

In [7]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Locations of the images and the split lists inside the extracted archive
base_dir = 'food-101'
images_dir = os.path.join(base_dir, 'images')
meta_dir = os.path.join(base_dir, 'meta')

# Each line of a split list names one image; surrounding whitespace is stripped
with open(os.path.join(meta_dir, 'train.txt')) as train_list:
    train_files = list(map(str.strip, train_list))

with open(os.path.join(meta_dir, 'test.txt')) as test_list:
    test_files = list(map(str.strip, test_list))

# Create a helper function to copy files to folders readable by flow_from_directory (optional)
import shutil
def organize_dataset(file_list, target_root, source_root=None):
    """Copy the images named in ``file_list`` into per-class folders under ``target_root``.

    Args:
        file_list: Iterable of ``"<class_name>/<image_id>"`` entries without a
            file extension. Empty entries are ignored.
        target_root: Directory that receives one sub-folder per class.
        source_root: Directory that holds the ``<class_name>/<image_id>.jpg``
            source images. Defaults to the notebook-level ``images_dir``.

    Raises:
        FileNotFoundError: If a listed source image does not exist.
    """
    if source_root is None:
        source_root = images_dir

    created_dirs = set()
    for entry in file_list:
        if not entry:
            # A blank line in the list would otherwise resolve to "<source_root>/.jpg"
            continue
        class_name = entry.split('/')[0]
        src = os.path.join(source_root, entry + '.jpg')
        dst_dir = os.path.join(target_root, class_name)
        if dst_dir not in created_dirs:
            os.makedirs(dst_dir, exist_ok=True)
            created_dirs.add(dst_dir)
        dst = os.path.join(dst_dir, os.path.basename(src))
        # Re-running the cell should not copy everything again; a size mismatch
        # means an earlier copy was interrupted, so that file is copied afresh.
        if os.path.exists(dst) and os.path.getsize(dst) == os.path.getsize(src):
            continue
        shutil.copy(src, dst)

# Optional: materialise both splits as data/train and data/test class folders
split_targets = {'data/train': train_files, 'data/test': test_files}

for target_root in split_targets:
    os.makedirs(target_root, exist_ok=True)

for target_root, split_files in split_targets.items():
    organize_dataset(split_files, target_root)


FileNotFoundError: [Errno 2] No such file or directory: 'food-101/meta/train.txt'

Step 3: Use Keras `image_dataset_from_directory` to load the images

In [None]:
import pathlib

# Folders produced by the organize step
data_dir = pathlib.Path("data/train")
test_dir = pathlib.Path("data/test")

batch_size = 32
img_size = (224, 224)

# Settings shared by both splits; one-hot labels are needed for categorical_crossentropy
shared_loader_args = dict(image_size=img_size,
                          batch_size=batch_size,
                          label_mode="categorical")

# Training data is shuffled with a fixed seed; test data keeps its directory order
train_dataset = tf.keras.utils.image_dataset_from_directory(
    data_dir, shuffle=True, seed=42, **shared_loader_args
)
test_dataset = tf.keras.utils.image_dataset_from_directory(
    test_dir, shuffle=False, **shared_loader_args
)

# Add prefetching for performance
AUTOTUNE = tf.data.AUTOTUNE
train_dataset, test_dataset = (
    split.prefetch(buffer_size=AUTOTUNE) for split in (train_dataset, test_dataset)
)

In [None]:
# Get class names: one per class sub-folder of data/train, in sorted order.
# Hidden entries and stray files (e.g. `.ipynb_checkpoints`) are skipped so they
# cannot shift the index -> name mapping used for the one-hot labels.
# NOTE(review): assumes image_dataset_from_directory orders classes the same way — confirm.
class_names = sorted(
    entry for entry in os.listdir('data/train')
    if not entry.startswith('.') and os.path.isdir(os.path.join('data/train', entry))
)

In [None]:
class_names

In [None]:
train_dir = 'data/train'
test_dir = 'data/test'

## Plot an image from TensorFlow Datasets

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Pull a single batch out of the training pipeline
images, labels = next(iter(train_dataset))

# Show the first five images of the batch in one row, each titled with its class name
plt.figure(figsize=(15, 5))
for sample_idx in range(5):
    plt.subplot(1, 5, sample_idx + 1)
    pixels = images[sample_idx].numpy().astype("uint8")  # imshow expects uint8 for 0-255 data
    plt.imshow(pixels)
    label_idx = np.argmax(labels[sample_idx])  # labels are one-hot encoded
    plt.title(class_names[label_idx])
    plt.axis("off")

## Creating modelling callbacks
callbacks used:
* Tensorboard
* ModelCheckpoint
* Early Stopping

In [None]:
# Create tensorboard Callback
from helper_functions import create_tensorboard_callback

# Model Checkpoint Callback: keep only the weights of the best epoch seen so far
checkpoint_path = "model_checpoint/cp.weights.h5"
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                      # The model is compiled with metrics = ['accuracy'], so the
                                                      # validation metric is logged as "val_accuracy" ("val_acc"
                                                      # is never logged, and no checkpoint would be written).
                                                      monitor = "val_accuracy",
                                                      save_best_only = True,
                                                      save_weights_only = True,
                                                      verbose = 0)

# Early Stopping Callback: stop after 3 epochs without val_loss improvement
# and roll the model back to the best weights
early_stopping = tf.keras.callbacks.EarlyStopping(monitor = "val_loss",
                                                  patience = 3,
                                                  verbose = 0,
                                                  restore_best_weights = True)

## Setup mixed precision training

Mixed precision training involves using a mix of float16 and float32 tensors to make better use of your GPU's memory.

In [None]:
# Turn on mixed precision training
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy("mixed_float16")

## Build Feature Extraction Model

In [None]:
from tensorflow.keras import layers
from tensorflow.keras import preprocessing

In [None]:
# Frozen EfficientNetB0 backbone without its classification head
input_shape = (224, 224, 3)
base_model = tf.keras.applications.EfficientNetB0(include_top = False)
base_model.trainable = False

# Functional model: input -> backbone -> global average pooling -> dense -> float32 softmax
inputs = layers.Input(shape = input_shape, name = "Input_layer")
# No rescaling layer is added: EfficientNet models have rescaling built in
backbone_features = base_model(inputs, training = False)
pooled_features = layers.GlobalAveragePooling2D(name = "pooling_layer")(backbone_features)
class_logits = layers.Dense(len(class_names))(pooled_features)
# The output activation is kept in float32 for numerical stability under mixed precision
outputs = layers.Activation("softmax", dtype = tf.float32, name = "softmax_float32")(class_logits)

model = tf.keras.Model(inputs, outputs)

In [None]:
# Compile the model
model.compile(loss = "categorical_crossentropy",
              optimizer = tf.keras.optimizers.Adam(),
              metrics = ['accuracy'])

In [None]:
model.summary()

### Checking layer dtype policies (are we using mixed precision?)

In [None]:
# One line per top-level layer: name, trainable flag, dtype and dtype policy
for lyr in model.layers:
  layer_report = (lyr.name, lyr.trainable, lyr.dtype, lyr.dtype_policy)
  print(*layer_report)

Going through the output above, we see:
  * `layer.name (str)` : a layer's human-readable name, can be defined by the name parameter on construction
  * `layer.trainable (bool)` : whether or not a layer is trainable (all of our layers are trainable except the EfficientNetB0 layer, since we set its trainable attribute to False)
  * `layer.dtype` : the data type a layer stores its variables in
  * `layer.dtype_policy` : the data type policy of a layer, which determines the data type it computes in


### Fitting the model

In [None]:
# Feature-extraction run: 3 epochs over every training batch. Validation uses only
# 15% of the test batches per epoch to keep it short.
# NOTE(review): test_dataset is built with shuffle=False, so those batches are
# presumably drawn from the first classes only — confirm the monitored val_* values
# are representative enough for the checkpoint / early-stopping callbacks.
history_101_food_classes_feature_extraction = model.fit(train_dataset,
                                                        epochs = 3,
                                                        steps_per_epoch = len(train_dataset),
                                                        validation_data = test_dataset,
                                                        validation_steps = int(0.15 * len(test_dataset)),
                                                        callbacks = [create_tensorboard_callback(dir_name = "training_logs",
                                                                                                 experiment_name = "efficientnetb0_101_classes_all_data_feature_extraction"),
                                                                     model_checkpoint,
                                                                     early_stopping])

In [None]:
# Evaluate model on whole test dataset
results_feature_extract_model = model.evaluate(test_dataset)
results_feature_extract_model

## Fine-Tuning our model

In [None]:
# Unfreeze all the layers
base_model.trainable = True

# Re freeze all except last 15 layers
for layer in base_model.layers[:-15]:
  layer.trainable = False

In [None]:
# Recompile the model
model.compile(loss = "categorical_crossentropy",
              optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001),
              metrics = ['accuracy'])

In [None]:
# Refitting the model up to epoch 8, i.e. 5 more epochs after the 3 feature-extraction epochs.
# `History.epoch` holds 0-based epoch indices, so the number of epochs already completed
# (and therefore the epoch to resume from) is the last index + 1.
fine_tune_initial_epoch = history_101_food_classes_feature_extraction.epoch[-1] + 1

history_101_food_classes_fine_tune = model.fit(train_dataset,
                                               epochs = 8,
                                               steps_per_epoch = len(train_dataset),
                                               validation_steps = int(0.15 * len(test_dataset)),
                                               validation_data = test_dataset,
                                               initial_epoch = fine_tune_initial_epoch,
                                               # Log under its own experiment name so the fine-tuning run is not
                                               # labelled as a second feature-extraction run in TensorBoard
                                               callbacks = [create_tensorboard_callback(dir_name = "training_logs",
                                                                                        experiment_name = "efficientnetb0_101_classes_all_data_fine_tune"),
                                                            model_checkpoint,
                                                            early_stopping])

In [None]:
# Evaluate model on whole test dataset
results_fine_tuned_model = model.evaluate(test_dataset)
results_fine_tuned_model

In [None]:
# Compare histories:
compare_historys(original_history = history_101_food_classes_feature_extraction,
                 new_history = history_101_food_classes_fine_tune)

## Saving and Loading our model and model_weights

In [None]:
# Saving our model
model.save("/content/drive/MyDrive/Food Vision 101/fine_tuned.h5")

In [None]:
# Saving our model's weights
model.save_weights("/content/drive/MyDrive/Food Vision 101/efficientnetb0_food101_fine_tuned_weights.weights.h5")

## Comparing results with TensorBoard Callback

In [None]:
%load_ext tensorboard
%tensorboard --logdir training_logs

## Evaluation Metrics:

In [None]:
def evaluation_metrics(y_true, y_pred, model):
  '''
  Score class predictions with accuracy and weighted precision, recall and F1.

  y_true = the actual value of the labels
  y_pred = the predicted value of the labels
  model = the name of the model (not used in the computation)

  Returns a dict mapping each metric label to its score.
  '''
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

  # Accuracy takes no averaging argument; the other three are averaged with 'weighted'
  scores = {"Accuracy: ": accuracy_score(y_true, y_pred)}
  weighted_scorers = (("Precision: ", precision_score),
                      ("Recall: ", recall_score),
                      ("F1-Score: ", f1_score))
  for metric_label, scorer in weighted_scorers:
    scores[metric_label] = scorer(y_true, y_pred, average='weighted')
  return scores

In [None]:
import numpy as np

# Predict on the test data
y_pred_probs = model.predict(test_dataset, verbose=1)  # shape: (num_samples, num_classes)
y_pred = np.argmax(y_pred_probs, axis=1)            # get predicted class indices

In [None]:
import numpy as np

# Get the actual labels by iterating over the test dataset batch by batch
y_true = np.concatenate([y for x, y in test_dataset], axis=0)
# The labels are read from the batches because a tf.data.Dataset has no `.classes` attribute like ImageDataGenerator.flow_from_directory() provides.  # one-hot labels, shape: (num_samples, num_classes)

In [None]:
model_result = evaluation_metrics(np.argmax(y_true, axis=1), y_pred, model)

In [None]:
model_result

## Model Deployment

In [None]:
%%writefile model.py
# Create base model
def create_model(input_shape = (224, 224, 3), num_classes = 101):
  """Build the EfficientNetB0 feature-extraction classifier.

  Args:
    input_shape: Shape of one input image as (height, width, channels).
    num_classes: Number of units in the final Dense layer (one per class).

  Returns:
    An uncompiled `tf.keras.Model` with a frozen EfficientNetB0 backbone,
    global average pooling, a Dense layer and a float32 softmax output.
  """
  # Imported here so that model.py is self-contained: the file is written out on
  # its own and cannot see the notebook's `tf`, `layers` or `class_names`.
  import tensorflow as tf
  from tensorflow.keras import layers

  base_model = tf.keras.applications.EfficientNetB0(include_top = False)
  base_model.trainable = False

  # Create functional model
  inputs = layers.Input(shape = input_shape, name = "Input_layer")
  # No rescaling required since efficientnet models have built in rescaling layers
  x = base_model(inputs, training = False)
  x = layers.GlobalAveragePooling2D(name = "pooling_layer")(x)
  x = layers.Dense(num_classes)(x)
  # Some parts (like loss computation and output activation) require float32 to maintain numerical stability.
  outputs = layers.Activation("softmax", dtype = tf.float32, name = "softmax_float32")(x)

  model = tf.keras.Model(inputs, outputs)
  return model

In [None]:
%%writefile app.py
import gradio as gr
import tensorflow as tf
import numpy as np

import tensorflow as tf


# Dummy Cast layer to fix loading issue from mixed precision training
class Cast(tf.keras.layers.Layer):
    """Layer that casts its inputs to a fixed dtype (float32 unless told otherwise).

    It is registered under the name "Cast" in `custom_objects` for `load_model`.
    """

    def __init__(self, dtype=tf.float32, **kwargs):
        # `dtype` is consumed here and is not forwarded to the base Layer constructor.
        super().__init__(**kwargs)
        # NOTE(review): `_dtype` may collide with an attribute the Keras base Layer
        # manages itself — confirm this behaves as intended on the installed version.
        self._dtype = dtype

    def call(self, inputs):
        # Return the inputs converted to the dtype stored at construction time.
        return tf.cast(inputs, self._dtype)

# Optional: Softmax Activation that TensorFlow sometimes names weirdly during serialization
class Activation(tf.keras.layers.Activation):
    # Plain subclass with no overrides; it only exists so it can be registered
    # under the name "Activation" in `custom_objects`.
    pass

custom_objects = {
    "Cast": Cast,
    "Activation": Activation,  # Handles 'softmax_float32'
}

# Load the model
model = tf.keras.models.load_model(
    "fine_tuned.h5",
    custom_objects=custom_objects
)


def predict(image):
    """Classify one RGB image and return a probability for every food class.

    Args:
        image: The uploaded picture as an (height, width, 3) array of RGB
            pixel values in the 0-255 range.

    Returns:
        dict mapping each class name to its predicted probability (float).
    """
    # Preprocess the image exactly like the training pipeline did: resize to the
    # model input size and feed raw 0-255 RGB values. The EfficientNet backbone
    # rescales internally, so no `preprocess_input` call is applied here. The
    # generic `imagenet_utils.preprocess_input` defaults to RGB->BGR conversion
    # plus mean subtraction, which the model never saw during training.
    image = tf.image.resize(image, [224, 224])          # Resize to model input size
    image = tf.cast(image, tf.float32)                  # Cast to float32
    image = tf.expand_dims(image, axis=0)               # Add batch dimension

    # Make prediction
    prediction = model.predict(image)

    # Class labels, in the same order as the model's output units
    class_names = [
        'apple_pie',
        'baby_back_ribs',
        'baklava',
        'beef_carpaccio',
        'beef_tartare',
        'beet_salad',
        'beignets',
        'bibimbap',
        'bread_pudding',
        'breakfast_burrito',
        'bruschetta',
        'caesar_salad',
        'cannoli',
        'caprese_salad',
        'carrot_cake',
        'ceviche',
        'cheese_plate',
        'cheesecake',
        'chicken_curry',
        'chicken_quesadilla',
        'chicken_wings',
        'chocolate_cake',
        'chocolate_mousse',
        'churros',
        'clam_chowder',
        'club_sandwich',
        'crab_cakes',
        'creme_brulee',
        'croque_madame',
        'cup_cakes',
        'deviled_eggs',
        'donuts',
        'dumplings',
        'edamame',
        'eggs_benedict',
        'escargots',
        'falafel',
        'filet_mignon',
        'fish_and_chips',
        'foie_gras',
        'french_fries',
        'french_onion_soup',
        'french_toast',
        'fried_calamari',
        'fried_rice',
        'frozen_yogurt',
        'garlic_bread',
        'gnocchi',
        'greek_salad',
        'grilled_cheese_sandwich',
        'grilled_salmon',
        'guacamole',
        'gyoza',
        'hamburger',
        'hot_and_sour_soup',
        'hot_dog',
        'huevos_rancheros',
        'hummus',
        'ice_cream',
        'lasagna',
        'lobster_bisque',
        'lobster_roll_sandwich',
        'macaroni_and_cheese',
        'macarons',
        'miso_soup',
        'mussels',
        'nachos',
        'omelette',
        'onion_rings',
        'oysters',
        'pad_thai',
        'paella',
        'pancakes',
        'panna_cotta',
        'peking_duck',
        'pho',
        'pizza',
        'pork_chop',
        'poutine',
        'prime_rib',
        'pulled_pork_sandwich',
        'ramen',
        'ravioli',
        'red_velvet_cake',
        'risotto',
        'samosa',
        'sashimi',
        'scallops',
        'seaweed_salad',
        'shrimp_and_grits',
        'spaghetti_bolognese',
        'spaghetti_carbonara',
        'spring_rolls',
        'steak',
        'strawberry_shortcake',
        'sushi',
        'tacos',
        'takoyaki',
        'tiramisu',
        'tuna_tartare',
        'waffles',
    ]

    # Return a dictionary of class probabilities
    return {class_names[i]: float(prediction[0][i]) for i in range(len(class_names))}

# Create the Gradio interface
image_input = gr.Image(type="numpy", image_mode="RGB", label="Upload Image")
label_output = gr.Label(num_top_classes=1)

interface = gr.Interface(
    fn=predict,
    inputs=image_input,
    outputs=label_output,
    title="TensorFlow Image Classifier",
    description="Upload an image for classification",
    examples=["/content/food-101/images/cheesecake/1023543.jpg","/content/food-101/images/ceviche/1013481.jpg"]
)

# Launch the interface
interface.launch()

In [None]:
print(np.__version__)

In [None]:
model.save("fine_tuned.keras")  # Saves in the new format



In [None]:
from google.colab import files
uploaded = files.upload()


In [None]:
# Write food_classes.txt: one class name per line, in class-index order
with open("food_classes.txt", "w") as classes_file:
    classes_file.writelines(name + "\n" for name in class_names)

In [None]:
from tensorflow import keras

# Load the trained model
model = keras.models.load_model("fine_tuned.keras")

# Load class labels (one per line); the `with` block closes the file handle again
with open("food_classes.txt") as classes_file:
    classes = classes_file.read().splitlines()


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import efficientnet
from tensorflow.keras.preprocessing import image

# 1️⃣ Download food_classes.txt if missing (already created in a previous cell)
if not os.path.exists("food_classes.txt"):
    !wget https://raw.githubusercontent.com/ShataayuM/Food-Vision-101/main/food_classes.txt

# 2️⃣ Load class labels (already created in a previous cell)
with open("food_classes.txt", "r") as f:
    class_names = [line.strip() for line in f]

# 3️⃣ Load the best trained model (use the locally saved model)
model_path = "fine_tuned.keras"
# No need to download since you saved it locally

model = tf.keras.models.load_model(model_path, compile=False)

# 4️⃣ Preprocessing function (MUST match training)
IMG_SIZE = (224, 224)
preprocess = efficientnet.preprocess_input

def predict_image(img_path):
    """Print the five most probable classes for the image stored at ``img_path``."""
    # Load at the model's input size, preprocess, then add the batch axis
    pil_image = image.load_img(img_path, target_size=IMG_SIZE)
    batch = np.expand_dims(preprocess(image.img_to_array(pil_image)), axis=0)

    class_probs = model.predict(batch)[0]
    # Indices of the five largest probabilities, highest first
    ranked = np.argsort(class_probs)[::-1][:5]
    print("\nTop 5 Predictions:")
    for class_idx in ranked:
        print(f"{class_names[class_idx]}: {class_probs[class_idx]*100:.2f}%")

# 5️⃣ Example run (use your uploaded image)
test_image_path = "cheesecake-1-22.jpg" # Corrected filename
predict_image(test_image_path)

In [None]:
from google.colab import files
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image

# Upload image
uploaded = files.upload()

# Load the trained model
model = tf.keras.models.load_model("fine_tuned.keras", compile=False)

# Load class labels
with open("food_classes.txt", "r") as f:
    class_names = [line.strip() for line in f]

for uploaded_name in uploaded:
    # Load the uploaded file at the model's input size and add the batch axis
    pil_image = image.load_img(uploaded_name, target_size=(224, 224))
    batch = np.expand_dims(image.img_to_array(pil_image), axis=0)
    batch = tf.keras.applications.efficientnet.preprocess_input(batch)

    # Predict, then rank the classes by probability (highest first, top five)
    class_probs = model.predict(batch)[0]
    ranked = np.argsort(class_probs)[::-1][:5]

    print("\nTop 5 Predictions:")
    for class_idx in ranked:
        print(f"{class_names[class_idx]}: {class_probs[class_idx]*100:.2f}%")