In [1]:
# Install required libraries (if not already installed)
!pip install -q tensorflow pandas matplotlib scikit-learn

In [2]:
import os
import zipfile
import random
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import mixed_precision

# Enable mixed precision for faster training
mixed_precision.set_global_policy('mixed_float16')

# Set seeds for reproducibility
tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Define paths
base_dir = '/content/drive/MyDrive/dataset'
zip_path = os.path.join(base_dir, 'images.zip')
extract_path = os.path.join(base_dir, 'images')
subset_path = os.path.join(base_dir, 'images_subset')

# Define number of top classes to keep
TOP_CLASSES = 20

# Delete existing subset folder if it exists
if os.path.exists(subset_path):
    shutil.rmtree(subset_path)

# Unzip if not already done
if not os.path.exists(extract_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(base_dir)

# Create reduced dataset with every 2nd image and top classes only
os.makedirs(subset_path, exist_ok=True)
all_classes = sorted(os.listdir(extract_path))[:TOP_CLASSES]

for class_folder in all_classes:
    class_path = os.path.join(extract_path, class_folder)
    if os.path.isdir(class_path):
        subset_class_path = os.path.join(subset_path, class_folder)
        os.makedirs(subset_class_path, exist_ok=True)
        images = sorted(os.listdir(class_path))
        for i, img_name in enumerate(images):
            if i % 2 == 0:
                src = os.path.join(class_path, img_name)
                dst = os.path.join(subset_class_path, img_name)
                shutil.copyfile(src, dst)

In [5]:
# Image dimensions
IMG_SIZE = 96
BATCH_SIZE = 32

# Data generators
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_gen = datagen.flow_from_directory(
    subset_path,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

val_gen = datagen.flow_from_directory(
    subset_path,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

Found 4338 images belonging to 12 classes.
Found 1084 images belonging to 12 classes.


In [6]:
# Build the model
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_model.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.4)(x)
predictions = Dense(train_gen.num_classes, activation='softmax', dtype='float32')(x)
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_96_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [7]:
# Train the model
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    callbacks=[early_stop]
)

  self._warn_if_super_not_called()


Epoch 1/10
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 279ms/step - accuracy: 0.1044 - loss: 3.4088 - val_accuracy: 0.2113 - val_loss: 2.3377
Epoch 2/10
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 181ms/step - accuracy: 0.1881 - loss: 2.6646 - val_accuracy: 0.3330 - val_loss: 1.9487
Epoch 3/10
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 175ms/step - accuracy: 0.2724 - loss: 2.2565 - val_accuracy: 0.4225 - val_loss: 1.7112
Epoch 4/10
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 206ms/step - accuracy: 0.3522 - loss: 2.0121 - val_accuracy: 0.4825 - val_loss: 1.5592
Epoch 5/10
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 202ms/step - accuracy: 0.4121 - loss: 1.8048 - val_accuracy: 0.5028 - val_loss: 1.4557
Epoch 6/10
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 181ms/step - accuracy: 0.4409 - loss: 1.6648 - val_accuracy: 0.5369 - val_loss: 1.3780
Epoch 7/10

In [10]:

# Load calorie mapping
calorie_df = pd.read_csv(os.path.join(base_dir, 'food101_calorie_mapping.csv'))
class_indices = train_gen.class_indices
index_to_class = {v: k for k, v in class_indices.items()}

# Calorie estimation function
def estimate_calories(pred):
    class_id = np.argmax(pred)
    food_item = index_to_class[class_id]  # e.g., 'apple_pie'

    calorie_row = calorie_df[calorie_df['food_class'] == food_item]
    if not calorie_row.empty:
        return food_item, float(calorie_row['calories_per_100g'].values[0])
    return food_item, None


# Example usage with a single image
from tensorflow.keras.preprocessing import image

def predict_image(img_path):
    img = image.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    prediction = model.predict(img_array)

    top_5 = prediction[0].argsort()[-5:][::-1]
    print("🔍 Top 5 Predictions:")
    for i in top_5:
        print(f"{index_to_class[i]}: {prediction[0][i]*100:.2f}%")

    return estimate_calories(prediction)

# Test on a known apple pie image
food, cal = predict_image('/content/drive/MyDrive/dataset/images_subset/baby_back_ribs/1007102.jpg')
print(f"\n🍽️ Final Prediction: {food}, Estimated Calories: {cal}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
🔍 Top 5 Predictions:
baby_back_ribs: 51.24%
bibimbap: 19.19%
bread_pudding: 9.65%
beef_tartare: 8.57%
breakfast_burrito: 5.65%

🍽️ Final Prediction: baby_back_ribs, Estimated Calories: 400.0
