In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
from PIL import Image, UnidentifiedImageError
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Flatten, Dropout
from tensorflow.keras.applications import MobileNetV2
import os
import random
import shutil

# Function to create a subset of the dataset
def create_subset(original_dir, subset_dir, num_images_per_class):
    if not os.path.exists(subset_dir):
        os.makedirs(subset_dir)
    
    for class_name in os.listdir(original_dir):
        class_dir = os.path.join(original_dir, class_name)
        if not os.path.isdir(class_dir):
            continue  # Skip non-directory files like .DS_Store
        subset_class_dir = os.path.join(subset_dir, class_name)
        
        if not os.path.exists(subset_class_dir):
            os.makedirs(subset_class_dir)
        
        images = os.listdir(class_dir)
        selected_images = random.sample(images, min(num_images_per_class, len(images)))
        
        for image_name in selected_images:
            src = os.path.join(class_dir, image_name)
            dst = os.path.join(subset_class_dir, image_name)
            shutil.copyfile(src, dst)

# Paths
train_dir = "/Users/Barbara/Downloads/food11/training"
val_dir   = "/Users/Barbara/Downloads/food11/validation"
test_dir  = "/Users/Barbara/Downloads/food11/evaluation"

train_subset_dir = "/Users/Barbara/Downloads/food11/train_subset"
val_subset_dir = "/Users/Barbara/Downloads/food11/val_subset"
test_subset_dir = "/Users/Barbara/Downloads/food11/test_subset"

# Create subsets with a limited number of images per class
create_subset(train_dir, train_subset_dir, 48)
create_subset(val_dir, val_subset_dir, 48)
create_subset(test_dir, test_subset_dir, 48)

target_size = (224, 224)
batch_size = 16

# Custom Data Generator
class CustomImageDataGenerator(ImageDataGenerator):
    def flow_from_directory(self, directory, *args, **kwargs):
        generator = super().flow_from_directory(directory, *args, **kwargs)
        self.target_size = kwargs.get('target_size', (224, 224))
        self.num_classes = generator.num_classes
        self.filepaths = generator.filepaths
        self.labels = generator.classes
        generator._get_batches_of_transformed_samples = self._get_batches_of_transformed_samples
        return generator

    def _get_batches_of_transformed_samples(self, index_array):
        batch_x = np.zeros((len(index_array),) + self.target_size + (3,), dtype=self.dtype)
        batch_y = np.zeros((len(index_array), self.num_classes), dtype=self.dtype)
        
        for i, j in enumerate(index_array):
            img_path = self.filepaths[j]
            img = safe_load_img(img_path, self.target_size)
            if img is not None:
                batch_x[i] = img
                batch_y[i] = self._get_onehot(self.labels[j])
        return batch_x, batch_y

    def _get_onehot(self, label):
        onehot = np.zeros(self.num_classes)
        onehot[label] = 1.0
        return onehot

# Function to safely load images
def safe_load_img(path, target_size=(224, 224)):
    try:
        img = Image.open(path)
        img = img.resize(target_size)
        img_array = image.img_to_array(img)
        return img_array
    except UnidentifiedImageError:
        return None

# Data Generators with error handling
train_datagen = CustomImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True
)

valid_datagen = CustomImageDataGenerator(rescale=1./255)
test_datagen = CustomImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_subset_dir,
    target_size=target_size,
    batch_size=batch_size,
    color_mode='rgb',
    shuffle=True,
    seed=42,
    class_mode='categorical'
)

valid_generator = valid_datagen.flow_from_directory(
    val_subset_dir,
    target_size=target_size,
    batch_size=batch_size,
    color_mode='rgb',
    shuffle=False,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_subset_dir,
    target_size=target_size,
    batch_size=batch_size,
    color_mode='rgb',
    shuffle=False,
    class_mode='categorical'
)

# Print labels to ensure correct mapping
labels = list(test_generator.class_indices.keys())
print("Number of classes:", len(labels))
print(labels)

# Use a smaller model for faster training
base_model = MobileNetV2(input_shape=(224, 224, 3), weights='imagenet', include_top=False)
num_classes = len(labels)  # Ensure num_classes matches the number of classes in the data

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)  # Adding dropout for regularization
out = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=out)

# Freeze base model layers
base_model.trainable = False

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model
num_epochs = 5  # Reduced number of epochs for faster training
STEP_SIZE_TRAIN = min(len(train_generator), train_generator.n // train_generator.batch_size)
STEP_SIZE_VALID = min(len(valid_generator), valid_generator.n // valid_generator.batch_size)
STEP_SIZE_TEST = min(len(test_generator), test_generator.n // test_generator.batch_size)

history = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=num_epochs,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID
)

# Save the model
model.save('/Users/Barbara/Desktop/Ironhack/Final_Project/food_recognition_model4.h5')

# Evaluate the model on the test set
loss, acc = model.evaluate(test_generator, steps=STEP_SIZE_TEST)
print(f"Test accuracy: {acc:.3f}\nTest Loss: {loss:.3f}")

# Confusion matrix and classification report
preds = model.predict(test_generator)
y_pred = np.argmax(preds, axis=1)
y_actual = test_generator.classes
cm = confusion_matrix(y_actual, y_pred)
print(cm)
print(classification_report(y_actual, y_pred, target_names=labels))


In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
from PIL import Image, UnidentifiedImageError
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Custom ImageDataGenerator class
class CustomImageDataGenerator(ImageDataGenerator):
    def flow_from_directory(self, directory, *args, **kwargs):
        generator = super().flow_from_directory(directory, *args, **kwargs)
        self.target_size = kwargs.get('target_size', (224, 224))
        self.num_classes = generator.num_classes
        self.filepaths = generator.filepaths
        self.labels = generator.classes
        return generator

# Function to safely load images
def safe_load_img(path, target_size=(224, 224)):
    try:
        img = Image.open(path)
        img = img.resize(target_size)
        img_array = image.img_to_array(img)
        return img_array / 255.0  # Normalize here
    except Exception as e:
        print(f"Error loading image {path}: {str(e)}")
        return None

# Data Generators
train_datagen = CustomImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True
)

valid_datagen = CustomImageDataGenerator()
test_datagen = CustomImageDataGenerator()

# Directories for data
train_dir = "/Users/Barbara/Desktop/Ironhack/Final_Project/food11/training"
valid_dir   = "/Users/Barbara/Desktop/Ironhack/Final_Project/food11/validation"
test_dir  = "/Users/Barbara/Desktop/Ironhack/Final_Project/food11/evaluation"

# Generators
train_generator = train_datagen.flow_from_directory(train_dir, target_size=(224, 224), batch_size=32, class_mode='categorical')
valid_generator = valid_datagen.flow_from_directory(valid_dir, target_size=(224, 224), batch_size=32, class_mode='categorical')
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(224, 224), batch_size=32, class_mode='categorical')

# Get the number of classes from the train generator
num_classes = train_generator.num_classes

# Use a simpler model for testing
model = Sequential([
    Flatten(input_shape=(224, 224, 3)),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print model summary
model.summary()

# Custom training loop
num_epochs = 5
batch_size = 32

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    
    # Training
    train_loss = 0
    train_accuracy = 0
    train_batches = 0
    
    for x_batch, y_batch in train_generator:
        if train_batches >= len(train_generator):
            break
        
        train_result = model.train_on_batch(x_batch, y_batch)
        train_loss += train_result[0]
        train_accuracy += train_result[1]
        train_batches += 1
        
        if train_batches % 10 == 0:
            print(f"Batch {train_batches}: Loss = {train_result[0]:.4f}, Accuracy = {train_result[1]:.4f}")
    
    train_loss /= train_batches
    train_accuracy /= train_batches
    print(f"Training - Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}")
    
    # Validation
    val_loss = 0
    val_accuracy = 0
    val_batches = 0
    
    for x_batch, y_batch in valid_generator:
        if val_batches >= len(valid_generator):
            break
        
        val_result = model.test_on_batch(x_batch, y_batch)
        val_loss += val_result[0]
        val_accuracy += val_result[1]
        val_batches += 1
    
    val_loss /= val_batches
    val_accuracy /= val_batches
    print(f"Validation - Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

# Save the model
model.save('/Users/Barbara/Desktop/Ironhack/Final_Project/food_recognition_model2.h5')

# Evaluate the model on the test set
test_loss = 0
test_accuracy = 0
test_batches = 0
all_predictions = []
all_true_labels = []

for x_batch, y_batch in test_generator:
    if test_batches >= len(test_generator):
        break
    
    test_result = model.test_on_batch(x_batch, y_batch)
    test_loss += test_result[0]
    test_accuracy += test_result[1]
    
    predictions = model.predict_on_batch(x_batch)
    all_predictions.extend(np.argmax(predictions, axis=1))
    all_true_labels.extend(np.argmax(y_batch, axis=1))
    
    test_batches += 1

test_loss /= test_batches
test_accuracy /= test_batches
print(f"Test - Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.4f}")

Found 9866 images belonging to 11 classes.
Found 3430 images belonging to 11 classes.
Found 3347 images belonging to 11 classes.


2024-07-10 16:21:21.735059: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2024-07-10 16:21:21.735081: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2024-07-10 16:21:21.735089: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2024-07-10 16:21:21.735282: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-07-10 16:21:21.735295: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/5


2024-07-10 16:21:43.898043: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Batch 10: Loss = 38459.9570, Accuracy = 0.1312
Batch 20: Loss = 32780.9414, Accuracy = 0.1328
Batch 30: Loss = 25212.1055, Accuracy = 0.1250
Batch 40: Loss = 20246.5820, Accuracy = 0.1273
Batch 50: Loss = 16898.8438, Accuracy = 0.1256
Batch 60: Loss = 14485.8096, Accuracy = 0.1297
Batch 70: Loss = 12734.6758, Accuracy = 0.1299
Batch 80: Loss = 11341.3545, Accuracy = 0.1301
Batch 90: Loss = 10213.5107, Accuracy = 0.1288
Batch 100: Loss = 9319.8447, Accuracy = 0.1291
Batch 110: Loss = 8579.9160, Accuracy = 0.1261
Batch 120: Loss = 7948.5952, Accuracy = 0.1268
Batch 130: Loss = 7416.0024, Accuracy = 0.1238


TimeoutError: [Errno 60] Operation timed out

: 