# Food Image Classification Demo

This notebook demonstrates how to build a simple food classifier using transfer learning, storing data in MinIO, and tracking experiments with MLflow.

## 1. Install Required Packages

In [None]:
!conda create -n food-demo python=3.11 -y
!conda activate food-demo
!pip install --user tensorflow keras pillow boto3 mlflow requests

## 2. Set Up MinIO Connection

In [None]:
import os
import site
import sys

# Packages were installed with `pip install --user`; make sure that location
# is on the module search path of this kernel.
user_site = site.getusersitepackages()
if user_site not in sys.path:
    sys.path.append(user_site)

# Print the search path to help diagnose import problems
print(f"Python is looking for modules in these directories: {sys.path}")

# boto3 is imported only after the search path has been extended
import boto3
print(f"Successfully imported boto3 version: {boto3.__version__}")
from botocore.client import Config

# Connection settings for the MinIO endpoint used throughout this notebook
minio_settings = {
    'endpoint_url': 'http://minio:9000',
    'aws_access_key_id': 'minioadmin',
    'aws_secret_access_key': 'minioadmin',
    'config': Config(signature_version='s3v4'),
    'region_name': 'us-east-1',
}
s3_client = boto3.client('s3', **minio_settings)

# Bucket that will hold the food images; creating it twice is not an error
bucket_name = 'food-images'
try:
    s3_client.create_bucket(Bucket=bucket_name)
except s3_client.exceptions.BucketAlreadyOwnedByYou:
    print(f"Bucket '{bucket_name}' already exists")
except Exception as e:
    print(f"Error creating bucket: {e}")
else:
    print(f"Bucket '{bucket_name}' created successfully")

## 3. Download Food Images Dataset

Instead of the full Food-101 dataset, we'll download a handful of sample images from public URLs, focusing on just 3 categories (pizza, sushi, hamburger) for this demo.

In [None]:
import os
import requests
from PIL import Image
from io import BytesIO
import time

# Local staging area: one sub-folder per food category
data_root = '/tmp/food-data'
os.makedirs(data_root, exist_ok=True)
classes = ['pizza', 'sushi', 'hamburger']

for food_class in classes:
    os.makedirs(f'{data_root}/{food_class}', exist_ok=True)

# Sample image URLs for each class, keyed by class name (five URLs per class).
# NOTE(review): the links point to third-party hosts, so their long-term
# availability cannot be verified from here.
image_urls = {
    'pizza': [
        'https://upload.wikimedia.org/wikipedia/commons/a/a3/Eq_it-na_pizza-margherita_sep2005_sml.jpg',
        'https://www.bora.com/fileadmin/website_content/Rezepte/X_BO_Rezepte/X_BO_Automatikrezepte/Pizza.jpg',
        'https://www.zauberdergewuerze.de/magazin/wp-content/uploads/2023/03/05_oregano_istock-1174701047.jpg',
        'https://hero-pizza.vercel.app/static/media/pizza2.7f95f4bae3bf65fb9fe6.jpeg',
        'https://assets.tmecosys.com/image/upload/t_web_rdp_recipe_584x480_1_5x/img/recipe/ras/Assets/ecaeb2cc-a950-4645-a648-9137305b3287/Derivates/df977b90-193d-49d4-a59d-8dd922bcbf65.jpg'
    ],
    'sushi': [
        'https://assets.tmecosys.com/image/upload/t_web_rdp_recipe_584x480_1_5x/img/recipe/ras/Assets/64EF898D-2EDD-4B47-A456-E6A7D137AC91/Derivates/00f76cac-64f6-4573-be4f-e604a7d99143.jpg',
        'https://images.lecker.de/sushi-selber-machen-b3jpg,id=48df8ccb,b=lecker,w=1200,rm=sk.jpeg',
        'https://www.kindernetz.de/sendungen/schmecksplosion/1712332657170%2Cimage-knet-4154~_v-1x1@2dL_-029cdd853d61a51824ed2ee643deeae504b065c1.jpg',
        'https://www.justonecookbook.com/wp-content/uploads/2020/01/Sushi-Rolls-Maki-Sushi-–-Hosomaki-1106-II.jpg',
        'https://www.spoton.com/blog/content/images/size/w1200/2024/09/1.-what-is-maki-sushi-guide-thin-rolls-hand-roll-shrimp-tempura-tuna-salmon-rice.jpeg'
    ],
    'hamburger': [
        'https://assets.epicurious.com/photos/5c745a108918ee7ab68daf79/1:1/w_2560%2Cc_limit/Smashburger-recipe-120219.jpg',
        'https://www.allrecipes.com/thmb/vpth8WDEhejGg_pD7dQgWZVbjyQ=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc()/8667932-garlic-butter-burger-01-4x3-ccd6c1f3548b4aab83ae65dd4221bc7c.jpg',
        'https://www.southernliving.com/thmb/DChRkqQRlsAwsn5La1ZLprzJSzQ=/1500x0/filters:no_upscale():max_bytes(150000):strip_icc():focal(3073x1792:3075x1794)/ultimate-southern-burger_batch64_beauty01-86-b9c26e256dd34e39b6c0cfb0c02a9fef.jpg',
        'https://www.gilde.no/assets/images/_heroimage/umami-burger.jpg',
        'https://www.v-kitchen.ch/recipe/2382e534-f65b-4669-a5f4-8fc4731ebe45.jpg'
    ]
}

# Download images for each class and store them as JPEG files under
# /tmp/food-data/<class>/<n>.jpg. Failures are reported and skipped so one bad
# URL does not abort the whole download.
for food_class, urls in image_urls.items():
    print(f"Downloading {food_class} images...")
    saved_count = 0  # images actually written to disk for this class

    for i, url in enumerate(urls):
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                # Image.open raises if the payload is not a recognizable image
                img = Image.open(BytesIO(response.content))
                # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...);
                # normalize to RGB so such images are saved instead of
                # failing inside img.save().
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                img_path = f"/tmp/food-data/{food_class}/{i+1}.jpg"
                img.save(img_path, format='JPEG')
                saved_count += 1
                print(f"  - Saved {img_path}")
            else:
                print(f"  - Failed to download image {i+1} for {food_class}: HTTP {response.status_code}")
        except Exception as e:
            print(f"  - Error downloading image {i+1} for {food_class}: {e}")

        # Pause to avoid rate limiting
        time.sleep(0.5)

    # Create training and validation folders
    os.makedirs(f'/tmp/train/{food_class}', exist_ok=True)
    os.makedirs(f'/tmp/val/{food_class}', exist_ok=True)

    # Report what was really saved, not how many URLs were attempted
    print(f"Downloaded {saved_count} of {len(urls)} images for {food_class}")

print("Image download complete!")

### Alternative: Manual Upload

If the automatic download doesn't work, run the cell below to create the class directories, then copy your own images into them inside the container:

In [None]:
# Make sure every class has a folder that images can be dropped into
upload_dirs = [f'/tmp/food-data/{food_class}' for food_class in classes]
for upload_dir in upload_dirs:
    os.makedirs(upload_dir, exist_ok=True)

# Tell the user where manually provided images belong
print("For a real demo, you can upload images directly to these directories:")
for upload_dir in upload_dirs:
    print(f"{upload_dir}/")

## 4. Upload Images to MinIO

In [None]:
import glob

# Upload the images to MinIO under the key '<class>/<file name>'.
image_patterns = ('*.jpg', '*.jpeg', '*.png')

for food_class in classes:
    # Collect every supported image below the class folder (recursively)
    images = []
    for pattern in image_patterns:
        images += glob.glob(f'/tmp/food-data/{food_class}/**/{pattern}', recursive=True)

    print(f"Found {len(images)} images for {food_class}")

    uploaded = 0  # successful uploads only, so progress output stays truthful
    for image_path in images:
        s3_key = f'{food_class}/{os.path.basename(image_path)}'

        try:
            s3_client.upload_file(image_path, bucket_name, s3_key)
        except Exception as e:
            # Best effort: report the failure and carry on with the next file
            print(f"Error uploading {image_path}: {e}")
            continue

        uploaded += 1
        if uploaded % 10 == 0:
            print(f"Uploaded {uploaded} {food_class} images to MinIO")

    print(f"Completed uploading {food_class} images ({uploaded}/{len(images)} succeeded)")

## 5. Prepare Training and Validation Data

In [None]:
import numpy as np
import shutil

# Create train and validation directories
train_dir = '/tmp/train'
val_dir = '/tmp/val'

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# File extensions (lower-case) that are treated as images
image_extensions = ('.jpg', '.jpeg', '.png')

# Download images from MinIO and split them 80/20 into train/validation
for food_class in classes:
    os.makedirs(f'{train_dir}/{food_class}', exist_ok=True)
    os.makedirs(f'{val_dir}/{food_class}', exist_ok=True)

    try:
        # List objects in the bucket for this class
        response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=f'{food_class}/')

        # A single list call returns one page only; make truncation visible
        # instead of silently training on a partial listing.
        if response.get('IsTruncated'):
            print(f"Warning: object listing for {food_class} was truncated; only the first page is used")

        # Keep image objects only, BEFORE computing the split, so directory
        # placeholders and other files cannot skew the 80/20 ratio.
        image_keys = [
            obj['Key']
            for obj in response.get('Contents', [])
            if os.path.basename(obj['Key']).lower().endswith(image_extensions)
        ]

        if not image_keys:
            print(f"No images found for {food_class} in MinIO")
            continue

        print(f"Found {len(image_keys)} objects for {food_class}")

        # Split images 80/20 for training/validation; always keep at least
        # one training image so a tiny class is not validation-only.
        train_count = max(1, int(len(image_keys) * 0.8))

        for idx, key in enumerate(image_keys):
            # The first `train_count` images go to training, the rest to validation
            target_dir = train_dir if idx < train_count else val_dir
            local_path = f'{target_dir}/{food_class}/{os.path.basename(key)}'

            # Download the image
            s3_client.download_file(bucket_name, key, local_path)

            if (idx + 1) % 10 == 0:
                print(f"Downloaded {idx + 1} images for {food_class}")

        print(f"Completed downloading {food_class} images for training and validation")
    except Exception as e:
        print(f"Error processing {food_class}: {e}")

## 6. Build and Train the Model with MLflow Tracking

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# Set up MLflow tracking
import mlflow
import mlflow.keras

mlflow.set_tracking_uri("http://mlflow:5000")
mlflow.set_experiment("food-classification")

# Verify training data
for food_class in classes:
    train_images = glob.glob(f'{train_dir}/{food_class}/*')
    val_images = glob.glob(f'{val_dir}/{food_class}/*')
    print(f"{food_class}: {len(train_images)} training images, {len(val_images)} validation images")

# Set up data generators with augmentation for training
img_height, img_width = 224, 224
batch_size = 32

# The prediction helpers later in this notebook scale inputs with MobileNetV2's
# preprocess_input, so the generators must use the same function; otherwise the
# model is trained and evaluated on differently scaled pixels.
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation data gets the same scaling but no augmentation
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# `classes=classes` pins the label indices to the order of the `classes` list.
# Without it the indices follow the alphanumeric folder order, and decoding a
# prediction with `classes[argmax]` would return the wrong label.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    classes=classes
)

validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    classes=classes
)

In [None]:
# Transfer learning: reuse MobileNetV2 (ImageNet weights, no classifier head)
# as a feature extractor.
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3),
)

# Keep the pretrained weights fixed; only the new head is trainable
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# New classification head: pooling -> 128-unit hidden layer -> one softmax
# output per food class
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(128, activation='relu')(pooled)
predictions = Dense(len(classes), activation='softmax')(hidden)

model = Model(inputs=base_model.input, outputs=predictions)

# Compile for multi-class classification with one-hot labels
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Display model summary
model.summary()

In [None]:
# Train the model inside an MLflow run and log parameters, metrics and the
# model itself.
import mlflow
import os

# MLflow's S3 artifact client reads the endpoint and credentials from these
# environment variables.
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "http://minio:9000"
os.environ["AWS_ACCESS_KEY_ID"] = "minioadmin"
os.environ["AWS_SECRET_ACCESS_KEY"] = "minioadmin"

# Set up tracking server and experiment
mlflow.set_tracking_uri("http://mlflow:5000")
mlflow.set_experiment("food-classification-s3")

# Create MinIO bucket for MLflow artifacts if it doesn't exist
import boto3
s3_client = boto3.client(
    's3',
    endpoint_url='http://minio:9000',
    aws_access_key_id='minioadmin',
    aws_secret_access_key='minioadmin'
)

mlflow_bucket = "mlflow-artifacts"
try:
    s3_client.create_bucket(Bucket=mlflow_bucket)
    print(f"Created bucket '{mlflow_bucket}' for MLflow artifacts")
except Exception as e:
    print(f"Bucket '{mlflow_bucket}' might already exist: {e}")

epochs = 5

# Start MLflow run with S3 artifacts
with mlflow.start_run() as run:
    # Log parameters
    mlflow.log_param("model_type", "MobileNetV2")
    mlflow.log_param("img_height", img_height)
    mlflow.log_param("img_width", img_width)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("classes", classes)
    mlflow.log_param("epochs", epochs)

    # Train the model. No earlier cell calls model.fit, so `history` has to be
    # produced here. Validation is skipped when the validation set is empty.
    has_validation = validation_generator.samples > 0
    history = model.fit(
        train_generator,
        validation_data=validation_generator if has_validation else None,
        epochs=epochs,
    )

    # Log every metric Keras recorded, one value per epoch. Validation metrics
    # keep their 'val_' name; training metrics get a 'train_' prefix
    # (train_accuracy, train_loss, val_accuracy, val_loss).
    for metric_name, values in history.history.items():
        logged_name = metric_name if metric_name.startswith("val_") else f"train_{metric_name}"
        for epoch, value in enumerate(values):
            mlflow.log_metric(logged_name, float(value), step=epoch)

    # Try to log the model through MLflow
    try:
        # Create a sample input for model signature
        import numpy as np
        sample_input = np.zeros((1, img_height, img_width, 3), dtype=np.float32)
        signature = mlflow.models.infer_signature(sample_input, model.predict(sample_input))

        # The second argument is the artifact path RELATIVE to the run; it must
        # be passed only once. Where the artifact is physically stored is
        # decided by the tracking server / experiment configuration.
        mlflow.keras.log_model(model, "keras_model", signature=signature)
        artifact_path = f"{run.info.artifact_uri}/keras_model"
        print(f"Model saved to artifact store at {artifact_path}")
    except Exception as e:
        print(f"Could not save model to artifact store: {e}")
        # Fall back to saving in MinIO directly
        model_path = '/tmp/food_classifier_model.h5'
        model.save(model_path)
        s3_client.upload_file(model_path, bucket_name, 'models/food_classifier_model.h5')
        print(f"Model saved directly to MinIO: {bucket_name}/models/food_classifier_model.h5")

    run_id = run.info.run_id
    print(f"MLflow run logged with S3 artifacts. Run ID: {run_id}")
    # Link to the experiment this run belongs to instead of assuming id 0
    print(f"View in MLflow UI: http://localhost:5001/#/experiments/{run.info.experiment_id}/runs/{run_id}")

## 7. Test the Model with New Images

In [None]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
import matplotlib.pyplot as plt

# Classify a single image file with the in-memory model
def predict_food(image_path):
    """Predict the food class of the image stored at `image_path`.

    The image is resized to (img_height, img_width), scaled with MobileNetV2's
    `preprocess_input` and passed to the global `model`.

    Returns a tuple `(predicted_class, confidence, all_predictions)` where
    `all_predictions` maps every entry of `classes` to its score as a float.

    NOTE(review): decoding assumes the model's output order matches the order
    of `classes`, and that the model was trained on inputs scaled the same
    way -- confirm against the training generators.
    """
    loaded = image.load_img(image_path, target_size=(img_height, img_width))
    batch = np.expand_dims(image.img_to_array(loaded), axis=0)
    batch = preprocess_input(batch)

    # Only one image is in the batch, so keep its score vector
    scores = model.predict(batch)[0]
    top_index = np.argmax(scores)

    # Per-class scores, used for the bar chart in the visualization
    all_predictions = {label: float(scores[pos]) for pos, label in enumerate(classes)}

    return classes[top_index], np.max(scores), all_predictions

def _pick_samples(root_dir, per_class=2):
    """Return up to `per_class` file paths from each class folder under `root_dir`."""
    picked = []
    for class_name in classes:
        picked.extend(glob.glob(f'{root_dir}/{class_name}/*')[:per_class])
    return picked


# Prefer validation images; use training images only if validation is empty
test_images = _pick_samples(val_dir) or _pick_samples(train_dir)

# Display and predict test images
for test_image in test_images:
    predicted_class, confidence, all_predictions = predict_food(test_image)
    # The parent folder name is the ground-truth label
    true_class = os.path.basename(os.path.dirname(test_image))

    # Show the image with predicted and true class in the title
    img = image.load_img(test_image)
    plt.figure(figsize=(8, 6))
    plt.imshow(img)
    plt.title(f"Prediction: {predicted_class} ({confidence:.2f})\nTrue class: {true_class}")
    plt.axis('off')
    plt.show()

    # Horizontal bar chart with the score of every class
    labels = list(all_predictions.keys())
    scores = list(all_predictions.values())
    plt.figure(figsize=(10, 2))
    plt.barh(labels, scores)
    plt.xlabel('Confidence')
    plt.title('Class Predictions')
    plt.xlim(0, 1)
    plt.tight_layout()
    plt.show()

    # Text summary of the same result
    print(f"Image: {test_image}")
    print(f"Prediction: {predicted_class} with {confidence:.2f} confidence")
    print(f"True class: {true_class}")
    print("All predictions:", all_predictions)
    print("---")

## 8. Load Model from MLflow and Make Predictions

In [None]:
import mlflow.keras
import boto3
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
import numpy as np

# Set up environment variables for MLflow to connect to MinIO
import os
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "http://minio:9000"
os.environ["AWS_ACCESS_KEY_ID"] = "minioadmin"
os.environ["AWS_SECRET_ACCESS_KEY"] = "minioadmin"

mlflow.set_tracking_uri("http://mlflow:5000")

# Find the latest run of the experiment the training cell logs to. Searching
# experiment "0" would look in the default experiment, which is not where the
# runs are stored; "0" is kept only as a fallback if the lookup fails.
experiment = mlflow.get_experiment_by_name("food-classification-s3")
experiment_ids = [experiment.experiment_id] if experiment is not None else ["0"]

latest_runs = mlflow.search_runs(experiment_ids=experiment_ids, max_results=1, order_by=["start_time DESC"])
if len(latest_runs) > 0:
    run_id = latest_runs.iloc[0].run_id
    print(f"Found latest run: {run_id}")
else:
    run_id = "paste_your_run_id_here"  # Fallback to manual input
    print(f"Using manually specified run ID: {run_id}")

# Try to load the model from MLflow. "keras_model" is the artifact path used by
# the training cell; "model" is kept as a second candidate.
loaded_model = None
for artifact_name in ("keras_model", "model"):
    model_uri = f"runs:/{run_id}/{artifact_name}"
    try:
        loaded_model = mlflow.keras.load_model(model_uri)
    except Exception as e:
        print(f"Error loading model from path '{artifact_name}': {e}")
    else:
        print(f"Model loaded successfully from MLflow: {model_uri}")
        break

if loaded_model is None:
    print("Falling back to model in memory or loading from MinIO directly")

    # As a fallback, try to load the model file the training cell uploads to
    # MinIO when MLflow logging fails.
    try:
        # Create a MinIO client
        s3_client = boto3.client(
            's3',
            endpoint_url='http://minio:9000',
            aws_access_key_id='minioadmin',
            aws_secret_access_key='minioadmin'
        )

        # Download model from MinIO
        bucket_name = 'food-images'  # Use your bucket name
        model_path = '/tmp/food_classifier_model.h5'
        s3_client.download_file(bucket_name, 'models/food_classifier_model.h5', model_path)

        # Load model from downloaded file
        from tensorflow.keras.models import load_model
        loaded_model = load_model(model_path)
        print(f"Model loaded from MinIO: {bucket_name}/models/food_classifier_model.h5")
    except Exception as e:
        print(f"Error loading model from MinIO: {e}")
        print("Using the model from memory instead")
        loaded_model = model  # Last resort: the model object still in memory

# Prediction helper that uses the model loaded in this cell (`loaded_model`)
def predict_image(image_path):
    """Classify the image at `image_path` with `loaded_model`.

    The image is resized to (img_height, img_width) and scaled with
    MobileNetV2's `preprocess_input` before prediction.

    Returns a dict with the keys "class" (predicted label), "confidence"
    (highest score as a float) and "all_predictions" (label -> score).

    NOTE(review): decoding assumes the model's output order matches the order
    of `classes` -- confirm against how the model was trained.
    """
    # Load the image and turn it into a batch of one
    loaded = image.load_img(image_path, target_size=(img_height, img_width))
    batch = preprocess_input(np.expand_dims(image.img_to_array(loaded), axis=0))

    # Scores of the single image in the batch
    scores = loaded_model.predict(batch)[0]
    per_class = {label: float(scores[pos]) for pos, label in enumerate(classes)}

    return {
        "class": classes[np.argmax(scores)],
        "confidence": float(np.max(scores)),
        "all_predictions": per_class,
    }

# Smoke-test the loaded model on the first test image, if there is one
if len(test_images) > 0:
    result = predict_image(test_images[0])
    top_class = result['class']
    top_confidence = result['confidence']
    print(f"Prediction using loaded model: {top_class} with {top_confidence:.2f} confidence")
    print(f"All predictions: {result['all_predictions']}")