# 🍎 Fruit Ripeness Classification Training

This notebook trains a deep learning model to classify fruit ripeness using transfer learning with MobileNetV2.

## Setup Instructions
1. Upload your dataset to Google Drive in this structure:
   ```
   /content/drive/MyDrive/fruit_dataset/
   ├── train/
   │   ├── banana_ripe/
   │   ├── banana_unripe/
   │   ├── apple_ripe/
   │   └── apple_unripe/
   └── val/ (optional; same class subfolders as train/)
   ```
2. Run all cells in order
3. Download the generated zip file, which contains the trained model and `labels.txt`
4. Extract it into your project's `models/` directory

In [None]:
# Standard library
import os
import pathlib
import zipfile
from datetime import datetime

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Colab-only helper used to reach files stored in Google Drive
from google.colab import drive

# Mount Google Drive so the dataset under /content/drive becomes readable
drive.mount('/content/drive')

# Report the runtime up front so a missing GPU is noticed before training starts
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")

In [None]:
# Configuration: every value a reader is likely to tune lives in this cell
DATA_PATH = "/content/drive/MyDrive/fruit_dataset"  # Update this path
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Two training phases: frozen base first, then partial fine-tuning
EPOCHS_INITIAL = 10
EPOCHS_FINETUNE = 5
LEARNING_RATE_INITIAL = 1e-3
LEARNING_RATE_FINETUNE = 1e-4

# Report whether the dataset root is reachable.
# This only prints a hint; it does not stop execution when the path is missing.
if os.path.exists(DATA_PATH):
    print(f"✅ Data path found: {DATA_PATH}")
    print(f"Contents: {os.listdir(DATA_PATH)}")
else:
    print(f"❌ Data path not found: {DATA_PATH}")
    print("Please upload your dataset to Google Drive and update DATA_PATH")

In [None]:
# Create datasets
train_dir = os.path.join(DATA_PATH, "train")
val_dir = os.path.join(DATA_PATH, "val")

# Use val/ only when it exists and is non-empty; otherwise carve a validation
# subset out of train/.
if os.path.exists(val_dir) and os.listdir(val_dir):
    print("📁 Using separate train/val directories")
    train_ds = tf.keras.preprocessing.image_dataset_from_directory(
        train_dir,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        shuffle=True
    )
    val_ds = tf.keras.preprocessing.image_dataset_from_directory(
        val_dir,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        shuffle=False
    )
else:
    print("📁 Splitting training data (80/20)")
    # Both calls must share validation_split and seed so the two subsets are
    # complementary and do not overlap.
    train_ds = tf.keras.preprocessing.image_dataset_from_directory(
        train_dir,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        validation_split=0.2,
        subset="training",
        seed=42
    )
    val_ds = tf.keras.preprocessing.image_dataset_from_directory(
        train_dir,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        validation_split=0.2,
        subset="validation",
        seed=42
    )

class_names = train_ds.class_names
num_classes = len(class_names)

# Integer labels are indices into each dataset's own class-folder list. If val/
# has different subfolders than train/, the same index means a different class
# and every validation metric is silently wrong, so fail loudly instead.
val_class_names = val_ds.class_names
if val_class_names != class_names:
    raise ValueError(
        "Class folders differ between train and val: "
        f"train={class_names}, val={val_class_names}. "
        "Both directories must contain the same class subfolders."
    )

print(f"📊 Found {num_classes} classes: {class_names}")
print(f"🔢 Training batches: {tf.data.experimental.cardinality(train_ds).numpy()}")
print(f"🔢 Validation batches: {tf.data.experimental.cardinality(val_ds).numpy()}")

In [None]:
# Preview up to nine images from the first training batch, titled by class name
plt.figure(figsize=(12, 8))
for images, labels in train_ds.take(1):
    n_shown = min(9, len(images))
    for slot in range(n_shown):
        # Pixels arrive as floats; cast to uint8 so imshow renders them as 0-255 RGB
        pixels = images[slot].numpy().astype("uint8")
        plt.subplot(3, 3, slot + 1)
        plt.imshow(pixels)
        plt.title(f"{class_names[labels[slot]]}")
        plt.axis("off")
plt.tight_layout()
plt.show()

In [None]:
# Overlap input loading with training by prefetching batches on both datasets
AUTOTUNE = tf.data.AUTOTUNE
train_ds, val_ds = (dataset.prefetch(AUTOTUNE) for dataset in (train_ds, val_ds))

# Random augmentations, bundled into one reusable Sequential block
augmentation_layers = [
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomContrast(0.1),
]
data_augmentation = tf.keras.Sequential(augmentation_layers)

print("✅ Data preparation complete")

In [None]:
# Create model with transfer learning: an ImageNet-pretrained MobileNetV2
# feature extractor with a new softmax classification head on top.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=IMG_SIZE + (3,),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False  # Freeze base model initially

# Model architecture
inputs = tf.keras.Input(shape=IMG_SIZE + (3,))
# Augment the raw pixels BEFORE MobileNetV2 preprocessing. preprocess_input
# rescales to [-1, 1], while RandomContrast clips its output to the 0-255 pixel
# range. With the opposite order, negative values are zeroed during training
# only, so the base model sees different input distributions at train and
# inference time.
x = data_augmentation(inputs)
x = tf.keras.applications.mobilenet_v2.preprocess_input(x)
# training=False keeps the base model's BatchNormalization layers in inference
# mode, including after layers are unfrozen for fine-tuning.
x = base_model(x, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

model = tf.keras.Model(inputs, outputs)

# Compile model; the sparse loss matches the integer labels produced by the
# dataset loader.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE_INITIAL),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("🏗️ Model created successfully")
model.summary()

In [None]:
# Phase 1: train only the new classification head while the base stays frozen
print(f"🚀 Starting initial training ({EPOCHS_INITIAL} epochs)...")

# Stop when validation accuracy stalls for 3 epochs and roll back to the best weights
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=3,
    restore_best_weights=True,
)

# Halve the learning rate after 2 epochs without validation-loss improvement
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-7,
)

# This list is reused by the fine-tuning cell
callbacks = [early_stopping, lr_on_plateau]

fit_options = dict(
    validation_data=val_ds,
    epochs=EPOCHS_INITIAL,
    callbacks=callbacks,
    verbose=1,
)
history1 = model.fit(train_ds, **fit_options)

print("✅ Initial training complete")

In [None]:
# Fine-tuning (unfreeze some layers)
print("🔧 Fine-tuning model...")
base_model.trainable = True

# Freeze early layers, fine-tune later layers: only the last 20 layers of the
# base model stay trainable.
for layer in base_model.layers[:-20]:
    layer.trainable = False

# Recompile with lower learning rate so the change in trainable layers takes
# effect and the pretrained weights are only nudged.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE_FINETUNE),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Continue training from where the initial phase stopped.
# With initial_epoch set, Keras treats `epochs` as the index of the FINAL
# epoch, not a count of additional epochs. Passing EPOCHS_FINETUNE directly
# would run too few epochs, or none when the first phase ran at least that many.
finetune_start_epoch = len(history1.history['accuracy'])
history2 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=finetune_start_epoch + EPOCHS_FINETUNE,
    initial_epoch=finetune_start_epoch,
    callbacks=callbacks,
    verbose=1
)

print("✅ Fine-tuning complete")

In [None]:
# Plot training history
def plot_training_history(hist1, hist2):
    """Plot accuracy and loss curves across both training phases.

    Args:
        hist1: History object returned by the initial (frozen-base) ``fit``.
        hist2: History object returned by the fine-tuning ``fit``. Its
            ``history`` dict may be empty if that phase ran zero epochs.

    The curves of both phases are concatenated, and a dashed vertical line
    marks the last epoch index of the first phase. Metrics missing from a
    history are treated as empty rather than raising ``KeyError``.
    """
    def _combined(key):
        # Concatenate one metric across both phases, tolerating missing keys.
        return list(hist1.history.get(key, [])) + list(hist2.history.get(key, []))

    # (training key, validation key, display name, legend position) per panel
    panels = [
        ('accuracy', 'val_accuracy', 'Accuracy', 'lower right'),
        ('loss', 'val_loss', 'Loss', 'upper right'),
    ]

    plt.figure(figsize=(12, 4))

    for position, (train_key, val_key, name, legend_loc) in enumerate(panels, start=1):
        train_values = _combined(train_key)
        val_values = _combined(val_key)
        # Index of the last epoch of the first phase.
        phase_boundary = len(hist1.history.get(train_key, [])) - 1

        plt.subplot(1, 2, position)
        plt.plot(range(len(train_values)), train_values, label=f'Training {name}')
        plt.plot(range(len(val_values)), val_values, label=f'Validation {name}')
        plt.axvline(x=phase_boundary, color='r', linestyle='--', alpha=0.5, label='Fine-tuning starts')
        plt.legend(loc=legend_loc)
        plt.title(f'Training and Validation {name}')
        plt.xlabel('Epoch')
        plt.ylabel(name)

    plt.tight_layout()
    plt.show()

# Draw the combined learning curves for both training phases
plot_training_history(history1, history2)

# Final evaluation on the validation set. With a single compiled metric,
# evaluate returns the loss followed by the accuracy.
final_metrics = model.evaluate(val_ds, verbose=0)
val_loss, val_accuracy = final_metrics
print(f"📊 Final validation accuracy: {val_accuracy:.4f}")
print(f"📊 Final validation loss: {val_loss:.4f}")

In [None]:
# Export the trained model plus its label list, then bundle both into one zip
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
model_name = f"fruit_model_{timestamp}"
export_dir = f"/content/{model_name}"
zip_path = f"{export_dir}.zip"

# Write the model to a directory on the Colab VM.
# NOTE(review): an extension-less path relies on the directory-based SavedModel
# format; newer Keras releases may require a `.keras`/`.h5` suffix. Confirm
# against the TensorFlow version in use.
model.save(export_dir)

# One class name per line, in the same order as the model's output indices
with open(os.path.join(export_dir, "labels.txt"), 'w') as labels_file:
    labels_file.writelines(f"{class_name}\n" for class_name in class_names)

# Zip the export directory, storing paths relative to it so the archive
# unpacks without the /content/<model_name> prefix
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
    for folder, _subdirs, filenames in os.walk(export_dir):
        for filename in filenames:
            absolute_path = os.path.join(folder, filename)
            archive.write(absolute_path, os.path.relpath(absolute_path, export_dir))

print(f"💾 Model saved as: {model_name}")
print(f"📦 Download package created: {zip_path}")
print("\n🎯 Next steps:")
print("1. Download the zip file from the Files panel")
print("2. Extract it to your project's models/ directory")
print("3. Update MODEL_PATH in your .env file")
print("4. Restart your Flask app")

# Trigger a browser download of the archive (Colab only)
from google.colab import files
print("\n⬇️ Downloading model...")
files.download(zip_path)

In [None]:
# Spot-check the model on up to nine images from the first validation batch
plt.figure(figsize=(15, 10))
for images, labels in val_ds.take(1):
    predictions = model.predict(images)
    n_shown = min(9, len(images))
    for slot in range(n_shown):
        class_probs = predictions[slot]
        predicted_class = class_names[np.argmax(class_probs)]
        true_class = class_names[labels[slot]]
        confidence = np.max(class_probs)

        # Green title for a correct prediction, red for a miss
        if predicted_class == true_class:
            color = 'green'
        else:
            color = 'red'

        plt.subplot(3, 3, slot + 1)
        plt.imshow(images[slot].numpy().astype("uint8"))
        plt.title(f"True: {true_class}\nPred: {predicted_class} ({confidence:.2f})", color=color)
        plt.axis("off")
plt.tight_layout()
plt.show()