In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import zipfile
import os

zip_path = '/content/drive/MyDrive/Tomato_5_class.zip'  # Update this path if needed
extract_path = '/content/Tomato_5_class'

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

os.listdir(extract_path)  # Check contents

Mounted at /content/drive


['Tomato_5_class']

model

In [1]:
# 🚩 Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

import os
import torch
import torch.nn as nn
from torch.optim import AdamW
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from transformers import ViTForImageClassification
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np

# ✅ Config
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Paths
data_dir = "/content/drive/MyDrive/Tomato Maturity Detection Dataset/Augment Dataset"
save_path = "/content/drive/MyDrive/2classbestmodel.pth"

# 🔁 Transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

# 📦 Dataset & Dataloader
full_dataset = datasets.ImageFolder(root=data_dir, transform=transform)
class_names = full_dataset.classes  # ['IMMATURE', 'MATURE']
print("Classes:", class_names)

from torch.utils.data import random_split
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_data, val_data = random_split(full_dataset, [train_size, val_size])
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

# ✅ Model Setup
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=2  # 2 classes: MATURE / IMMATURE
)
model.to(device)

optimizer = AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

# 🔁 Training & Evaluation Functions
def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    total_loss, correct, total = 0, 0, 0
    for imgs, labels in tqdm(loader, desc="Training"):
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs).logits
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return total_loss / len(loader), correct / total

def evaluate(model, loader, criterion):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for imgs, labels in tqdm(loader, desc="Evaluating"):
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs).logits
            loss = criterion(outputs, labels)

            total_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return total_loss / len(loader), correct / total

# 🧠 Training Loop
EPOCHS = 5
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
best_val_acc = 0.0

for epoch in range(EPOCHS):
    print(f"\n📚 Epoch {epoch+1}/{EPOCHS}")
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)

    print(f"Train Loss: {train_loss:.4f} | Accuracy: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f} | Accuracy: {val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), save_path)
        print(f"✅ Best model saved to Google Drive at: {save_path}")

# 📈 Plot Learning Curves
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(train_accuracies, label='Train Acc')
plt.plot(val_accuracies, label='Val Acc')
plt.title("Accuracy")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.title("Loss")
plt.legend()
plt.show()


Mounted at /content/drive
Using device: cuda


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Tomato Maturity Detection Dataset/Augment Dataset'

In [3]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import numpy as np

# Set mixed precision policy
tf.keras.mixed_precision.set_global_policy('float32')

# Dataset paths
train_path = "/content/Tomato_5_class/Tomato_5_class/Training_set"
test_path = "/content/Tomato_5_class/Tomato_5_class/Testing_set"

# Enhanced data augmentation for plant disease classification
train_datagen = ImageDataGenerator(
    rotation_range=45,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=[0.7, 1.3],
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.7, 1.3],
    fill_mode='reflect',
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input
)

test_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input
)

# Data generators
train_gen = train_datagen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    batch_size=16,
    class_mode='sparse',
    shuffle=True
)

test_gen = test_datagen.flow_from_directory(
    test_path,
    target_size=(224, 224),
    batch_size=16,
    class_mode='sparse',
    shuffle=False
)

# Custom attention layer
class SelfAttention(layers.Layer):
    def __init__(self, embedding_dim):
        super(SelfAttention, self).__init__()
        self.embedding_dim = embedding_dim
        self.query = layers.Dense(embedding_dim)
        self.key = layers.Dense(embedding_dim)
        self.value = layers.Dense(embedding_dim)

    def call(self, inputs):
        # Get query, key, value projections
        query = self.query(inputs)
        key = self.key(inputs)
        value = self.value(inputs)

        # Reshape for matrix multiplication
        batch_size = tf.shape(inputs)[0]

        # Using einsum for cleaner matrix multiplication
        score = tf.einsum('bij,bkj->bik', query, key)

        # Scale
        scale = tf.math.sqrt(tf.cast(self.embedding_dim, tf.float32))
        scaled_score = score / scale

        # Softmax for attention weights
        attention_weights = tf.nn.softmax(scaled_score, axis=-1)

        # Apply attention to values
        context = tf.einsum('bij,bjk->bik', attention_weights, value)

        return context

# Enhanced model without complex transformer parts
def create_enhanced_model(num_classes=5):
    # Input layer
    inputs = layers.Input(shape=(224, 224, 3))

    # CNN Branch 1: EfficientNetB2
    effnet = tf.keras.applications.EfficientNetB2(
        include_top=False,
        weights='imagenet',
        input_tensor=inputs
    )

    # Initially freeze all layers
    for layer in effnet.layers:
        layer.trainable = False

    effnet_features = layers.GlobalAveragePooling2D()(effnet.output)

    # CNN Branch 2: DenseNet121 (good alternative with different architecture)
    densenet = tf.keras.applications.DenseNet121(
        include_top=False,
        weights='imagenet',
        input_tensor=inputs
    )

    # Initially freeze all layers
    for layer in densenet.layers:
        layer.trainable = False

    densenet_features = layers.GlobalAveragePooling2D()(densenet.output)

    # Feature processing with dedicated heads
    effnet_features = layers.Dense(512, activation='swish')(effnet_features)
    effnet_features = layers.BatchNormalization()(effnet_features)
    effnet_features = layers.Dropout(0.3)(effnet_features)

    densenet_features = layers.Dense(512, activation='swish')(densenet_features)
    densenet_features = layers.BatchNormalization()(densenet_features)
    densenet_features = layers.Dropout(0.3)(densenet_features)

    # Feature fusion
    combined = layers.Concatenate()([effnet_features, densenet_features])

    # Reshape for self-attention
    combined = layers.Reshape((1, 1024))(combined)

    # Apply attention - using our proper Keras attention layer
    attention_output = SelfAttention(1024)(combined)

    # Flatten
    features = layers.Flatten()(attention_output)

    # Deep classifier
    x = layers.Dense(768, activation='swish')(features)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(384, activation='swish')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(192, activation='swish')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)

    # Output
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs=inputs, outputs=outputs)

# Focal Loss
class FocalLoss(tf.keras.losses.Loss):
    def __init__(self, alpha=0.25, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def call(self, y_true, y_pred):
        ce = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred, from_logits=False)
        pt = tf.exp(-ce)
        return tf.reduce_mean(self.alpha * (1-pt)**self.gamma * ce)

# Initialize model
model = create_enhanced_model()

# Compile
model.compile(
    optimizer=optimizers.AdamW(learning_rate=1e-4, weight_decay=1e-5),
    loss=FocalLoss(),
    metrics=['accuracy']
)

# Class weights for imbalanced data
class_counts = np.bincount(train_gen.classes)
total = len(train_gen.classes)
class_weights = {i: total/(len(class_counts)*count) for i, count in enumerate(class_counts)}

# Callbacks
callbacks = [
    EarlyStopping(monitor='val_accuracy', patience=7, restore_best_weights=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3),
    ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_accuracy')
]

# First training phase - train with frozen layers
print("Phase 1: Training with frozen backbone...")
history1 = model.fit(
    train_gen,
    validation_data=test_gen,
    epochs=15,
    class_weight=class_weights,
    callbacks=callbacks
)

# Unfreeze layers for fine-tuning
# print("Unfreezing EfficientNet layers for fine-tuning...")
# for layer in model.layers:
#     if isinstance(layer, tf.keras.applications.EfficientNetB2):
#         for i, l in enumerate(layer.layers):
#             if i > 300:  # Unfreeze the last layers
#                 l.trainable = True

# print("Unfreezing DenseNet layers for fine-tuning...")
# for layer in model.layers:
#     if isinstance(layer, tf.keras.applications.DenseNet121):
#         for i, l in enumerate(layer.layers):
#             if i > 300:  # Unfreeze the last layers
#                 l.trainable = True

# Unfreeze layers for fine-tuning
print("Unfreezing EfficientNet layers for fine-tuning...")
for layer in model.layers:
    if layer.name.startswith('efficientnetb2'):  # Check by name prefix
        for i, l in enumerate(layer.layers):
            if i > 300:  # Unfreeze the last layers
                l.trainable = True

print("Unfreezing DenseNet layers for fine-tuning...")
for layer in model.layers:
    if layer.name.startswith('densenet121'):  # Check by name prefix
        for i, l in enumerate(layer.layers):
            if i > 300:  # Unfreeze the last layers
                l.trainable = True

# Recompile with lower learning rate
model.compile(
    optimizer=optimizers.AdamW(learning_rate=5e-5, weight_decay=1e-6),
    loss=FocalLoss(),
    metrics=['accuracy']
)

# Second training phase - fine-tune
print("Phase 2: Fine-tuning...")
history2 = model.fit(
    train_gen,
    validation_data=test_gen,
    epochs=15,
    class_weight=class_weights,
    callbacks=callbacks,
    initial_epoch=history1.epoch[-1] + 1
)

# Load best model and evaluate
try:
    model.load_weights('best_model.keras')
except:
    # Fall back to HDF5 format if needed
    model.save('best_model.h5')
    model = tf.keras.models.load_model('best_model.h5',
                                       custom_objects={'FocalLoss': FocalLoss,
                                                      'SelfAttention': SelfAttention})

test_loss, test_acc = model.evaluate(test_gen)
print(f"\nFinal Test Accuracy: {test_acc:.4f}")
print(f"Final Test Loss: {test_loss:.4f}")

# If you want to save the final model
model.save('tomato_classifier_final.keras')

Found 5122 images belonging to 5 classes.
Found 1275 images belonging to 5 classes.
Phase 1: Training with frozen backbone...
Epoch 1/15
[1m321/321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m211s[0m 472ms/step - accuracy: 0.3144 - loss: 0.3937 - val_accuracy: 0.6149 - val_loss: 0.1495 - learning_rate: 1.0000e-04
Epoch 2/15
[1m321/321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 295ms/step - accuracy: 0.4242 - loss: 0.2893 - val_accuracy: 0.6282 - val_loss: 0.1396 - learning_rate: 1.0000e-04
Epoch 3/15
[1m321/321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 274ms/step - accuracy: 0.4774 - loss: 0.2573 - val_accuracy: 0.6275 - val_loss: 0.1358 - learning_rate: 1.0000e-04
Epoch 4/15
[1m321/321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 273ms/step - accuracy: 0.4873 - loss: 0.2445 - val_accuracy: 0.6196 - val_loss: 0.1362 - learning_rate: 1.0000e-04
Epoch 5/15
[1m321/321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 279ms/step - accuracy:

  saveable.load_own_variables(weights_store.get(inner_path))


[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 227ms/step - accuracy: 0.8717 - loss: 0.0564

Final Test Accuracy: 0.6447
Final Test Loss: 0.1249
