# 🍅 Multiclass Tomato Disease Classification
This notebook trains ResNet50, EfficientNet-B0, and DenseNet121 to classify tomato leaf images into one of eight categories.

In [None]:
# 1️⃣ Install required packages
pip install torch torchvision matplotlib pandas scikit-learn

In [None]:
# 2️⃣ Imports & GPU check
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import datasets, transforms, models
from torch import nn, optim
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix
# Report whether CUDA is usable and, if so, the name of GPU 0.
has_cuda = torch.cuda.is_available()
device_label = torch.cuda.get_device_name(0) if has_cuda else "CPU"
print("CUDA:", has_cuda, "Device:", device_label)

In [None]:
# 3️⃣ Dataset viewing: count images per class & bar chart
base = "/workspace/datacentervision/Tomato-Village/Variant-a(Multiclass Classification)"
splits = ["train", "val", "test"]

# counts[split][class_name] -> number of directory entries in that class folder.
counts = {}
for split in splits:
    path = os.path.join(base, split)
    split_counts = counts.setdefault(split, {})
    # sorted() makes the table/legend order deterministic across file systems.
    for cls in sorted(os.listdir(path)):
        cls_dir = os.path.join(path, cls)
        # Only sub-directories are class folders; a stray file sitting next to
        # them (e.g. .DS_Store) would otherwise make os.listdir() below raise.
        if not os.path.isdir(cls_dir):
            continue
        split_counts[cls] = len(os.listdir(cls_dir))

# convert to DataFrame: one row per split, one column per class
df_counts = pd.DataFrame(counts).T
# A class that is absent from a split appears as NaN; show it as an integer 0.
df_counts = df_counts.fillna(0).astype(int)
print(df_counts)

# bar chart: one group of bars per split, one bar per class
df_counts.plot(kind="bar", figsize=(10,5))
plt.title("Images per Class in train/val/test")
plt.xlabel("Dataset Split")
plt.ylabel("Count")
plt.xticks(rotation=0)
plt.grid(axis="y")
plt.tight_layout()
plt.show()

In [None]:
# 4️⃣ Preprocessing pipelines (train/val) plus an extra augmentation pipeline that is defined but not applied
# Per-channel normalisation constants shared by the train and val pipelines.
# NOTE(review): these look like the usual ImageNet statistics — confirm.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_INPUT_SIZE = (224, 224)


def _tensor_steps():
    """Return fresh ToTensor + Normalize steps for the end of a pipeline."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]


# Training: resize, random horizontal flip, then tensor conversion + normalise.
train_transform = transforms.Compose(
    [transforms.Resize(_INPUT_SIZE), transforms.RandomHorizontalFlip(p=0.5)]
    + _tensor_steps()
)

# Extra colour/rotation augmentation. It is only defined here; nothing in this
# cell adds it to train_transform or val_transform.
aug_transform = transforms.Compose(
    [
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.RandomRotation(15),
    ]
)

# Validation/test: deterministic resize, tensor conversion + normalise.
val_transform = transforms.Compose([transforms.Resize(_INPUT_SIZE)] + _tensor_steps())

In [None]:
# 5️⃣ DataLoaders
batch_size = 32

# Build, per split: its directory, an ImageFolder dataset, and a DataLoader.
data_dirs = {}
datasets_dict = {}
loaders = {}
for split in splits:
    is_train = split == "train"
    data_dirs[split] = os.path.join(base, split)
    # Only the training split gets the random-flip pipeline.
    datasets_dict[split] = datasets.ImageFolder(
        data_dirs[split],
        transform=train_transform if is_train else val_transform,
    )
    # Shuffle training batches only; val/test keep the dataset order.
    loaders[split] = DataLoader(
        datasets_dict[split],
        batch_size=batch_size,
        shuffle=is_train,
        num_workers=4,
    )

# Class list (and therefore label indices) is taken from the training split.
class_names = datasets_dict["train"].classes
num_classes = len(class_names)
print("Classes:", class_names)


In [None]:
# 6️⃣ Models to compare
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _pretrained(factory):
    """Wrap a torchvision model factory into a zero-argument pretrained loader.

    torchvision >= 0.13 deprecates ``pretrained=True`` in favour of the
    ``weights=`` argument; ``"IMAGENET1K_V1"`` selects the same checkpoint the
    legacy flag maps to. Older releases do not know ``weights`` and raise
    ``TypeError``, in which case the legacy flag is used instead.

    Args:
        factory: a ``torchvision.models`` builder such as ``models.resnet50``.

    Returns:
        A callable taking no arguments that returns a freshly built model.
    """
    def load():
        try:
            return factory(weights="IMAGENET1K_V1")
        except TypeError:
            # Pre-0.13 torchvision: fall back to the original call.
            return factory(pretrained=True)
    return load


# Display name -> zero-argument constructor. Each call builds a new model so
# every experiment starts from the same pretrained weights.
model_constructors = {
    "ResNet50": _pretrained(models.resnet50),
    "EfficientNetB0": _pretrained(models.efficientnet_b0),
    "DenseNet121": _pretrained(models.densenet121),
}

def build_model(name):
    """Instantiate the model registered under ``name`` with a new output layer.

    The constructor is looked up in ``model_constructors``; the final linear
    layer is then replaced by one with ``num_classes`` outputs and the model is
    moved to ``device``. Which attribute holds that layer is chosen from the
    lower-cased name: ``fc`` for names containing "resnet", ``classifier[1]``
    for names containing "efficientnet", and ``classifier`` for anything else.

    Args:
        name: key into ``model_constructors``.

    Returns:
        The model, on ``device``, with its head resized to ``num_classes``.
    """
    net = model_constructors[name]()
    family = name.lower()

    def new_head(old_layer):
        # Same input width as the layer being replaced, num_classes outputs.
        return nn.Linear(old_layer.in_features, num_classes)

    if "resnet" in family:
        net.fc = new_head(net.fc)
    elif "efficientnet" in family:
        net.classifier[1] = new_head(net.classifier[1])
    else:
        net.classifier = new_head(net.classifier)
    return net.to(device)


In [None]:
# 7️⃣ Training & evaluation loops
def train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Args:
        model: module to train; it is switched to training mode here.
        loader: iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: loss callable; assumed to return a batch-mean scalar, since
            each batch loss is re-weighted by its batch size below.
        optimizer: optimizer stepping ``model``'s parameters.

    Returns:
        float: loss averaged over every sample actually processed.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.train()
    # Batches go to wherever the model's parameters live, so the function does
    # not depend on a notebook-level ``device`` variable being defined.
    target_device = next(
        (p.device for p in model.parameters()), torch.device("cpu")
    )
    running_loss = 0.0
    seen = 0
    for imgs, labels in loader:
        imgs, labels = imgs.to(target_device), labels.to(target_device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        batch = imgs.size(0)
        running_loss += loss.item() * batch
        seen += batch
    if seen == 0:
        raise ValueError("loader yielded no samples")
    # Divide by samples seen rather than len(loader.dataset): the two differ
    # when the loader drops the last batch or uses a sampler.
    return running_loss / seen

def eval_model(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: module to evaluate; it is switched to eval mode here.
        loader: iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: loss callable; assumed to return a batch-mean scalar, since
            each batch loss is re-weighted by its batch size below.

    Returns:
        tuple: ``(mean_loss, accuracy, report, confmat)`` where ``report`` is
        the ``classification_report`` dict and ``confmat`` the confusion
        matrix, both indexed by the notebook-level ``class_names``.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.eval()
    # Batches go to wherever the model's parameters live, so the function does
    # not depend on a notebook-level ``device`` variable being defined.
    target_device = next(
        (p.device for p in model.parameters()), torch.device("cpu")
    )
    running_loss, correct, seen = 0.0, 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(target_device), labels.to(target_device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            batch = imgs.size(0)
            running_loss += loss.item() * batch
            seen += batch
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            all_preds += preds.cpu().tolist()
            all_labels += labels.cpu().tolist()
    if seen == 0:
        raise ValueError("loader yielded no samples")
    acc = correct / seen
    # Pin the label set explicitly. Without it, a split in which some class
    # never appears (in labels or predictions) makes classification_report
    # reject target_names and shrinks the confusion matrix.
    label_ids = list(range(len(class_names)))
    report = classification_report(
        all_labels,
        all_preds,
        labels=label_ids,
        target_names=class_names,
        output_dict=True,
        zero_division=0,  # same values as the default, without the warning
    )
    confmat = confusion_matrix(all_labels, all_preds, labels=label_ids)
    return running_loss / seen, acc, report, confmat


In [None]:
# 8️⃣ Run experiments + Save models
import os

# Checkpoints are written below this folder; create it up front.
os.makedirs("saved_models", exist_ok=True)

results = {}
epochs = 30
for name in model_constructors:
    print(f"\n🔄 Training {name}")

    # Fresh model, loss and optimizer for every architecture.
    model = build_model(name)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    history = {key: [] for key in ("train_loss", "val_loss", "val_acc")}

    for epoch in range(epochs):
        train_loss = train_one_epoch(model, loaders["train"], criterion, optimizer)
        # Only loss and accuracy are tracked per epoch; report/confmat are dropped.
        val_loss, val_acc, _, _ = eval_model(model, loaders["val"], criterion)
        print(f" {name} Epoch {epoch+1}/{epochs}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")
        for key, value in (
            ("train_loss", train_loss),
            ("val_loss", val_loss),
            ("val_acc", val_acc),
        ):
            history[key].append(value)

    # Final test evaluation uses the weights from the last epoch.
    test_loss, test_acc, test_report, test_confmat = eval_model(
        model, loaders["test"], criterion
    )

    # Keep the trained model and its metrics in memory ...
    results[name] = dict(
        model=model,
        history=history,
        test_loss=test_loss,
        test_acc=test_acc,
        report=test_report,
        confmat=test_confmat,
    )

    # ... and persist the weights (state_dict only) to disk.
    save_path = f"saved_models/{name}.pth"
    torch.save(model.state_dict(), save_path)
    print(f"✅ Model {name} saved to {save_path}")

In [None]:
# 9️⃣ Plot Loss & Accuracy Curves for each model
for name, res in results.items():
    h = res["history"]
    # Take the x-axis from the recorded history rather than the global
    # `epochs`: if `epochs` is edited after training, the lengths would no
    # longer match and plt.plot would raise.
    epochs_range = range(1, len(h["train_loss"]) + 1)
    plt.figure(figsize=(10,4))

    # Left panel: training vs validation loss.
    plt.subplot(1,2,1)
    plt.plot(epochs_range, h["train_loss"], label="train_loss")
    plt.plot(epochs_range, h["val_loss"],   label="val_loss")
    plt.title(f"{name} Loss")
    plt.xlabel("Epoch")
    plt.legend()
    plt.grid(True)

    # Right panel: validation accuracy.
    plt.subplot(1,2,2)
    plt.plot(epochs_range, h["val_acc"], label="val_acc")
    plt.title(f"{name} Val Accuracy")
    plt.xlabel("Epoch")
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.show()


In [None]:
# 🔟 Print classification report & plot confusion matrix
# Tick positions are the same for every model, so compute them once.
tick_marks = np.arange(num_classes)

for name, outcome in results.items():
    test_acc, test_loss = outcome["test_acc"], outcome["test_loss"]
    print(f"\n🏁 {name} Test Accuracy: {test_acc:.4f}  Test Loss: {test_loss:.4f}")

    # One row per class / aggregate entry of the classification report.
    df_rep = pd.DataFrame(outcome["report"]).T
    print("Classification Report:\n", df_rep.round(3))

    # Confusion matrix heatmap: rows are true labels, columns are predictions.
    cm = outcome["confmat"]
    plt.figure(figsize=(6,5))
    plt.imshow(cm, interpolation='nearest')
    plt.title(f"{name} Confusion Matrix")
    plt.colorbar()
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.show()


In [None]:
# 1️⃣1️⃣ Summary comparison table
# One row per model: test accuracy, test loss, the report's overall accuracy
# and its macro-averaged F1.
summary = []
for name, res in results.items():
    acc = res["test_acc"]
    loss = res["test_loss"]
    overall_acc = res["report"]["accuracy"]
    # Previously `m_f1` was filled from report["accuracy"], which only repeats
    # the test accuracy; the macro-averaged F1 is what the name refers to.
    m_f1 = res["report"]["macro avg"]["f1-score"]
    summary.append([name, acc, loss, overall_acc, m_f1])
df_summary = pd.DataFrame(
    summary,
    columns=["Model", "Test Acc", "Test Loss", "Overall Acc", "Macro F1"],
).set_index("Model")
print(df_summary.round(3))
