In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import OneCycleLR
import torchvision.transforms as transforms
from torchvision import datasets
# Use a bigger EfficientNet variant, e.g. b3
from torchvision.models import efficientnet_b3

import numpy as np
import os
from torch.utils.data import DataLoader
import random
import math
import copy

import matplotlib.pyplot as plt


In [2]:

# --------------------------
# Mixup Utilities
# --------------------------

def mixup_data(inputs, labels, alpha=1.0):
    """Returns mixed inputs, pairs of targets, and lambda."""
    if alpha <= 0:
        return inputs, labels, labels, 1.0

    batch_size = inputs.size(0)
    lam = np.random.beta(alpha, alpha)
    index = torch.randperm(batch_size).to(inputs.device)

    mixed_inputs = lam * inputs + (1 - lam) * inputs[index, :]
    labels_a, labels_b = labels, labels[index]
    return mixed_inputs, labels_a, labels_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


In [3]:

# --------------------------
# Hyperparameters
# --------------------------
IMAGE_SIZE = 300  # Larger than 224 for a bigger EfficientNet
BATCH_SIZE = 32   # You might need to reduce if you run out of memory
EPOCHS = 40
FREEZE_EPOCH = 5   # epoch to partially unfreeze
UNFREEZE_EPOCH = 15  # epoch to unfreeze more layers

# Adjust MixUp alpha if needed
MIXUP_ALPHA = 0.4

# Label smoothing can be reduced or removed if it's hurting performance
criterion = nn.CrossEntropyLoss(label_smoothing=0.05)


In [4]:
# --------------------------
# Data Augmentation
# --------------------------

# Stronger augmentation for training
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(IMAGE_SIZE, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Testing/Validation transforms
test_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE + 32),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

train_dir = "data/train"
test_dir  = "data/test"

image_datasets = {
    'train': datasets.ImageFolder(train_dir, transform=train_transforms),
    'test': datasets.ImageFolder(test_dir, transform=test_transforms)
}

dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=BATCH_SIZE, shuffle=True, num_workers=4),
    'test': DataLoader(image_datasets['test'], batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}
class_names = image_datasets['train'].classes
num_classes = len(class_names)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [5]:
# --------------------------
# Model Setup
# --------------------------

# Use a bigger EfficientNet (b3). If memory is an issue, revert to b0.
model = efficientnet_b3(pretrained=True)

# Number of features in the last linear layer
num_ftrs = model.classifier[1].in_features

# Replace the classifier
# Use a deeper classifier
model.classifier = nn.Sequential(
    nn.Linear(num_ftrs, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(inplace=True),
    nn.Dropout(0.4),
    nn.Linear(512, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(inplace=True),
    nn.Dropout(0.3),
    nn.Linear(256, num_classes)
)

model = model.to(device)





In [6]:
# --------------------------
# Freezing Strategy
# --------------------------
# 1) Initially freeze all layers except the classifier
for param in model.features.parameters():
    param.requires_grad = False

# --------------------------
# Optimizer and Scheduler
# --------------------------
# Start optimizing only the classifier with a moderate LR
optimizer = optim.AdamW(model.classifier.parameters(), lr=1e-3, weight_decay=1e-5)
scheduler = OneCycleLR(optimizer, max_lr=1e-3, steps_per_epoch=len(dataloaders['train']), epochs=EPOCHS)



In [7]:
# --------------------------
# Training Function
# --------------------------
def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=30):
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    # Keep track of loss/accuracy for plotting
    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []

    for epoch in range(num_epochs):
        print(f"Epoch {epoch}/{num_epochs-1}")
        print('-' * 10)

        # ------------------------------
        # Progressive Unfreezing
        # ------------------------------
        if epoch == FREEZE_EPOCH:
            print("Unfreezing top layers...")
            # Example: Unfreeze the last 2 blocks of EfficientNet
            # for param in model.features[6:].parameters():
            #     param.requires_grad = True
            #
            # Or unfreeze half the blocks, etc. 
            # For simplicity here, let's unfreeze the last half:
            # (b3 has about 8 blocks total, you can tune carefully)
            total_blocks = len(model.features)
            blocks_to_unfreeze = total_blocks // 2  # last half
            for i in range(blocks_to_unfreeze, total_blocks):
                for param in model.features[i].parameters():
                    param.requires_grad = True

            # Redefine optimizer to now include these layers
            optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), 
                                    lr=5e-4, weight_decay=1e-5)
            scheduler = OneCycleLR(optimizer, max_lr=5e-4, 
                                   steps_per_epoch=len(dataloaders['train']), 
                                   epochs=num_epochs - FREEZE_EPOCH, 
                                   pct_start=0.3)

        if epoch == UNFREEZE_EPOCH:
            print("Unfreezing remaining layers (full fine-tuning)...")
            # Unfreeze entire backbone
            for param in model.features.parameters():
                param.requires_grad = True

            # Redefine the optimizer & scheduler
            optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
            scheduler = OneCycleLR(optimizer, max_lr=1e-4, 
                                   steps_per_epoch=len(dataloaders['train']), 
                                   epochs=num_epochs - UNFREEZE_EPOCH, 
                                   pct_start=0.3)

        # Each epoch has a training and a validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                if phase == 'train':
                    # Mixup augmentation
                    inputs_mixed, labels_a, labels_b, lam = mixup_data(inputs, labels, alpha=MIXUP_ALPHA)
                    outputs = model(inputs_mixed)
                    loss = mixup_criterion(criterion, outputs, labels_a, labels_b, lam)
                else:
                    # Validation/test without mixup
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                # Predictions
                _, preds = torch.max(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                    scheduler.step()

                running_loss += loss.item() * inputs.size(0)
                # For mixup, we'll still approximate accuracy by comparing preds with the original labels
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            if phase == 'train':
                train_losses.append(epoch_loss)
                train_accuracies.append(epoch_acc.item())
            else:
                test_losses.append(epoch_loss)
                test_accuracies.append(epoch_acc.item())

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    print(f"Best test Acc: {best_acc:.4f}")
    # load best model weights
    model.load_state_dict(best_model_wts)

    return model, (train_losses, test_losses, train_accuracies, test_accuracies)


In [8]:
# --------------------------
# Train
# --------------------------
model, history = train_model(
    model=model,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    num_epochs=EPOCHS
)

train_losses, test_losses, train_accuracies, test_accuracies = history


Epoch 0/39
----------


train Loss: 3.3696 Acc: 0.0458
test Loss: 3.2427 Acc: 0.1017

Epoch 1/39
----------
train Loss: 3.1393 Acc: 0.0959
test Loss: 3.0461 Acc: 0.1695

Epoch 2/39
----------
train Loss: 2.8708 Acc: 0.1571
test Loss: 2.8145 Acc: 0.2754

Epoch 3/39
----------
train Loss: 2.6312 Acc: 0.2123
test Loss: 2.6170 Acc: 0.3432

Epoch 4/39
----------
train Loss: 2.4195 Acc: 0.2123
test Loss: 2.4107 Acc: 0.3941

Epoch 5/39
----------
Unfreezing top layers...
train Loss: 2.3284 Acc: 0.2363
test Loss: 2.3386 Acc: 0.4153

Epoch 6/39
----------
train Loss: 2.3214 Acc: 0.3086
test Loss: 2.2502 Acc: 0.4280

Epoch 7/39
----------
train Loss: 2.1123 Acc: 0.3292
test Loss: 2.1246 Acc: 0.4492

Epoch 8/39
----------
train Loss: 2.0732 Acc: 0.3373
test Loss: 1.9810 Acc: 0.5000

Epoch 9/39
----------
train Loss: 1.9133 Acc: 0.4110
test Loss: 1.8231 Acc: 0.5424

Epoch 10/39
----------
train Loss: 1.8361 Acc: 0.4221
test Loss: 1.7682 Acc: 0.5508

Epoch 11/39
----------
train Loss: 1.7501 Acc: 0.3600
test Loss: 1.7938 

python(27180) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(27181) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(27182) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(27183) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


test Loss: 1.7026 Acc: 0.5847

Epoch 18/39
----------


python(27261) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(27267) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(27269) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(27270) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


train Loss: 1.5558 Acc: 0.4615


python(28282) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(28283) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(28284) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(28285) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


test Loss: 1.7049 Acc: 0.5975

Epoch 19/39
----------


python(28400) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(28401) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(28402) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(28403) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


train Loss: 1.3686 Acc: 0.4700


python(29272) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(29273) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(29274) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(29275) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


test Loss: 1.7099 Acc: 0.5932

Epoch 20/39
----------


python(29415) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(29417) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(29418) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(29419) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


train Loss: 1.3161 Acc: 0.4435


python(30294) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(30295) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(30296) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(30297) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


test Loss: 1.7241 Acc: 0.5805

Epoch 21/39
----------


python(30377) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(30378) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(30379) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(30380) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


train Loss: 1.2896 Acc: 0.4259


python(31286) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(31287) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(31288) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(31289) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


test Loss: 1.7258 Acc: 0.6102

Epoch 22/39
----------


python(31434) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(31435) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(31436) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(31437) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


train Loss: 1.3286 Acc: 0.4833


python(32311) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(32312) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(32313) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(32314) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


test Loss: 1.7370 Acc: 0.5932

Epoch 23/39
----------


python(32402) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(32403) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(32404) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(32405) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


train Loss: 1.2896 Acc: 0.6284


python(34565) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(34566) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(34567) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(34568) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


test Loss: 1.6832 Acc: 0.6271

Epoch 24/39
----------


python(34858) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(34859) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(34860) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(34861) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


: 

In [1]:
# --------------------------
# Save best model
# --------------------------
torch.save(model.state_dict(), "plant_disease_efficientnet_b3_mixup_best.pth")


NameError: name 'torch' is not defined

In [2]:


# --------------------------
# Visualization of Predictions
# --------------------------
def visualize_model(model, dataloader, class_names, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    plt.figure()

    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size(0)):
                images_so_far += 1

                plt.subplot(num_images//2, 2, images_so_far)
                plt.axis('off')
                plt.title(f'pred: {class_names[preds[j]]}')

                img = inputs[j].cpu().numpy().transpose((1, 2, 0))
                img = std * img + mean
                img = np.clip(img, 0, 1)

                plt.imshow(img)
                if images_so_far == num_images:
                    model.train(mode=was_training)
                    plt.show()
                    return
    model.train(mode=was_training)

print("Visualizing some predictions...")
visualize_model(model, dataloaders['test'], class_names)



Visualizing some predictions...


NameError: name 'model' is not defined

In [3]:
# --------------------------
# Plot Accuracy and Loss
# --------------------------
# We'll make separate plots for clarity.

# 1) Plot training vs. test accuracy
plt.figure()
plt.plot(range(EPOCHS), train_accuracies, label='Train Accuracy')
plt.plot(range(EPOCHS), test_accuracies, label='Test Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Test Accuracy')
plt.legend()
plt.show()

# 2) Plot training vs. test loss
plt.figure()
plt.plot(range(EPOCHS), train_losses, label='Train Loss')
plt.plot(range(EPOCHS), test_losses, label='Test Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Test Loss')
plt.legend()
plt.show()

NameError: name 'plt' is not defined