In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision import datasets, transforms, models
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np
from torchvision.models import ResNet50_Weights
import matplotlib.pyplot as plt

In [2]:
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'


In [3]:
# google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Assuming this is the path in your Google Drive
dataset_path = '/content/drive/MyDrive/Rice leaf/Rice Leaf Disease Images/'
# dataset_path = '/content/drive/MyDrive//Rice leaf/small/'


# Image transformations for training with data augmentation
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Image transformations for validation
transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Loading datasets
train_dataset = datasets.ImageFolder(root=dataset_path, transform=transform_train)
val_dataset = datasets.ImageFolder(root=dataset_path, transform=transform_val)

# Splitting datasets
validation_split = 0.2
shuffle_dataset = True
random_seed = 42

# Creating data indices for training and validation splits
dataset_size = len(train_dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

train_indices, val_indices = indices[split:], indices[:split]

# Creating data samplers and loaders
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(train_dataset, batch_size=128, sampler=train_sampler)
validation_loader = DataLoader(train_dataset, batch_size=128, sampler=valid_sampler)


num_classes = 4


In [None]:
#VGG
model = models.vgg16(pretrained=True)
model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, num_classes)

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
#resnet
from torchvision.models import ResNet50_Weights

model = models.resnet50(weights=ResNet50_Weights.DEFAULT)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes, bias=True)


In [None]:
from torchvision.models import convnext_base, ConvNeXt_Base_Weights
pretrained = True

model = convnext_base(weights=ConvNeXt_Base_Weights.DEFAULT if pretrained else None)
model.classifier[2] = nn.Linear(in_features=model.classifier[2].in_features, out_features=num_classes, bias=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
print(model)

In [6]:
def train_and_validate(model, train_loader, validation_loader, num_epochs=10, patience=3):
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    best_val_loss = float('inf')
    epochs_no_improve = 0
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    train_losses = []
    val_losses = []
    accuracies = []
    print("Starting training...")

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        all_preds = []
        all_labels = []
        batch_count = 0
        print(f"Epoch {epoch+1} started...")

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            batch_count += 1


        scheduler.step()  # Adjust learning rate
        val_loss = validate(model, validation_loader, criterion, device)
        accuracy = np.mean(np.array(all_labels) == np.array(all_preds))
        train_losses.append(running_loss / len(train_loader.dataset))
        val_losses.append(val_loss)
        accuracies.append(accuracy)

        print(f'Epoch {epoch+1}, Training Loss: {running_loss/len(train_loader.dataset):.4f}, Validation Loss: {val_loss:.4f},Accuracy: {accuracy:.4f}')
        # print("Model saved as best_model.pth")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), 'model.pth')
        else:
            epochs_no_improve += 1
            if epochs_no_improve == patience:
                print("Early stopping triggered")
                break
    # Plotting training and validation losses
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.title('Loss over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    # Plotting accuracies
    plt.subplot(1, 2, 2)
    plt.plot(accuracies, label='Accuracy')
    plt.title('Accuracy over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

In [7]:
# Validation
def validate(model, validation_loader, criterion, device):
    model.eval()
    val_loss = 0.0
    all_preds = []
    all_labels = []
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad():
        for inputs, labels in validation_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    return val_loss / len(validation_loader.dataset)

In [8]:
print(f"Total training samples: {len(train_indices)}")
print(f"Total validation samples: {len(val_indices)}")
print(f"Batch size: {train_loader.batch_size}")

Total training samples: 4762
Total validation samples: 1190
Batch size: 128


In [None]:
# Train and validate model VGG
train_and_validate(model, train_loader, validation_loader)

Starting training...
Epoch 1 started...
Epoch 1, Training Loss: 0.1677, Validation Loss: 0.0003,Accuracy: 0.9332
Epoch 2 started...
Epoch 2, Training Loss: 0.0020, Validation Loss: 0.0002,Accuracy: 1.0000
Epoch 3 started...


In [None]:
train_and_validate(model, train_loader, validation_loader)

In [None]:
#check if transformed