# Install Torch with CUDA

In [None]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [1]:
import torch
print(torch.cuda.is_available())  # Should return True if GPU is available

True


# Download Dataset

# Prepare Dataset

In [2]:
from tqdm import tqdm
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Define transformations for your dataset
image_width = 32
transform = transforms.Compose([
    transforms.Resize((image_width, image_width)),  # Resize all images to 128x128
    transforms.ToTensor(),          # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

# Load train dataset
train_path = os.path.join('PlantVillage', 'train')
train_dataset = datasets.ImageFolder(root=train_path, transform=transform)

# Load val dataset
val_path = os.path.join('PlantVillage', 'val')
val_dataset = datasets.ImageFolder(root=val_path, transform=transform)

# Split val dataset into val and test
test_split = 0.5  # Use 50% of the current val set as the test set
test_size = int(test_split * len(val_dataset))
val_size = len(val_dataset) - test_size

val_dataset, test_dataset = random_split(val_dataset, [val_size, test_size])

# Create DataLoaders for train, val, and test datasets
batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Function to count class instances with a progress bar
def count_classes(dataset):
    class_counter = Counter()
    for _, label in tqdm(dataset, desc="Counting classes"):
        class_counter[label] += 1
    return class_counter

# Access class-to-index mapping
class_to_idx = train_dataset.class_to_idx
print("Class to index mapping:", class_to_idx)

def count_classes_in_folders(dataset_path):
    """
    Count the number of items in each class folder in the dataset.
    """
    class_counts = {}
    for class_name in os.listdir(dataset_path):
        class_folder = os.path.join(dataset_path, class_name)
        if os.path.isdir(class_folder):  # Ensure it's a directory
            class_counts[class_name] = len(os.listdir(class_folder))
    return class_counts

# Count classes
train_class_counts = count_classes_in_folders(train_path)

# Create a DataFrame from the train_class_counts dictionary
class_df = pd.DataFrame(list(train_class_counts.items()), columns=["Class", "Count"])

# Sort the DataFrame by 'Count' in ascending order
class_df_sorted = class_df.sort_values(by="Count", ascending=True).reset_index(drop=True)

# Print the sorted DataFrame
print(class_df_sorted)

Class to index mapping: {'Apple___Apple_scab': 0, 'Apple___Black_rot': 1, 'Apple___Cedar_apple_rust': 2, 'Apple___healthy': 3, 'Blueberry___healthy': 4, 'Cherry_(including_sour)___Powdery_mildew': 5, 'Cherry_(including_sour)___healthy': 6, 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot': 7, 'Corn_(maize)___Common_rust_': 8, 'Corn_(maize)___Northern_Leaf_Blight': 9, 'Corn_(maize)___healthy': 10, 'Grape___Black_rot': 11, 'Grape___Esca_(Black_Measles)': 12, 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 13, 'Grape___healthy': 14, 'Orange___Haunglongbing_(Citrus_greening)': 15, 'Peach___Bacterial_spot': 16, 'Peach___healthy': 17, 'Pepper,_bell___Bacterial_spot': 18, 'Pepper,_bell___healthy': 19, 'Potato___Early_blight': 20, 'Potato___Late_blight': 21, 'Potato___healthy': 22, 'Raspberry___healthy': 23, 'Soybean___healthy': 24, 'Squash___Powdery_mildew': 25, 'Strawberry___Leaf_scorch': 26, 'Strawberry___healthy': 27, 'Tomato___Bacterial_spot': 28, 'Tomato___Early_blight': 29, 'Tomato___L

# Model

In [None]:
import torch
import torch.nn as nn


class SimpleCNN(nn.Module):
    def __init__(self, num_classes, image_width):
        super(SimpleCNN, self).__init__()  # Call the parent class constructor
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # nn.Conv2d(35, 60, kernel_size=3, stride=1, padding=1),
            # nn.BatchNorm2d(60),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),
            
            nn.Dropout(0.20)
        )
        self.classifier = nn.Sequential(
            nn.Linear(32 * (image_width//4)**2, 512),
            nn.ReLU(),
            nn.Dropout(0.45),
            nn.Linear(512, num_classes)
        )
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.classifier(x)
        return x

# Training

In [6]:
import torch.optim as optim


# Model, loss function, optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available
num_classes = len(train_dataset.classes)
model = SimpleCNN(num_classes=num_classes, image_width=image_width).to(device)  # Move model to device
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Set maximum number of epochs and patience for early stopping
max_epochs = 40
patience = 5  # Number of epochs with no improvement after which training will stop
best_val_loss = float('inf')  # Initialize best validation loss as infinity
epochs_without_improvement = 0

# Training loop
for epoch in range(max_epochs):
    model.train()
    running_loss = 0.0
    train_loader = tqdm(train_loader, desc=f"Epoch {epoch+1}/{max_epochs}", unit="batch")  # Add tqdm to training loop
    
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to device
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_loader.set_postfix(loss=loss.item())  # Update tqdm progress bar with current loss
    
    print(f"Epoch {epoch+1}, Average Loss: {(running_loss / len(train_loader)):.4f}")

    # Evaluate on the validation dataset
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0
    with torch.no_grad():  # Disable gradient computation during evaluation
        for images, labels in val_loader:  # Assuming val_loader is the DataLoader for the test/validation set
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    avg_val_loss = val_loss / len(val_loader)
    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}%")
    print(f"Validation Loss: {avg_val_loss:.4f}")
    
    # Check for early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_without_improvement = 0  # Reset counter
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs.")
            break  # Stop training early if no improvement

    # Check if we have reached the maximum number of epochs
    if epoch + 1 == max_epochs:
        print("Maximum number of epochs reached.")
        break

Epoch 1/40:   0%|          | 0/1358 [00:00<?, ?batch/s]

Epoch 1/40: 100%|██████████| 1358/1358 [00:56<00:00, 23.90batch/s, loss=0.911]


Epoch 1, Average Loss: 1.3081
Validation Accuracy: 80.67%
Validation Loss: 0.6176


Epoch 2/40: 100%|██████████| 1358/1358 [00:56<00:00, 24.22batch/s, loss=1.1]  


Epoch 2, Average Loss: 0.6652
Validation Accuracy: 86.25%
Validation Loss: 0.4423


Epoch 3/40: 100%|██████████| 1358/1358 [00:57<00:00, 23.62batch/s, loss=0.382] 


Epoch 3, Average Loss: 0.5061
Validation Accuracy: 86.95%
Validation Loss: 0.3969


Epoch 4/40: 100%|██████████| 1358/1358 [00:57<00:00, 23.82batch/s, loss=0.35]  


Epoch 4, Average Loss: 0.4166
Validation Accuracy: 89.95%
Validation Loss: 0.3031


Epoch 5/40: 100%|██████████| 1358/1358 [00:57<00:00, 23.56batch/s, loss=0.352] 


Epoch 5, Average Loss: 0.3558
Validation Accuracy: 91.68%
Validation Loss: 0.2568


Epoch 6/40: 100%|██████████| 1358/1358 [00:56<00:00, 23.90batch/s, loss=0.121] 


Epoch 6, Average Loss: 0.3148
Validation Accuracy: 90.52%
Validation Loss: 0.2713


Epoch 7/40: 100%|██████████| 1358/1358 [00:54<00:00, 24.85batch/s, loss=0.585] 


Epoch 7, Average Loss: 0.2777
Validation Accuracy: 92.10%
Validation Loss: 0.2416


Epoch 8/40: 100%|██████████| 1358/1358 [00:51<00:00, 26.44batch/s, loss=0.0628]


Epoch 8, Average Loss: 0.2612
Validation Accuracy: 91.94%
Validation Loss: 0.2455


Epoch 9/40: 100%|██████████| 1358/1358 [00:51<00:00, 26.49batch/s, loss=0.0633]


Epoch 9, Average Loss: 0.2315
Validation Accuracy: 93.41%
Validation Loss: 0.1961


Epoch 10/40: 100%|██████████| 1358/1358 [00:48<00:00, 27.99batch/s, loss=0.239] 


Epoch 10, Average Loss: 0.2157
Validation Accuracy: 93.68%
Validation Loss: 0.1935


Epoch 11/40: 100%|██████████| 1358/1358 [00:49<00:00, 27.53batch/s, loss=0.118] 


Epoch 11, Average Loss: 0.2037
Validation Accuracy: 94.02%
Validation Loss: 0.1871


Epoch 12/40: 100%|██████████| 1358/1358 [00:57<00:00, 23.61batch/s, loss=0.667] 


Epoch 12, Average Loss: 0.1827
Validation Accuracy: 93.67%
Validation Loss: 0.2107


Epoch 13/40: 100%|██████████| 1358/1358 [00:51<00:00, 26.12batch/s, loss=0.137]  


Epoch 13, Average Loss: 0.1763
Validation Accuracy: 94.22%
Validation Loss: 0.1890


Epoch 14/40: 100%|██████████| 1358/1358 [00:54<00:00, 25.03batch/s, loss=0.23]  


Epoch 14, Average Loss: 0.1696
Validation Accuracy: 93.52%
Validation Loss: 0.2051


Epoch 15/40: 100%|██████████| 1358/1358 [00:49<00:00, 27.42batch/s, loss=0.0751] 


Epoch 15, Average Loss: 0.1593
Validation Accuracy: 94.49%
Validation Loss: 0.1833


Epoch 16/40: 100%|██████████| 1358/1358 [00:47<00:00, 28.67batch/s, loss=0.0851] 


Epoch 16, Average Loss: 0.1539
Validation Accuracy: 94.22%
Validation Loss: 0.1840


Epoch 17/40: 100%|██████████| 1358/1358 [00:47<00:00, 28.55batch/s, loss=0.265]  


Epoch 17, Average Loss: 0.1462
Validation Accuracy: 94.33%
Validation Loss: 0.1843


Epoch 18/40: 100%|██████████| 1358/1358 [00:48<00:00, 28.24batch/s, loss=0.145]  


Epoch 18, Average Loss: 0.1417
Validation Accuracy: 93.87%
Validation Loss: 0.1959


Epoch 19/40: 100%|██████████| 1358/1358 [00:48<00:00, 28.17batch/s, loss=0.081]  


Epoch 19, Average Loss: 0.1349
Validation Accuracy: 94.35%
Validation Loss: 0.1879


Epoch 20/40: 100%|██████████| 1358/1358 [00:51<00:00, 26.12batch/s, loss=0.013]  


Epoch 20, Average Loss: 0.1338
Validation Accuracy: 94.40%
Validation Loss: 0.1783


Epoch 21/40: 100%|██████████| 1358/1358 [00:50<00:00, 27.15batch/s, loss=0.00734]


Epoch 21, Average Loss: 0.1224
Validation Accuracy: 94.68%
Validation Loss: 0.1842


Epoch 22/40: 100%|██████████| 1358/1358 [00:47<00:00, 28.76batch/s, loss=0.106]  


Epoch 22, Average Loss: 0.1195
Validation Accuracy: 94.02%
Validation Loss: 0.2139


Epoch 23/40: 100%|██████████| 1358/1358 [00:46<00:00, 29.41batch/s, loss=0.168]  


Epoch 23, Average Loss: 0.1156
Validation Accuracy: 95.18%
Validation Loss: 0.1669


Epoch 24/40: 100%|██████████| 1358/1358 [00:45<00:00, 29.54batch/s, loss=0.0186] 


Epoch 24, Average Loss: 0.1125
Validation Accuracy: 94.31%
Validation Loss: 0.2061


Epoch 25/40: 100%|██████████| 1358/1358 [00:46<00:00, 29.12batch/s, loss=0.0893] 


Epoch 25, Average Loss: 0.1085
Validation Accuracy: 94.68%
Validation Loss: 0.1899


Epoch 26/40: 100%|██████████| 1358/1358 [00:47<00:00, 28.62batch/s, loss=0.47]   


Epoch 26, Average Loss: 0.1112
Validation Accuracy: 94.73%
Validation Loss: 0.1887


Epoch 27/40: 100%|██████████| 1358/1358 [00:45<00:00, 29.84batch/s, loss=0.0145]  


Epoch 27, Average Loss: 0.1080
Validation Accuracy: 95.01%
Validation Loss: 0.1874


Epoch 28/40: 100%|██████████| 1358/1358 [00:47<00:00, 28.44batch/s, loss=0.00976] 


Epoch 28, Average Loss: 0.1000
Validation Accuracy: 95.56%
Validation Loss: 0.1667


Epoch 29/40: 100%|██████████| 1358/1358 [00:46<00:00, 29.03batch/s, loss=0.0831]  


Epoch 29, Average Loss: 0.0982
Validation Accuracy: 95.12%
Validation Loss: 0.1864


Epoch 30/40: 100%|██████████| 1358/1358 [00:47<00:00, 28.53batch/s, loss=0.0233] 


Epoch 30, Average Loss: 0.1029
Validation Accuracy: 95.03%
Validation Loss: 0.1881


Epoch 31/40: 100%|██████████| 1358/1358 [00:47<00:00, 28.74batch/s, loss=0.00262]


Epoch 31, Average Loss: 0.0986
Validation Accuracy: 95.43%
Validation Loss: 0.1724


Epoch 32/40: 100%|██████████| 1358/1358 [00:46<00:00, 29.07batch/s, loss=0.0166]  


Epoch 32, Average Loss: 0.0991
Validation Accuracy: 94.88%
Validation Loss: 0.1970


Epoch 33/40: 100%|██████████| 1358/1358 [00:50<00:00, 27.10batch/s, loss=0.231]   


Epoch 33, Average Loss: 0.0904
Validation Accuracy: 95.14%
Validation Loss: 0.1825
Early stopping triggered after 33 epochs.


# Testing

In [7]:
from sklearn.metrics import f1_score

# Evaluate on the test dataset with tqdm
model.eval()
correct = 0
total = 0
all_labels = []  # To store true labels
all_preds = []   # To store predicted labels
test_loader = tqdm(test_loader, desc="Testing", unit="batch")  # Add tqdm for the test loop

with torch.no_grad():  # Disable gradient computation during evaluation
    for images, labels in test_loader:  # Loop through test dataset
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
        all_labels.extend(labels.cpu().numpy())  # Append true labels
        all_preds.extend(predicted.cpu().numpy())  # Append predicted labels
        
        # Update tqdm bar with current accuracy
        test_loader.set_postfix(accuracy=(100 * correct / total))  

# Compute test accuracy
test_accuracy = 100 * correct / total

# Compute F1 score for the whole dataset
f1 = f1_score(all_labels, all_preds, average='weighted')  # You can also use 'macro' or 'micro' as needed

print(f"\nTest Accuracy: {test_accuracy:.2f}%")
print(f"Test F1 Score (Weighted): {f1:.4f}")

Testing: 100%|██████████| 170/170 [00:23<00:00,  7.30batch/s, accuracy=95.4]


Test Accuracy: 95.43%
Test F1 Score (Weighted): 0.9539



