### Environment Setup

#### Install Torch with CUDA

In [None]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 -q

In [None]:
# import torch
# print(torch.cuda.is_available())  # Should return True if GPU is available

#### Download Dataset

In [None]:
# !pip install gdown -q

This cell might take more than 1 minute to run

In [None]:
# import os
# import gdown
# import zipfile

# # URL of the zip file on Google Drive
# url = 'https://drive.google.com/file/d/1OXoi4UeZy726ILuPM6Y57sr4eZhqrhq2/view?usp=sharing'

# # Function to download the zip file and extract it
# def download_and_extract_zip(url, extract_to='PlantVillage'):
#     # Generate the direct download URL for the file
#     file_id = url.split('/d/')[1].split('/')[0]
#     download_url = f'https://drive.google.com/uc?id={file_id}'

#     # Download the zip file
#     zip_file = 'PlantVillage.zip'
#     gdown.download(download_url, zip_file, quiet=False)

#     # Extract the zip file directly
#     with zipfile.ZipFile(zip_file, 'r') as zip_ref:
#         # Extract all the files directly into the 'extract_to' folder
#         zip_ref.extractall(extract_to)

#     # Remove the zip file after extraction
#     os.remove(zip_file)

# # Check if the 'PlantVillage' folder exists
# if not os.path.exists('PlantVillage') or not os.listdir('PlantVillage'):
#     download_and_extract_zip(url)

### Dataset Preprocessing

In [1]:
import os
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Local import: torch itself is only needed here for the seeded split generator below
import torch

# Define transformations for your dataset
image_width = 32
transform = transforms.Compose([
    transforms.Resize((image_width, image_width)),  # Resize all images to image_width x image_width
    transforms.ToTensor(),          # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

# Load train dataset
train_path = os.path.join('PlantVillage', 'train')
train_dataset = datasets.ImageFolder(root=train_path, transform=transform)

# Load val dataset
val_path = os.path.join('PlantVillage', 'val')
val_dataset = datasets.ImageFolder(root=val_path, transform=transform)

# Split val dataset into val and test
test_split = 0.5  # Use 50% of the current val set as the test set
test_size = int(test_split * len(val_dataset))
val_size = len(val_dataset) - test_size

# Fixed seed: without it every run of this cell draws a different val/test
# partition, so reported validation/test numbers are not reproducible.
split_seed = 42
val_dataset, test_dataset = random_split(
    val_dataset,
    [val_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Create DataLoaders for train, val, and test datasets
batch_size = 32

# Only the training loader is shuffled; val/test order does not affect the metrics
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Function to count class instances (progress bar only when samples must be loaded)
def count_classes(dataset):
    """Count how many samples of each class label `dataset` contains.

    Args:
        dataset: either an object exposing its labels as ``targets``, a subset
            wrapper exposing ``dataset`` and ``indices`` over such an object,
            or any iterable of ``(sample, label)`` pairs.

    Returns:
        collections.Counter mapping label -> number of samples.
    """
    parent = getattr(dataset, 'dataset', None)

    # Fast path: read the label list directly instead of loading and
    # transforming every sample just to discard it. Skipped when a
    # target_transform is set, because iteration would then yield different labels.
    if hasattr(dataset, 'targets') and getattr(dataset, 'target_transform', None) is None:
        labels = dataset.targets
    elif (parent is not None and hasattr(dataset, 'indices') and hasattr(parent, 'targets')
          and getattr(parent, 'target_transform', None) is None):
        labels = [parent.targets[i] for i in dataset.indices]
    else:
        labels = None

    if labels is not None:
        # Unwrap 0-d tensors/arrays so the Counter keys are plain hashable scalars
        return Counter(lbl.item() if hasattr(lbl, 'item') else lbl for lbl in labels)

    # Fallback: iterate the dataset itself (slow, so show a progress bar)
    class_counter = Counter()
    for _, label in tqdm(dataset, desc="Counting classes"):
        class_counter[label] += 1
    return class_counter

# Access class-to-index mapping: the integer label the training dataset
# assigned to each class name (these integers are what the models predict)
class_to_idx = train_dataset.class_to_idx
print("Class to index mapping:", class_to_idx)

def count_classes_in_folders(dataset_path):
    """
    Map every class sub-folder of `dataset_path` to the number of entries it holds.

    Entries of `dataset_path` that are not directories are ignored. Each
    directory's count is the number of names directly inside it.
    """
    # Pair every entry name with its full path, then keep only the directories
    named_paths = (
        (entry, os.path.join(dataset_path, entry))
        for entry in os.listdir(dataset_path)
    )
    return {
        entry: len(os.listdir(full_path))
        for entry, full_path in named_paths
        if os.path.isdir(full_path)
    }

# Per-class entry counts of the training split, read from the folder tree
train_class_counts = count_classes_in_folders(train_path)

# Tabulate the counts as one (Class, Count) row per class folder
class_df = pd.DataFrame(
    list(train_class_counts.items()),
    columns=["Class", "Count"],
)

# Order the table from the rarest class to the most frequent one
class_df_sorted = class_df.sort_values(by="Count", ascending=True)
class_df_sorted = class_df_sorted.reset_index(drop=True)

# Show the ordered table
print(class_df_sorted)

Class to index mapping: {'Apple___Apple_scab': 0, 'Apple___Black_rot': 1, 'Apple___Cedar_apple_rust': 2, 'Apple___healthy': 3, 'Blueberry___healthy': 4, 'Cherry_(including_sour)___Powdery_mildew': 5, 'Cherry_(including_sour)___healthy': 6, 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot': 7, 'Corn_(maize)___Common_rust_': 8, 'Corn_(maize)___Northern_Leaf_Blight': 9, 'Corn_(maize)___healthy': 10, 'Grape___Black_rot': 11, 'Grape___Esca_(Black_Measles)': 12, 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 13, 'Grape___healthy': 14, 'Orange___Haunglongbing_(Citrus_greening)': 15, 'Peach___Bacterial_spot': 16, 'Peach___healthy': 17, 'Pepper,_bell___Bacterial_spot': 18, 'Pepper,_bell___healthy': 19, 'Potato___Early_blight': 20, 'Potato___Late_blight': 21, 'Potato___healthy': 22, 'Raspberry___healthy': 23, 'Soybean___healthy': 24, 'Squash___Powdery_mildew': 25, 'Strawberry___Leaf_scorch': 26, 'Strawberry___healthy': 27, 'Tomato___Bacterial_spot': 28, 'Tomato___Early_blight': 29, 'Tomato___L

In [None]:
# import torch.optim as optim

# # Model, loss function, optimizer
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available
# num_classes = len(train_dataset.classes)
# model = SimpleCNN(num_classes=num_classes, image_width=image_width).to(device)  # Move model to device
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters(), lr=0.001)

# # Set maximum number of epochs and patience for early stopping
# max_epochs = 40
# patience = 5  # Number of epochs with no improvement after which training will stop
# best_val_loss = float('inf')  # Initialize best validation loss as infinity
# epochs_without_improvement = 0

# # Training loop
# for epoch in range(max_epochs):
#     model.train()
#     running_loss = 0.0
#     train_loader = tqdm(train_loader, desc=f"Epoch {epoch+1}/{max_epochs}", unit="batch")  # Add tqdm to training loop
    
#     for images, labels in train_loader:
#         images, labels = images.to(device), labels.to(device)  # Move data to device
#         optimizer.zero_grad()
#         outputs = model(images)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         optimizer.step()
#         running_loss += loss.item()
#         train_loader.set_postfix(loss=loss.item())  # Update tqdm progress bar with current loss
    
#     print(f"Epoch {epoch+1}, Average Loss: {(running_loss / len(train_loader)):.4f}")

#     # Evaluate on the validation dataset
#     model.eval()
#     correct = 0
#     total = 0
#     val_loss = 0.0
#     with torch.no_grad():  # Disable gradient computation during evaluation
#         for images, labels in val_loader:  # Assuming val_loader is the DataLoader for the test/validation set
#             images, labels = images.to(device), labels.to(device)
#             outputs = model(images)
#             loss = criterion(outputs, labels)
#             val_loss += loss.item()
            
#             _, predicted = torch.max(outputs, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
    
#     avg_val_loss = val_loss / len(val_loader)
#     accuracy = 100 * correct / total
#     print(f"Validation Accuracy: {accuracy:.2f}%")
#     print(f"Validation Loss: {avg_val_loss:.4f}")
    
#     # Check for early stopping
#     if avg_val_loss < best_val_loss:
#         best_val_loss = avg_val_loss
#         epochs_without_improvement = 0  # Reset counter
#     else:
#         epochs_without_improvement += 1
#         if epochs_without_improvement >= patience:
#             print(f"Early stopping triggered after {epoch+1} epochs.")
#             break  # Stop training early if no improvement

#     # Check if we have reached the maximum number of epochs
#     if epoch + 1 == max_epochs:
#         print("Maximum number of epochs reached.")
#         break

### Training function

In [2]:
import torch.optim as optim

def train_model(model, criterion, optimizer, num_classes, train_loader, epoch=50, val_loader=None):
    """Train `model` with per-epoch validation and early stopping on validation loss.

    Args:
        model: network to train. Batches are moved to the device its parameters live on.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer already bound to the parameters that should be updated.
        num_classes: unused; kept so existing calls keep working.
        train_loader: sized iterable of ``(images, labels)`` training batches.
        epoch: maximum number of epochs to run.
        val_loader: sized iterable of validation batches. When omitted, the
            module-level ``val_loader`` is used (the original behaviour).

    Returns:
        None. Progress, validation accuracy and validation loss are printed.
    """
    if val_loader is None:
        # Backward compatibility: earlier callers relied on the notebook-global loader
        val_loader = globals()['val_loader']

    # Follow the model's own device so inputs and weights can never disagree;
    # fall back to the usual CUDA-if-available choice for parameter-less models.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Set maximum number of epochs and patience for early stopping
    max_epochs = epoch
    patience = 5  # Number of epochs with no improvement after which training will stop
    best_val_loss = float('inf')  # Initialize best validation loss as infinity
    epochs_without_improvement = 0

    # Training loop
    for epoch_idx in range(max_epochs):
        model.train()
        running_loss = 0.0
        # Wrap the loader in a fresh bar each epoch WITHOUT rebinding train_loader;
        # rebinding stacked one tqdm wrapper per epoch and produced nested bars.
        progress = tqdm(train_loader, desc=f"Epoch {epoch_idx+1}/{max_epochs}", unit="batch")

        for images, labels in progress:
            images, labels = images.to(device), labels.to(device)  # Move data to device
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            batch_loss = loss.item()
            running_loss += batch_loss
            progress.set_postfix(loss=batch_loss)  # Update tqdm progress bar with current loss

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError
        print(f"Epoch {epoch_idx+1}, Average Loss: {(running_loss / max(len(train_loader), 1)):.4f}")

        # Evaluate on the validation dataset
        model.eval()
        correct = 0
        total = 0
        val_loss = 0.0
        with torch.no_grad():  # Disable gradient computation during evaluation
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Validation loss is the mean of the per-batch losses
        avg_val_loss = val_loss / max(len(val_loader), 1)
        accuracy = 100 * correct / max(total, 1)
        print(f"Validation Accuracy: {accuracy:.2f}%")
        print(f"Validation Loss: {avg_val_loss:.4f}")

        # Check for early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            epochs_without_improvement = 0  # Reset counter
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f"Early stopping triggered after {epoch_idx+1} epochs.")
                break  # Stop training early if no improvement

        # Check if we have reached the maximum number of epochs
        if epoch_idx + 1 == max_epochs:
            print("Maximum number of epochs reached.")
            break

In [None]:
# from sklearn.metrics import f1_score

# # Evaluate on the test dataset with tqdm
# model.eval()
# correct = 0
# total = 0
# all_labels = []  # To store true labels
# all_preds = []   # To store predicted labels
# test_loader = tqdm(test_loader, desc="Testing", unit="batch")  # Add tqdm for the test loop

# with torch.no_grad():  # Disable gradient computation during evaluation
#     for images, labels in test_loader:  # Loop through test dataset
#         images, labels = images.to(device), labels.to(device)
#         outputs = model(images)
#         _, predicted = torch.max(outputs, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
        
#         all_labels.extend(labels.cpu().numpy())  # Append true labels
#         all_preds.extend(predicted.cpu().numpy())  # Append predicted labels
        
#         # Update tqdm bar with current accuracy
#         test_loader.set_postfix(accuracy=(100 * correct / total))  

# # Compute test accuracy
# test_accuracy = 100 * correct / total

# # Compute F1 score for the whole dataset
# f1 = f1_score(all_labels, all_preds, average='weighted')  # You can also use 'macro' or 'micro' as needed

# print(f"\nTest Accuracy: {test_accuracy:.2f}%")
# print(f"Test F1 Score (Weighted): {f1:.4f}")

### Testing function

In [3]:
from sklearn.metrics import f1_score

def test_model(model, test_loader):
    """Evaluate `model` on `test_loader` and print accuracy and weighted F1.

    Args:
        model: trained network. Batches are moved to the device its parameters live on.
        test_loader: iterable of ``(images, labels)`` batches.

    Returns:
        None. The metrics are printed.
    """
    # Follow the model's own device so inputs and weights can never disagree;
    # fall back to the usual CUDA-if-available choice for parameter-less models.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Evaluate on the test dataset with tqdm
    model.eval()
    correct = 0
    total = 0
    all_labels = []  # To store true labels
    all_preds = []   # To store predicted labels
    progress = tqdm(test_loader, desc="Testing", unit="batch")  # Add tqdm for the test loop

    with torch.no_grad():  # Disable gradient computation during evaluation
        for images, labels in progress:  # Loop through test dataset
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)  # index of the largest score per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_labels.extend(labels.cpu().numpy())  # Append true labels
            all_preds.extend(predicted.cpu().numpy())  # Append predicted labels

            # Update tqdm bar with current accuracy
            progress.set_postfix(accuracy=(100 * correct / total))

    if total == 0:
        # Nothing was evaluated; avoid the ZeroDivisionError the accuracy would raise
        print("\nTest loader produced no samples; nothing to evaluate.")
        return

    # Compute test accuracy
    test_accuracy = 100 * correct / total

    # Compute F1 score for the whole dataset
    f1 = f1_score(all_labels, all_preds, average='weighted')  # You can also use 'macro' or 'micro' as needed

    print(f"\nTest Accuracy: {test_accuracy:.2f}%")
    print(f"Test F1 Score (Weighted): {f1:.4f}")

### Evaluation function

In [4]:
from sklearn.metrics import classification_report, confusion_matrix

# Class names in label-index order, used to title the rows of the report
class_names = train_dataset.classes

def evaluate_model(model, val_loader, class_names):
    """Print a per-class classification report and plot the confusion matrix.

    Args:
        model: trained network. Batches are moved to the device its parameters live on.
        val_loader: iterable of ``(inputs, labels)`` batches to evaluate on.
        class_names: class names ordered by label index (name ``i`` describes label ``i``).

    Returns:
        None. The report is printed and the figure is shown.
    """
    # Take the device from the model instead of a notebook-global `device`,
    # which only exists once a later cell has been executed.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Pass the full label set explicitly: without it, a class absent from this
    # split makes the number of labels differ from len(class_names) and the
    # report raises instead of printing a zero-support row.
    label_ids = list(range(len(class_names)))
    print(classification_report(all_labels, all_preds, labels=label_ids, target_names=class_names))

    # Confusion Matrix (rows: true labels, columns: predicted labels)
    cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
    plt.figure(figsize=(10, 8))
    plt.imshow(cm, interpolation='nearest', cmap='Blues')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.colorbar()
    plt.show()

### Simple CNN Model Training and Testing

#### Model definition

In [5]:
import torch
import torch.nn as nn


class SimpleCNN(nn.Module):
    """Small image classifier: two conv/ReLU/max-pool stages, then a two-layer head.

    Each pooling stage halves the spatial size, so a square input of side
    `image_width` reaches the classifier as 32 feature maps of side
    `image_width // 4`.

    Args:
        num_classes: number of output scores produced per image.
        image_width: side length of the square 3-channel input images.
    """

    def __init__(self, num_classes, image_width):
        super().__init__()

        first_stage = [
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        second_stage = [
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(
            *first_stage,
            *second_stage,

            # nn.Conv2d(35, 60, kernel_size=3, stride=1, padding=1),
            # nn.BatchNorm2d(60),
            # nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Dropout(0.20),
        )

        # 32 channels times the twice-halved spatial area
        flat_features = 32 * (image_width // 4) ** 2
        self.classifier = nn.Sequential(
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Dropout(0.45),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images."""
        feature_maps = self.features(x)
        # Collapse channel and spatial dimensions into one vector per sample
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flattened)

#### Training

In [6]:
# Prefer the GPU when one is visible, otherwise stay on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One output score per class found in the training set
num_classes = len(train_dataset.classes)

# Build the CNN for the current input resolution, then place it on the chosen device
model = SimpleCNN(num_classes=num_classes, image_width=image_width)
model = model.to(device)

# Multi-class setup: Adam over all model parameters, cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train for at most 40 epochs
train_model(model, criterion, optimizer, num_classes, train_loader, epoch=40)

Epoch 1/40: 100%|██████████| 1358/1358 [00:11<00:00, 121.96batch/s, loss=0.665]

Epoch 1, Average Loss: 1.2995





Validation Accuracy: 80.34%
Validation Loss: 0.6502


Epoch 2/40: 100%|██████████| 1358/1358 [00:07<00:00, 180.64batch/s, loss=0.236]

Epoch 2, Average Loss: 0.6702





Validation Accuracy: 86.61%
Validation Loss: 0.4369


Epoch 3/40: 100%|██████████| 1358/1358 [00:07<00:00, 182.33batch/s, loss=0.38] 

Epoch 3, Average Loss: 0.5106





Validation Accuracy: 88.31%
Validation Loss: 0.3554


Epoch 4/40: 100%|██████████| 1358/1358 [00:08<00:00, 167.32batch/s, loss=0.151] 

Epoch 4, Average Loss: 0.4136





Validation Accuracy: 90.22%
Validation Loss: 0.3023


Epoch 5/40: 100%|██████████| 1358/1358 [00:07<00:00, 177.06batch/s, loss=0.228] 

Epoch 5, Average Loss: 0.3584





Validation Accuracy: 92.06%
Validation Loss: 0.2420


Epoch 6/40: 100%|██████████| 1358/1358 [00:07<00:00, 180.00batch/s, loss=0.156] 

Epoch 6, Average Loss: 0.3110





Validation Accuracy: 91.51%
Validation Loss: 0.2559


Epoch 7/40: 100%|██████████| 1358/1358 [00:08<00:00, 162.16batch/s, loss=0.0373]

Epoch 7, Average Loss: 0.2758





Validation Accuracy: 93.92%
Validation Loss: 0.1899


Epoch 8/40: 100%|██████████| 1358/1358 [00:07<00:00, 179.57batch/s, loss=0.0119] 

Epoch 8, Average Loss: 0.2474





Validation Accuracy: 93.50%
Validation Loss: 0.1889


Epoch 9/40: 100%|██████████| 1358/1358 [00:07<00:00, 175.26batch/s, loss=0.503] 

Epoch 9, Average Loss: 0.2245





Validation Accuracy: 93.96%
Validation Loss: 0.1806


Epoch 10/40: 100%|██████████| 1358/1358 [00:07<00:00, 179.26batch/s, loss=0.237]  

Epoch 10, Average Loss: 0.2058





Validation Accuracy: 94.38%
Validation Loss: 0.1695


Epoch 11/40: 100%|██████████| 1358/1358 [00:08<00:00, 162.54batch/s, loss=0.155] 

Epoch 11, Average Loss: 0.1908





Validation Accuracy: 93.76%
Validation Loss: 0.1943


Epoch 12/40: 100%|██████████| 1358/1358 [00:07<00:00, 179.33batch/s, loss=0.129] 

Epoch 12, Average Loss: 0.1808





Validation Accuracy: 94.90%
Validation Loss: 0.1575


Epoch 13/40: 100%|██████████| 1358/1358 [00:07<00:00, 175.57batch/s, loss=0.214] 

Epoch 13, Average Loss: 0.1685





Validation Accuracy: 94.05%
Validation Loss: 0.1931


Epoch 14/40: 100%|██████████| 1358/1358 [00:07<00:00, 177.31batch/s, loss=0.0383] 

Epoch 14, Average Loss: 0.1547





Validation Accuracy: 94.57%
Validation Loss: 0.1699


Epoch 15/40: 100%|██████████| 1358/1358 [00:08<00:00, 162.74batch/s, loss=0.426]  

Epoch 15, Average Loss: 0.1495





Validation Accuracy: 95.03%
Validation Loss: 0.1562


Epoch 16/40: 100%|██████████| 1358/1358 [00:07<00:00, 176.00batch/s, loss=0.105]  


Epoch 16, Average Loss: 0.1436
Validation Accuracy: 93.37%
Validation Loss: 0.2140


Epoch 17/40: 100%|██████████| 1358/1358 [00:07<00:00, 172.25batch/s, loss=0.00826]

Epoch 17, Average Loss: 0.1379





Validation Accuracy: 94.59%
Validation Loss: 0.1754


Epoch 18/40: 100%|██████████| 1358/1358 [00:07<00:00, 175.62batch/s, loss=0.0868] 

Epoch 18, Average Loss: 0.1291





Validation Accuracy: 95.08%
Validation Loss: 0.1612


Epoch 19/40: 100%|██████████| 1358/1358 [00:07<00:00, 176.17batch/s, loss=0.0217] 

Epoch 19, Average Loss: 0.1290





Validation Accuracy: 94.94%
Validation Loss: 0.1684


Epoch 20/40: 100%|██████████| 1358/1358 [00:07<00:00, 178.11batch/s, loss=0.0588] 

Epoch 20, Average Loss: 0.1181





Validation Accuracy: 94.81%
Validation Loss: 0.1804
Early stopping triggered after 20 epochs.


#### Testing

In [7]:
# Print accuracy and weighted F1 of the current model on the held-out test split
test_model(model, test_loader)

Testing: 100%|██████████| 170/170 [00:01<00:00, 116.78batch/s, accuracy=94.4]



Test Accuracy: 94.44%
Test F1 Score (Weighted): 0.9434


#### Evaluation

In [None]:
# Print the per-class report and plot the confusion matrix for the validation split
evaluate_model(model, val_loader, class_names)

#### Model saving

In [None]:
# Save model to file: only the learned parameters (state_dict) are written,
# so the model class must be re-created before they can be loaded back
torch.save(model.state_dict(), 'plant_disease_model.pth')

#### Model Loading from file

In [None]:
# Retrieve model from file.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
model.load_state_dict(torch.load('plant_disease_model.pth', map_location=device))
model = model.to(device)

#### Predict single image from external JPG image

In [None]:
from PIL import Image
import torch
import torchvision.transforms as transforms

# Load the trained model.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
model.load_state_dict(torch.load('plant_disease_model.pth', map_location=device))
model.eval()
model.to(device)

# Define the transformations (resize + [-1, 1] normalisation, using the current image_width)
transform = transforms.Compose([
    transforms.Resize((image_width, image_width)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load and preprocess the image
image_path = "test.JPG"  # Replace with your image path
image = Image.open(image_path).convert("RGB")  # force 3 channels
image_tensor = transform(image).unsqueeze(0).to(device)  # add the batch dimension

# Perform the prediction
with torch.no_grad():
    outputs = model(image_tensor)
    probabilities = torch.nn.functional.softmax(outputs, dim=1)
    _, predicted_class = torch.max(outputs, 1)

# Map index to class label
predicted_index = predicted_class.item()
predicted_label = train_dataset.classes[predicted_index]
predicted_prob = probabilities[0, predicted_index].item()

print(f"The predicted class is: {predicted_label} with probability {predicted_prob:.4f}")

### ResNet Model Training and Testing

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Load the pre-trained ResNet-18
# NOTE(review): `pretrained=True` is deprecated in recent torchvision releases
# in favour of the `weights=` argument — switch once the minimum version allows.
model = models.resnet18(pretrained=True)

# Freeze the pre-trained layers.
# The optimizer below only updates the new `fc` head, so backbone gradients
# would be computed and then thrown away; freezing skips that wasted work
# without changing which weights get trained.
# To fine-tune the whole network instead, remove this loop AND pass
# model.parameters() to the optimizer.
for param in model.parameters():
    param.requires_grad = False

# Replace the last fully connected layer (the new layer is trainable by default)
num_classes = len(train_dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Move the model to GPU or CPU
model = model.to(device)

# Loss and optimizer (only the classification head is optimised)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)



In [10]:
import os
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Local import: torch itself is only needed here for the seeded split generator below
import torch

# Define transformations for your dataset
image_width = 224
transform = transforms.Compose([
    transforms.Resize((image_width, image_width)),  # Resize all images to image_width x image_width
    transforms.ToTensor(),          # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

# Load train dataset
train_path = os.path.join('PlantVillage', 'train')
train_dataset = datasets.ImageFolder(root=train_path, transform=transform)

# Load val dataset
val_path = os.path.join('PlantVillage', 'val')
val_dataset = datasets.ImageFolder(root=val_path, transform=transform)

# Split val dataset into val and test
test_split = 0.5  # Use 50% of the current val set as the test set
test_size = int(test_split * len(val_dataset))
val_size = len(val_dataset) - test_size

# Fixed seed: without it every run of this cell draws a different val/test
# partition, so reported validation/test numbers are not reproducible.
split_seed = 42
val_dataset, test_dataset = random_split(
    val_dataset,
    [val_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Create DataLoaders for train, val, and test datasets
batch_size = 32

# Only the training loader is shuffled; val/test order does not affect the metrics
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Function to count class instances (progress bar only when samples must be loaded)
def count_classes(dataset):
    """Count how many samples of each class label `dataset` contains.

    Args:
        dataset: either an object exposing its labels as ``targets``, a subset
            wrapper exposing ``dataset`` and ``indices`` over such an object,
            or any iterable of ``(sample, label)`` pairs.

    Returns:
        collections.Counter mapping label -> number of samples.
    """
    parent = getattr(dataset, 'dataset', None)

    # Fast path: read the label list directly instead of loading and
    # transforming every sample just to discard it. Skipped when a
    # target_transform is set, because iteration would then yield different labels.
    if hasattr(dataset, 'targets') and getattr(dataset, 'target_transform', None) is None:
        labels = dataset.targets
    elif (parent is not None and hasattr(dataset, 'indices') and hasattr(parent, 'targets')
          and getattr(parent, 'target_transform', None) is None):
        labels = [parent.targets[i] for i in dataset.indices]
    else:
        labels = None

    if labels is not None:
        # Unwrap 0-d tensors/arrays so the Counter keys are plain hashable scalars
        return Counter(lbl.item() if hasattr(lbl, 'item') else lbl for lbl in labels)

    # Fallback: iterate the dataset itself (slow, so show a progress bar)
    class_counter = Counter()
    for _, label in tqdm(dataset, desc="Counting classes"):
        class_counter[label] += 1
    return class_counter

# Access class-to-index mapping: the integer label the training dataset
# assigned to each class name (these integers are what the models predict)
class_to_idx = train_dataset.class_to_idx
print("Class to index mapping:", class_to_idx)

def count_classes_in_folders(dataset_path):
    """
    Map every class sub-folder of `dataset_path` to the number of entries it holds.

    Entries of `dataset_path` that are not directories are ignored. Each
    directory's count is the number of names directly inside it.
    """
    # Pair every entry name with its full path, then keep only the directories
    named_paths = (
        (entry, os.path.join(dataset_path, entry))
        for entry in os.listdir(dataset_path)
    )
    return {
        entry: len(os.listdir(full_path))
        for entry, full_path in named_paths
        if os.path.isdir(full_path)
    }

# Per-class entry counts of the training split, read from the folder tree
train_class_counts = count_classes_in_folders(train_path)

# Tabulate the counts as one (Class, Count) row per class folder
class_df = pd.DataFrame(
    list(train_class_counts.items()),
    columns=["Class", "Count"],
)

# Order the table from the rarest class to the most frequent one
class_df_sorted = class_df.sort_values(by="Count", ascending=True)
class_df_sorted = class_df_sorted.reset_index(drop=True)

# Show the ordered table
print(class_df_sorted)

Class to index mapping: {'Apple___Apple_scab': 0, 'Apple___Black_rot': 1, 'Apple___Cedar_apple_rust': 2, 'Apple___healthy': 3, 'Blueberry___healthy': 4, 'Cherry_(including_sour)___Powdery_mildew': 5, 'Cherry_(including_sour)___healthy': 6, 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot': 7, 'Corn_(maize)___Common_rust_': 8, 'Corn_(maize)___Northern_Leaf_Blight': 9, 'Corn_(maize)___healthy': 10, 'Grape___Black_rot': 11, 'Grape___Esca_(Black_Measles)': 12, 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 13, 'Grape___healthy': 14, 'Orange___Haunglongbing_(Citrus_greening)': 15, 'Peach___Bacterial_spot': 16, 'Peach___healthy': 17, 'Pepper,_bell___Bacterial_spot': 18, 'Pepper,_bell___healthy': 19, 'Potato___Early_blight': 20, 'Potato___Late_blight': 21, 'Potato___healthy': 22, 'Raspberry___healthy': 23, 'Soybean___healthy': 24, 'Squash___Powdery_mildew': 25, 'Strawberry___Leaf_scorch': 26, 'Strawberry___healthy': 27, 'Tomato___Bacterial_spot': 28, 'Tomato___Early_blight': 29, 'Tomato___L

#### ResNet Training

In [11]:
import torch.optim as optim

# Train for at most 10 epochs; train_model stops earlier if the validation
# loss fails to improve for its patience window
train_model(model, criterion, optimizer, num_classes, train_loader, epoch=10)

Epoch 1/10: 100%|██████████| 1358/1358 [01:11<00:00, 18.93batch/s, loss=0.386] 

Epoch 1, Average Loss: 0.5714





Validation Accuracy: 93.67%
Validation Loss: 0.2324


Epoch 2/10: 100%|██████████| 1358/1358 [01:07<00:00, 20.20batch/s, loss=0.22]  

Epoch 2, Average Loss: 0.2252





Validation Accuracy: 94.55%
Validation Loss: 0.1739


Epoch 3/10: 100%|██████████| 1358/1358 [01:10<00:00, 19.39batch/s, loss=0.179] 

Epoch 3, Average Loss: 0.1724





Validation Accuracy: 95.32%
Validation Loss: 0.1481


Epoch 4/10: 100%|██████████| 1358/1358 [01:07<00:00, 20.07batch/s, loss=0.0503]

Epoch 4, Average Loss: 0.1491





Validation Accuracy: 95.53%
Validation Loss: 0.1315


Epoch 5/10: 100%|██████████| 1358/1358 [01:06<00:00, 20.43batch/s, loss=0.11]  

Epoch 5, Average Loss: 0.1300





Validation Accuracy: 96.34%
Validation Loss: 0.1163


Epoch 6/10: 100%|██████████| 1358/1358 [01:01<00:00, 22.10batch/s, loss=0.201]  

Epoch 6, Average Loss: 0.1192





Validation Accuracy: 95.80%
Validation Loss: 0.1243


Epoch 7/10: 100%|██████████| 1358/1358 [01:02<00:00, 21.62batch/s, loss=0.0226] 

Epoch 7, Average Loss: 0.1134





Validation Accuracy: 96.19%
Validation Loss: 0.1124


Epoch 8/10: 100%|██████████| 1358/1358 [01:01<00:00, 21.91batch/s, loss=0.0631] 

Epoch 8, Average Loss: 0.1038





Validation Accuracy: 96.21%
Validation Loss: 0.1162


Epoch 9/10: 100%|██████████| 1358/1358 [01:03<00:00, 21.50batch/s, loss=0.0553] 

Epoch 9, Average Loss: 0.1015





Validation Accuracy: 96.43%
Validation Loss: 0.1016


Epoch 10/10: 100%|██████████| 1358/1358 [01:05<00:00, 20.80batch/s, loss=0.157]  

Epoch 10, Average Loss: 0.0955





Validation Accuracy: 96.43%
Validation Loss: 0.1125
Maximum number of epochs reached.


#### ResNet Testing

In [None]:
from sklearn.metrics import f1_score

# Evaluate on the test dataset with tqdm
model.eval()
correct = 0
total = 0
all_labels = []  # To store true labels
all_preds = []   # To store predicted labels
# Keep the progress bar in its own name: rebinding `test_loader` to the tqdm
# wrapper replaced the global DataLoader, so re-running this cell (or reusing
# test_loader afterwards) stacked one nested progress bar per run.
test_progress = tqdm(test_loader, desc="Testing", unit="batch")

with torch.no_grad():  # Disable gradient computation during evaluation
    for images, labels in test_progress:  # Loop through test dataset
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # index of the largest score per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        all_labels.extend(labels.cpu().numpy())  # Append true labels
        all_preds.extend(predicted.cpu().numpy())  # Append predicted labels

        # Update tqdm bar with current accuracy
        test_progress.set_postfix(accuracy=(100 * correct / total))

# Compute test accuracy
test_accuracy = 100 * correct / total

# Compute F1 score for the whole dataset
f1 = f1_score(all_labels, all_preds, average='weighted')  # You can also use 'macro' or 'micro' as needed

print(f"\nTest Accuracy: {test_accuracy:.2f}%")
print(f"Test F1 Score (Weighted): {f1:.4f}")

#### ResNet Evaluation

In [None]:
# Refresh the class names from the current training dataset, then print the
# per-class report and plot the confusion matrix for the validation split
class_names = train_dataset.classes
evaluate_model(model, val_loader, class_names)

#### Save ResNet Model

In [12]:
# Save only the learned parameters (state_dict) of the current model to file
torch.save(model.state_dict(), 'resNet_plant_disease_model.pth')

In [20]:
from PIL import Image
import torch
import torchvision.transforms as transforms

# Load the trained model.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
model.load_state_dict(torch.load('resNet_plant_disease_model.pth', map_location=device))
model.eval()
model.to(device)

image_width = 224
top_k = 5  # How many of the most probable classes to report

# Define the transformations (must match the ones used for training)
transform = transforms.Compose([
    transforms.Resize((image_width, image_width)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load and preprocess the image
# NOTE(review): absolute, machine-specific path — this cell fails on any other machine
image_path = "/home/alessio/FDS-Project/anotherTask.jpg"  # Replace with your image path
image = Image.open(image_path).convert("RGB")  # force 3 channels
image_tensor = transform(image).unsqueeze(0).to(device)  # add the batch dimension

# Perform the prediction
with torch.no_grad():
    outputs = model(image_tensor)
    probabilities = torch.nn.functional.softmax(outputs, dim=1)
    # topk raises if k exceeds the number of classes, so clamp it
    top_k = min(top_k, probabilities.size(1))
    topk_prob, topk_classes = torch.topk(probabilities, top_k)  # Get top-k probabilities and class indices

# Print the top-k classes and their probabilities.
# The header previously said "Top 3" while five entries were printed.
print(f"Top {top_k} predicted classes:")
for i in range(top_k):
    class_index = topk_classes[0, i].item()
    class_label = train_dataset.classes[class_index]
    class_prob = topk_prob[0, i].item()
    print(f"{i+1}. {class_label}: {class_prob:.4f}")

Top 3 predicted classes:
1. Apple___Cedar_apple_rust: 0.3975
2. Grape___Esca_(Black_Measles): 0.3141
3. Strawberry___Leaf_scorch: 0.2657
4. Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot: 0.0076
5. Apple___Black_rot: 0.0067
