In [12]:
import os
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm import tqdm

# Define transformations for your dataset
image_width = 32  # Side length (pixels) every image is resized to; also passed to SimpleCNN
transform = transforms.Compose([
    transforms.Resize((image_width, image_width)),  # Resize all images to image_width x image_width
    transforms.ToTensor(),          # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

# Load train dataset
train_path = os.path.join('PlantVillage', 'train')
train_dataset = datasets.ImageFolder(root=train_path, transform=transform)

# Load the held-out folder; it is split into validation and test subsets below.
# It gets its own name so `val_dataset` only ever refers to the validation subset.
val_path = os.path.join('PlantVillage', 'val')
heldout_dataset = datasets.ImageFolder(root=val_path, transform=transform)

# Split the held-out data into val and test
test_split = 0.5  # Use 50% of the held-out set as the test set
test_size = int(test_split * len(heldout_dataset))
val_size = len(heldout_dataset) - test_size  # Remainder, so the two sizes always sum to the total

# Seed the split: without a fixed generator the val/test membership changes on
# every run of this cell, which makes the reported test metrics irreproducible.
split_seed = 42
val_dataset, test_dataset = random_split(
    heldout_dataset,
    [val_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Create DataLoaders for train, val, and test datasets
batch_size = 32

# Only the training loader shuffles; val/test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Function to count class instances with a progress bar
def count_classes(dataset):
    """Tally how many samples carry each label.

    Args:
        dataset: Iterable whose items unpack into exactly two values; the
            second value of each item is used as the label (the Counter key).

    Returns:
        collections.Counter mapping each label to its number of occurrences.

    Note:
        Every item is fetched from ``dataset`` just to read its label, so for
        datasets that load or transform samples on access (presumably the
        case for image-folder datasets) this walks the whole dataset.
    """
    # Progress bar over the full pass; the first element of each item is ignored.
    progress = tqdm(dataset, desc="Counting classes")
    return Counter(target for _sample, target in progress)

# Class-name -> integer-label mapping exposed by the training ImageFolder.
# Printed so that numeric labels can be matched back to their class names.
class_to_idx = train_dataset.class_to_idx
print("Class to index mapping:", class_to_idx)

def count_classes_in_folders(dataset_path):
    """
    Report how many entries each class sub-folder of ``dataset_path`` holds.

    Every immediate child of ``dataset_path`` that is a directory is treated
    as a class. Its count is the number of directory entries inside it; no
    filtering is applied, so any file or nested folder counts as one item.
    Children of ``dataset_path`` that are not directories are skipped.

    Returns a dict mapping class folder name -> entry count.
    """
    # Pair each child name with its full path so the path is only built once.
    children = (
        (entry, os.path.join(dataset_path, entry))
        for entry in os.listdir(dataset_path)
    )
    return {
        entry: len(os.listdir(folder))
        for entry, folder in children
        if os.path.isdir(folder)  # ignore stray files next to the class folders
    }

# Entries per class, read straight from the training folders on disk
train_class_counts = count_classes_in_folders(train_path)

# Tabulate the counts: one (Class, Count) row per class folder
class_df = pd.DataFrame(
    [(class_name, n_items) for class_name, n_items in train_class_counts.items()],
    columns=["Class", "Count"],
)

# Smallest classes first; the index is renumbered from 0 after sorting
class_df_sorted = (
    class_df
    .sort_values(by="Count", ascending=True)
    .reset_index(drop=True)
)

# Show the sorted table
print(class_df_sorted)

Class to index mapping: {'Apple___Apple_scab': 0, 'Apple___Black_rot': 1, 'Apple___Cedar_apple_rust': 2, 'Apple___healthy': 3, 'Blueberry___healthy': 4, 'Cherry_(including_sour)___Powdery_mildew': 5, 'Cherry_(including_sour)___healthy': 6, 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot': 7, 'Corn_(maize)___Common_rust_': 8, 'Corn_(maize)___Northern_Leaf_Blight': 9, 'Corn_(maize)___healthy': 10, 'Grape___Black_rot': 11, 'Grape___Esca_(Black_Measles)': 12, 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)': 13, 'Grape___healthy': 14, 'Orange___Haunglongbing_(Citrus_greening)': 15, 'Peach___Bacterial_spot': 16, 'Peach___healthy': 17, 'Pepper,_bell___Bacterial_spot': 18, 'Pepper,_bell___healthy': 19, 'Potato___Early_blight': 20, 'Potato___Late_blight': 21, 'Potato___healthy': 22, 'Raspberry___healthy': 23, 'Soybean___healthy': 24, 'Squash___Powdery_mildew': 25, 'Strawberry___Leaf_scorch': 26, 'Strawberry___healthy': 27, 'Tomato___Bacterial_spot': 28, 'Tomato___Early_blight': 29, 'Tomato___L

In [10]:
# Independent list copy of the class names held by the training dataset
class_names = list(train_dataset.classes)
print("Class names:", class_names)


Class names: ['Apple___Apple_scab', 'Apple___Black_rot', 'Apple___Cedar_apple_rust', 'Apple___healthy', 'Blueberry___healthy', 'Cherry_(including_sour)___Powdery_mildew', 'Cherry_(including_sour)___healthy', 'Corn_(maize)___Cercospora_leaf_spot Gray_leaf_spot', 'Corn_(maize)___Common_rust_', 'Corn_(maize)___Northern_Leaf_Blight', 'Corn_(maize)___healthy', 'Grape___Black_rot', 'Grape___Esca_(Black_Measles)', 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)', 'Grape___healthy', 'Orange___Haunglongbing_(Citrus_greening)', 'Peach___Bacterial_spot', 'Peach___healthy', 'Pepper,_bell___Bacterial_spot', 'Pepper,_bell___healthy', 'Potato___Early_blight', 'Potato___Late_blight', 'Potato___healthy', 'Raspberry___healthy', 'Soybean___healthy', 'Squash___Powdery_mildew', 'Strawberry___Leaf_scorch', 'Strawberry___healthy', 'Tomato___Bacterial_spot', 'Tomato___Early_blight', 'Tomato___Late_blight', 'Tomato___Leaf_Mold', 'Tomato___Septoria_leaf_spot', 'Tomato___Spider_mites Two-spotted_spider_mite', 'Tomat

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Example CNN model
class SimpleCNN(nn.Module):
    """Small convolutional classifier for square 3-channel images.

    Architecture:
        features   -- two (3x3 conv -> ReLU -> 2x2 max-pool) stages with 16 and
                      32 output channels, followed by dropout (p=0.20).
        classifier -- Linear(32 * (image_width // 4) ** 2 -> 512) -> ReLU ->
                      dropout (p=0.45) -> Linear(512 -> num_classes).

    Args:
        num_classes: Number of output logits.
        image_width: Side length of the square input images. Each pooling
            stage halves the spatial size, so the flattened feature size is
            computed from ``image_width // 4``.
    """

    def __init__(self, num_classes, image_width):
        super().__init__()

        # Layer order is significant: it fixes the Sequential indices that
        # appear in state_dict keys (features.0, features.3, ...).
        conv_stages = [
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.20),
        ]
        self.features = nn.Sequential(*conv_stages)

        # Padding of 1 with a 3x3 kernel keeps the spatial size, so only the
        # two 2x2 pools shrink it: side length after them is image_width // 4.
        pooled_side = image_width // 4
        flat_features = 32 * pooled_side ** 2
        head_layers = [
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Dropout(0.45),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        feature_maps = self.features(x)
        # Collapse channel and spatial dims into one vector per sample.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flattened)


# Model, loss function, optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available
num_classes = len(train_dataset.classes)
model = SimpleCNN(num_classes=num_classes, image_width=image_width).to(device)  # Move model to device
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training schedule
num_epochs = 10  # Upper bound on epochs; early stopping may end training sooner

# Early stopping parameters
patience = 3  # Number of epochs to wait for improvement before stopping
best_val_loss = float('inf')  # Initialize the best validation loss as infinity
epochs_without_improvement = 0  # Counter for epochs without improvement
# NOTE: only the best loss value is tracked. The weights of the best epoch are
# not saved or restored, so `model` ends up with the weights of the last epoch run.

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    # Bind the progress bar to its own name. Rebinding `train_loader` here would
    # wrap the previous epoch's bar in a new one on every epoch (and on every
    # re-run of this cell) and lose the original DataLoader.
    epoch_progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")

    for images, labels in epoch_progress:
        images, labels = images.to(device), labels.to(device)  # Move data to device
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        batch_loss = loss.item()  # Read the scalar once; used for both the sum and the bar
        running_loss += batch_loss
        epoch_progress.set_postfix(loss=batch_loss)  # Update tqdm progress bar with current loss

    # Mean of the per-batch losses over this epoch
    print(f"Epoch {epoch+1}, Average Loss: {running_loss / len(train_loader)}")

    # Evaluate on the validation dataset
    model.eval()  # Switch dropout layers to inference behaviour
    correct = 0
    total = 0
    val_loss = 0.0
    with torch.no_grad():  # Disable gradient computation during evaluation
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            _, predicted = torch.max(outputs, 1)  # Index of the largest logit = predicted class
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_val_loss = val_loss / len(val_loader)  # Mean of the per-batch validation losses
    accuracy = 100 * correct / total
    print(f"Validation Accuracy: {accuracy:.2f}%")
    print(f"Validation Loss: {avg_val_loss:.4f}")

    # Check for early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        epochs_without_improvement = 0  # Reset counter
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs.")
            break

Epoch 1/10: 100%|██████████| 1358/1358 [01:43<00:00, 13.13batch/s, loss=0.997]


Epoch 1, Average Loss: 1.3131740384231787
Validation Accuracy: 80.94%


Epoch 2/10: 100%|██████████| 1358/1358 [02:15<00:00, 10.02batch/s, loss=0.446]


Epoch 2, Average Loss: 0.6891044465076063
Validation Accuracy: 85.75%


Epoch 3/10: 100%|██████████| 1358/1358 [01:46<00:00, 12.71batch/s, loss=0.682] 


Epoch 3, Average Loss: 0.5300986022964141
Validation Accuracy: 88.09%


Epoch 4/10: 100%|██████████| 1358/1358 [02:03<00:00, 11.02batch/s, loss=0.368] 


Epoch 4, Average Loss: 0.4394156155347912
Validation Accuracy: 90.83%


Epoch 5/10: 100%|██████████| 1358/1358 [01:51<00:00, 12.20batch/s, loss=0.173] 


Epoch 5, Average Loss: 0.3728049454746963
Validation Accuracy: 90.74%


Epoch 6/10: 100%|██████████| 1358/1358 [04:03<00:00,  5.59batch/s, loss=0.483] 


Epoch 6, Average Loss: 0.3325787985036273
Validation Accuracy: 91.81%


Epoch 7/10:   0%|          | 0/1358 [00:00<?, ?batch/s]

In [None]:
from sklearn.metrics import f1_score

# Evaluate on the test dataset with tqdm
model.eval()  # Inference mode: dropout layers are disabled
correct = 0
total = 0
all_labels = []  # To store true labels
all_preds = []   # To store predicted labels
# Keep the progress bar under its own name. Rebinding `test_loader` would wrap
# the loader in another bar each time this cell is re-run.
test_progress = tqdm(test_loader, desc="Testing", unit="batch")

with torch.no_grad():  # Disable gradient computation during evaluation
    for images, labels in test_progress:  # Loop through test dataset
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # Index of the largest logit = predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Collect plain Python ints so the metric call receives flat lists
        all_labels.extend(labels.cpu().tolist())
        all_preds.extend(predicted.cpu().tolist())

        # Update tqdm bar with the running accuracy over the batches seen so far
        test_progress.set_postfix(accuracy=(100 * correct / total))

# Compute test accuracy
test_accuracy = 100 * correct / total

# Compute F1 score for the whole dataset
f1 = f1_score(all_labels, all_preds, average='weighted')  # You can also use 'macro' or 'micro' as needed

print(f"\nTest Accuracy: {test_accuracy:.2f}%")
print(f"Test F1 Score (Weighted): {f1:.4f}")

Testing: 100%|██████████| 65/65 [00:41<00:00,  1.56batch/s, accuracy=93.1]


Test Accuracy: 93.07%



