In [2]:
import os

# Define the dataset path
dataset_path = '/kaggle/input/diamos-plant-dataset/Pear/leaves'

# Check if the dataset path exists
if not os.path.exists(dataset_path):
    print(f"Error: Dataset path '{dataset_path}' does not exist.")
else:
    # Count the number of files in each subfolder (Pear and leaves)
    for folder in os.listdir(dataset_path):
        folder_path = os.path.join(dataset_path, folder)
        if os.path.isdir(folder_path):
            num_files = len(os.listdir(folder_path))
            print(f"Number of files in '{folder}' folder: {num_files}")

Number of files in 'curl' folder: 65
Number of files in 'healthy' folder: 43
Number of files in 'spot' folder: 1768
Number of files in 'slug' folder: 4050


In [6]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image, UnidentifiedImageError
from tqdm import tqdm

# Define the Channel Shuffle class
class ChannelShuffle(nn.Module):
    def __init__(self, groups):
        super(ChannelShuffle, self).__init__()
        self.groups = groups

    def forward(self, x):
        batchsize, num_channels, height, width = x.data.size()
        channels_per_group = num_channels // self.groups
        # Reshape
        x = x.view(batchsize, self.groups, channels_per_group, height, width)
        # Transpose
        x = torch.transpose(x, 1, 2).contiguous()
        # Flatten
        x = x.view(batchsize, -1, height, width)
        return x

# Define the RCSA module
class RCSA(nn.Module):
    def __init__(self, in_channels, reduction=16, groups=4):
        super(RCSA, self).__init__()
        self.groups = groups
        self.channel_shuffle = ChannelShuffle(groups)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        residual = x
        # Channel Shuffle
        x = self.channel_shuffle(x)
        # Squeeze and Excitation
        x = self.avg_pool(x)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.sigmoid(x)
        # Scale
        x = residual * x
        return x

# Define the CNN with RCSA
class ExampleCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(ExampleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.rcsa1 = RCSA(64)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.rcsa2 = RCSA(128)
        self.fc = nn.Linear(128 * 8 * 8, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.rcsa1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.rcsa2(x)
        x = F.relu(x)
        x = F.adaptive_avg_pool2d(x, (8, 8))
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# Custom dataset class to check if images are valid
class ValidImageFolder(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.classes, self.class_to_idx = self._find_classes(root)
        self.samples = self.make_dataset(root, self.class_to_idx)

    def _find_classes(self, dir):
        classes = [d.name for d in os.scandir(dir) if d.is_dir()]
        classes.sort()
        class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
        return classes, class_to_idx

    def make_dataset(self, directory, class_to_idx):
        instances = []
        directory = os.path.expanduser(directory)
        for target_class in sorted(class_to_idx.keys()):
            class_index = class_to_idx[target_class]
            target_dir = os.path.join(directory, target_class)
            if not os.path.isdir(target_dir):
                continue

            for root, _, fnames in sorted(os.walk(target_dir)):
                for fname in sorted(fnames):
                    path = os.path.join(root, fname)
                    if self.is_valid_image(path):
                        item = (path, class_index)
                        instances.append(item)
        return instances

    def is_valid_image(self, path):
        try:
            img = Image.open(path)
            img.verify()  # PIL does not fully read the image file in verify
            return True
        except (IOError, SyntaxError, UnidentifiedImageError):
            return False

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, target = self.samples[index]
        img = Image.open(path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, target

# Prepare the dataset
dataset_path = '/kaggle/input/diamos-plant-dataset/Pear/leaves'
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

full_dataset = ValidImageFolder(root=dataset_path, transform=transform)

# Split the dataset into training and validation sets
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)

# Training and evaluation functions
def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
    print(f'Epoch {epoch}: Loss: {running_loss/len(train_loader)}, Accuracy: {100.*correct/total}')

def validate(model, device, val_loader, criterion):
    model.eval()
    val_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            val_loss += criterion(outputs, target).item()
            _, predicted = outputs.max(1)
            correct += predicted.eq(target).sum().item()
    val_loss /= len(val_loader.dataset)
    print(f'Validation set: Average loss: {val_loss:.4f}, Accuracy: {correct}/{len(val_loader.dataset)} ({100.*correct/len(val_loader.dataset):.0f}%)')

# Initialize and train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ExampleCNN(num_classes=len(full_dataset.classes)).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

for epoch in range(1, 11):
    train(model, device, train_loader, optimizer, criterion, epoch)
    validate(model, device, val_loader, criterion)

100%|██████████| 38/38 [03:47<00:00,  5.98s/it]

Epoch 1: Loss: 0.7226115590647647, Accuracy: 71.21464226289517





Validation set: Average loss: 0.0108, Accuracy: 445/602 (74%)


100%|██████████| 38/38 [03:32<00:00,  5.58s/it]

Epoch 2: Loss: 0.5367435860006433, Accuracy: 76.12312811980033





Validation set: Average loss: 0.0107, Accuracy: 440/602 (73%)


100%|██████████| 38/38 [03:26<00:00,  5.43s/it]

Epoch 3: Loss: 0.49330240252770874, Accuracy: 77.99500831946756





Validation set: Average loss: 0.0093, Accuracy: 464/602 (77%)


100%|██████████| 38/38 [03:34<00:00,  5.64s/it]

Epoch 4: Loss: 0.4164703833429437, Accuracy: 81.40599001663894





Validation set: Average loss: 0.0100, Accuracy: 455/602 (76%)


100%|██████████| 38/38 [03:25<00:00,  5.41s/it]

Epoch 5: Loss: 0.40564152284672383, Accuracy: 82.32113144758735





Validation set: Average loss: 0.0099, Accuracy: 459/602 (76%)


100%|██████████| 38/38 [03:28<00:00,  5.48s/it]

Epoch 6: Loss: 0.3648875971373759, Accuracy: 84.56738768718802





Validation set: Average loss: 0.0093, Accuracy: 468/602 (78%)


100%|██████████| 38/38 [03:41<00:00,  5.84s/it]

Epoch 7: Loss: 0.34727511123607036, Accuracy: 84.69217970049917





Validation set: Average loss: 0.0098, Accuracy: 469/602 (78%)


100%|██████████| 38/38 [03:23<00:00,  5.36s/it]

Epoch 8: Loss: 0.3191070650753222, Accuracy: 86.27287853577371





Validation set: Average loss: 0.0091, Accuracy: 477/602 (79%)


100%|██████████| 38/38 [03:28<00:00,  5.48s/it]

Epoch 9: Loss: 0.2768731681924117, Accuracy: 88.64392678868552





Validation set: Average loss: 0.0100, Accuracy: 472/602 (78%)


100%|██████████| 38/38 [03:25<00:00,  5.41s/it]

Epoch 10: Loss: 0.254689075444874, Accuracy: 89.76705490848586





Validation set: Average loss: 0.0089, Accuracy: 482/602 (80%)
