In [2]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
import os
import torch
from torch.utils.data import Dataset
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import shutil
from torchvision import transforms
from tqdm import tqdm

In [3]:
import kagglehub
# Fetch the dataset through kagglehub; the returned value is the local
# directory that the later cells join class-folder names onto.
path = kagglehub.dataset_download(
    "csafrit2/plant-leaves-for-image-classification"
)

print(f"Path to dataset files: {path}")

Downloading from https://www.kaggle.com/api/v1/datasets/download/csafrit2/plant-leaves-for-image-classification?dataset_version_number=2...


100%|██████████| 6.56G/6.56G [01:12<00:00, 97.3MB/s]

Extracting model files...





Path to dataset files: /root/.cache/kagglehub/datasets/csafrit2/plant-leaves-for-image-classification/versions/2


In [4]:

def _list_files(directories):
    """Return the paths of all regular files directly inside ``directories``.

    Directories are visited in the order given. Entries within each directory
    are sorted because ``os.listdir`` returns them in arbitrary order; without
    a stable order the seeded train/test split further down would select
    different files on different machines.

    Args:
        directories: Iterable of directory paths to scan (non-recursive).

    Returns:
        List of file paths; sub-directories are skipped.
    """
    return [
        os.path.join(directory, name)
        for directory in directories
        for name in sorted(os.listdir(directory))
        if os.path.isfile(os.path.join(directory, name))
    ]


# Class folders used for the binary healthy-vs-diseased task.
healthy = [
    os.path.join(path, 'Plants_2/train/Pomegranate healthy (P9a)'),
    os.path.join(path, 'Plants_2/train/Arjun healthy (P1b)'),
    os.path.join(path, 'Plants_2/train/Jamun healthy (P5a)')
]

diseased = [
    os.path.join(path, 'Plants_2/train/Pomegranate diseased (P9b)'),
    os.path.join(path, 'Plants_2/train/Arjun diseased (P1a)'),
    os.path.join(path, 'Plants_2/train/Jamun diseased (P5b)')
]

healthy_files = _list_files(healthy)
diseased_files = _list_files(diseased)



In [6]:

# Healthy images are class 0 and diseased images are class 1. The label list is
# built in the same order as the concatenated file list so indices stay aligned.
all_files = [*healthy_files, *diseased_files]
labels = [0 for _ in healthy_files] + [1 for _ in diseased_files]

print(f"Number of healthy images: {len(healthy_files)}")
print(f"Number of diseased images: {len(diseased_files)}")
print(f"Total number of images: {len(all_files)}")

# Hold out 20% of the files, stratified on the label, with a fixed seed.
train_files, test_files, train_labels, test_labels = train_test_split(
    all_files,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)


class PlantImageDataset(Dataset):
    """Dataset of image files paired with class labels.

    Images are read from disk when an item is requested, converted to
    3-channel RGB, and then passed through ``transform``.

    Args:
        image_files: Sequence of image file paths.
        labels: Sequence of labels, aligned index-for-index with
            ``image_files``.
        transform: Callable applied to the loaded PIL image. When omitted (or
            falsy), the image is resized to ``target_size`` and converted to a
            tensor.
        target_size: Size passed to ``transforms.Resize`` by the default
            transform; unused when a custom ``transform`` is supplied.
    """

    def __init__(self, image_files, labels, transform=None, target_size=(128, 128)):
        self.image_files = image_files
        self.labels = labels
        self.target_size = target_size

        if not transform:
            transform = transforms.Compose([
                transforms.Resize(self.target_size),
                transforms.ToTensor()
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``(transformed_image, label)``."""
        img_path = self.image_files[idx]

        # Force 3-channel RGB so grayscale, palette or RGBA files produce the
        # same channel count as every other image. ``convert`` returns a new
        # image, so the underlying file can be closed as soon as it is done.
        with Image.open(img_path) as source:
            image = source.convert("RGB")

        # ``self.transform`` is always set by ``__init__``, so no guard is needed.
        image = self.transform(image)

        label = self.labels[idx]

        return image, label
# Wrap each split in a dataset; both use the default resize-and-tensor transform.
train_dataset = PlantImageDataset(
    image_files=train_files,
    labels=train_labels,
    target_size=(128, 128),
)
test_dataset = PlantImageDataset(
    image_files=test_files,
    labels=test_labels,
    target_size=(128, 128),
)


Number of healthy images: 755
Number of diseased images: 818
Total number of images: 1573


In [7]:


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


In [8]:
import torchvision.models as models
import torch.optim as optim
import torch.nn as nn


# Loss for the two-class problem.
criterion = nn.CrossEntropyLoss()

# Load ResNet-18 with pretrained weights and freeze every existing parameter,
# so only the replacement head created below is trainable.
CNN_model = models.resnet18(pretrained=True)
CNN_model.requires_grad_(False)

# Replace the final fully connected layer with a fresh 2-output layer; its
# parameters are newly created and therefore still require gradients.
num_features = CNN_model.fc.in_features
CNN_model.fc = nn.Linear(num_features, 2)

CNN_model = CNN_model.to(device)

# Optimise the new head only.
optimizer = optim.SGD(CNN_model.fc.parameters(), lr=0.001, momentum=0.9)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 75.1MB/s]


In [9]:
# Pinned host memory only helps host-to-GPU copies, so request it only when
# CUDA is actually available instead of unconditionally.
use_pinned_memory = torch.cuda.is_available()

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
    pin_memory=use_pinned_memory,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    pin_memory=use_pinned_memory,
)

def calculate_accuracy(predictions, labels):
    """Return the fraction of rows whose highest-scoring column equals the label.

    Args:
        predictions: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of class indices, one per row of ``predictions``.

    Returns:
        Number of matching rows divided by ``len(labels)``, as a Python float.
    """
    predicted_classes = torch.max(predictions, dim=1).indices
    num_correct = predicted_classes.eq(labels).sum().item()
    return num_correct / len(labels)


num_epochs = 10

# Train for a fixed number of epochs and report per-epoch loss and accuracy.
# Metrics are accumulated per sample rather than averaged over per-batch means:
# the final batch of an epoch can be smaller than the others, and a mean of
# batch means would give its samples more weight than the rest.
for epoch in range(num_epochs):
    CNN_model.train()
    running_loss = 0.0
    running_correct = 0
    samples_seen = 0

    # The batch labels are named `targets` so the loop does not overwrite the
    # notebook-level `labels` list used to build the train/test split.
    for images, targets in tqdm(train_loader):
        images, targets = images.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = CNN_model(images)

        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        batch_size = targets.size(0)
        # `criterion` returns the batch mean, so scale it back to a batch sum.
        running_loss += loss.item() * batch_size
        _, predicted = torch.max(outputs, 1)
        running_correct += (predicted == targets).sum().item()
        samples_seen += batch_size

    epoch_loss = running_loss / samples_seen
    epoch_accuracy = running_correct / samples_seen

    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}")



100%|██████████| 40/40 [01:11<00:00,  1.80s/it]


Epoch [1/10], Loss: 0.5451, Accuracy: 0.7003


100%|██████████| 40/40 [01:10<00:00,  1.75s/it]


Epoch [2/10], Loss: 0.3605, Accuracy: 0.8316


100%|██████████| 40/40 [01:11<00:00,  1.78s/it]


Epoch [3/10], Loss: 0.3246, Accuracy: 0.8633


100%|██████████| 40/40 [01:10<00:00,  1.76s/it]


Epoch [4/10], Loss: 0.2856, Accuracy: 0.8755


100%|██████████| 40/40 [01:11<00:00,  1.78s/it]


Epoch [5/10], Loss: 0.2638, Accuracy: 0.9023


100%|██████████| 40/40 [01:12<00:00,  1.80s/it]


Epoch [6/10], Loss: 0.2472, Accuracy: 0.9078


100%|██████████| 40/40 [01:10<00:00,  1.77s/it]


Epoch [7/10], Loss: 0.2582, Accuracy: 0.8959


100%|██████████| 40/40 [01:10<00:00,  1.75s/it]


Epoch [8/10], Loss: 0.2515, Accuracy: 0.8969


100%|██████████| 40/40 [01:10<00:00,  1.77s/it]


Epoch [9/10], Loss: 0.2370, Accuracy: 0.9067


100%|██████████| 40/40 [01:10<00:00,  1.75s/it]

Epoch [10/10], Loss: 0.2164, Accuracy: 0.9125





In [10]:

    CNN_model.eval()
    test_loss = 0.0
    test_accuracy = 0.0

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = CNN_model(images)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            test_accuracy += calculate_accuracy(outputs, labels)

    test_loss /= len(test_loader)
    test_accuracy /= len(test_loader)

    print(f"Validation Loss: {test_loss:.4f}, Validation Accuracy: {test_accuracy:.4f}")

print("Training completed!")

Validation Loss: 0.2279, Validation Accuracy: 0.8946
Training completed!


In [18]:
# Colab-only: mount Google Drive so the trained weights can be written to it.
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [21]:
# Save only the state dict (parameters and buffers) to the mounted Drive,
# creating the destination folder first if it does not exist yet.
model_path = '/content/drive/My Drive/models/cnn_model_weights.pth'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(CNN_model.state_dict(), model_path)
