In [1]:
import os
import pandas as pd
from PIL import Image
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np


In [44]:
from google.colab import drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [45]:
class LeafScoreDataset(Dataset):
    def __init__(self, df, image_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):

        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(str(self.image_dir), str(row['image_path']))
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = torch.tensor(row['score'], dtype=torch.float32)
        return image, label

In [55]:
# ✅ Step 3: Load Data & Prepare Transforms
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Define the root directory of the dataset (contains subfolders for each class)
dataset_dir = "/content/drive/MyDrive/arch/wheat_leaf (1)"  # Use the correct path without the extra "(1)" suffix

# Define image transformations (resize images, convert to tensor, etc.)
transform = transforms.Compose([
    transforms.Resize((224, 224)),   # resize all images to 224x224 (for example)
    transforms.ToTensor(),           # convert PIL image to PyTorch tensor
    # ... you can add normalization or other transforms as needed
])

# Load the dataset using ImageFolder, which will recursively traverse subdirectories
dataset = datasets.ImageFolder(root=dataset_dir, transform=transform)

# Verify that images are loaded and classes are detected
print("Found classes:", dataset.classes)
print("Total images loaded:", len(dataset))

# Example: count images per class to ensure all subfolders are read
from collections import Counter
counts = Counter(label for _, label in dataset.samples)
for class_index, count in counts.items():
    class_name = dataset.classes[class_index]
    print(f"Loaded {count} images for class '{class_name}'")


Found classes: ['Healthy', 'septoria', 'stripe_rust']
Total images loaded: 407
Loaded 102 images for class 'Healthy'
Loaded 97 images for class 'septoria'
Loaded 208 images for class 'stripe_rust'


In [58]:
# Split into training and validation subsets (e.g., 80-20 split)
train_size = int(0.8 * len(dataset))
val_size   = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))

# Create DataLoaders for batching
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=16, shuffle=False)

# (Optional) Check a few sample labels to ensure correctness
for i in range(3):
    img, label = train_dataset[i]
    print(f"Sample {i}: class index = {label}, class name = '{dataset.classes[label]}'")
print(dataset.class_to_idx)

from collections import Counter
all_labels = [label for _, label in train_dataset]
print(Counter(all_labels))


Sample 0: class index = 1, class name = 'septoria'
Sample 1: class index = 2, class name = 'stripe_rust'
Sample 2: class index = 2, class name = 'stripe_rust'
{'Healthy': 0, 'septoria': 1, 'stripe_rust': 2}
Counter({2: 169, 1: 82, 0: 74})


In [60]:
from torch.utils.data import WeightedRandomSampler

# Step 1: Count class occurrences
from collections import Counter
labels = [label for _, label in train_dataset]
label_counts = Counter(labels)

# Step 2: Compute weights (inverse frequency)
class_weights = {cls: 1.0 / count for cls, count in label_counts.items()}
sample_weights = [class_weights[label] for label in labels]

# Step 3: Create WeightedRandomSampler
sampler = WeightedRandomSampler(sample_weights, num_samples=len(train_dataset), replacement=True)

# Step 4: Use sampler in train_loader
train_loader = DataLoader(train_dataset, batch_size=16, sampler=sampler)


In [68]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import models, transforms
import torch.nn as nn
import torch.optim as optim

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Split dataset
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Load ResNet50 model
model = models.resnet50(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(dataset.classes))  # Adapt for your num of classes
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop
for epoch in range(5):
    model.train()
    train_loss, train_correct, train_total = 0.0, 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * labels.size(0)
        _, preds = torch.max(outputs, 1)
        train_correct += (preds == labels).sum().item()
        train_total += labels.size(0)

    train_acc = train_correct / train_total

    # Validation
    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)

    val_acc = val_correct / val_total

    print(f"Epoch {epoch+1}/10, Train Loss: {train_loss/train_total:.4f}, "
          f"Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")


Epoch 1/10, Train Loss: 0.4861, Train Acc: 0.8277, Val Acc: 0.9634
Epoch 2/10, Train Loss: 0.0628, Train Acc: 0.9877, Val Acc: 0.9756
Epoch 3/10, Train Loss: 0.0509, Train Acc: 0.9846, Val Acc: 0.9878
Epoch 4/10, Train Loss: 0.0218, Train Acc: 0.9938, Val Acc: 1.0000
Epoch 5/10, Train Loss: 0.0198, Train Acc: 0.9969, Val Acc: 1.0000
