The Rice Dataset (Raw Images) has 38 classes, and each class has 500 images. The cell below lists the class folders and counts the files in the first few.

In [None]:
import os

base_dir = '/kaggle/input/riceds-original/Original'

# Every entry directly under base_dir is treated as one class folder.
subdirs = os.listdir(base_dir)
subdirs.sort()
preview = subdirs[:5]
print(f'Found {len(subdirs)} class-folders, e.g.:', preview)

# Report how many entries each of the previewed folders contains.
for folder in preview:
    folder_path = os.path.join(base_dir, folder)
    print(f'  {folder}: {len(os.listdir(folder_path))} files')


Here we check that the images load correctly by displaying a few samples from the first three classes.

In [None]:
import os
from PIL import Image
import matplotlib.pyplot as plt


base_dir = '/kaggle/input/riceds-original/Original'

# Preview set: the first 3 class folders, first 3 files of each (9 samples).
classes = sorted(os.listdir(base_dir))[:3]
sample_paths = [
    (cls_name, os.path.join(base_dir, cls_name, file_name))
    for cls_name in classes
    for file_name in sorted(os.listdir(os.path.join(base_dir, cls_name)))[:3]
]
labels = [cls_name for cls_name, _ in sample_paths]
images = [Image.open(path) for _, path in sample_paths]

# One panel per sample, titled with its class folder name.
fig, axes = plt.subplots(3, 3, figsize=(6, 6))
for panel, picture, title in zip(axes.ravel(), images, labels):
    panel.imshow(picture)
    panel.set_title(title)
    panel.axis('off')

plt.tight_layout()
plt.show()

A peak toward the left means most pixels are dark—your images may be under-exposed or generally low-contrast.

A peak toward the right suggests brighter images—possibly over-exposed or with lots of light backgrounds.

A wide spread (values across the whole 0–255 range) indicates good contrast.

A narrow, tall spike (e.g. around 120-140) shows most of your pixels cluster around a medium gray—your images may look “flat.”

In [None]:
import numpy as np

# Sample a subset of images: the first 5 class folders, 20 files from each.
# The class list is rebuilt here instead of reusing `classes`, which an
# earlier cell truncated to 3 entries. Listings are sorted so the sample is
# the same on every run.
sample_classes = sorted(os.listdir(base_dir))[:5]
pixel_chunks = []
for cls in sample_classes:
    cls_dir = os.path.join(base_dir, cls)
    for fn in sorted(os.listdir(cls_dir))[:20]:
        with Image.open(os.path.join(cls_dir, fn)) as img:
            # Keep pixels as flat NumPy arrays; extending a Python list with
            # individual pixel values creates one object per pixel.
            pixel_chunks.append(np.asarray(img.convert('L')).ravel())

if pixel_chunks:
    pix_vals = np.concatenate(pixel_chunks)
else:
    pix_vals = np.empty(0, dtype=np.uint8)

plt.figure(figsize=(6,4))
plt.hist(pix_vals, bins=50)
plt.title('Grayscale Pixel Distribution (sampled)')
plt.xlabel('Pixel value')
plt.ylabel('Frequency')
plt.show()


Full PyTorch example showing how to fine-tune a pretrained ResNet50 on the 38-class rice dataset, with simple augmentations and two-stage training (classification head first, then full fine-tuning) to help boost accuracy.

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Paths & hyper-parameters
base_dir = '/kaggle/input/riceds-original/Original'
batch_size, num_epochs, num_classes = 32, 20, 38
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 2. Data transforms
# Both pipelines end with the same tensor conversion and channel
# normalisation (presumably the ImageNet statistics expected by the
# pretrained weights -- confirm).
imagenet_norm = transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
to_model_input = [transforms.ToTensor(), imagenet_norm]

# Training: random crop/flip/colour jitter before conversion.
train_tf = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    *to_model_input,
])

# Validation: deterministic resize + centre crop.
val_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    *to_model_input,
])

# 3. Dataset & split
# Two ImageFolder views over the same directory: one applies the training
# augmentations, the other the deterministic validation pipeline.
# Assigning `val_ds.dataset.transform` on a random_split result would also
# change the training subset, because both subsets wrap the same dataset
# object -- training would then silently run without augmentation.
full_dataset = datasets.ImageFolder(base_dir, transform=train_tf)
val_view     = datasets.ImageFolder(base_dir, transform=val_tf)
train_size   = int(0.8 * len(full_dataset))
val_size     = len(full_dataset) - train_size
train_ds, val_split = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)
# Re-use the held-out indices on the validation view so the split is
# unchanged while each subset keeps its own transform. Assumes both
# ImageFolder instances enumerate the files in the same order (same
# directory, no changes in between).
val_ds = torch.utils.data.Subset(val_view, val_split.indices)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  num_workers=4)
val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, num_workers=4)

# 4. Model setup
# Pretrained ResNet50 whose final fully-connected layer is swapped for a
# fresh `num_classes`-way linear head.
model = models.resnet50(pretrained=True)
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Stage-1 optimiser only sees the new head; stage-2 optimiser sees every
# parameter and has its learning rate decayed by the step scheduler.
head_params = model.fc.parameters()
opt_head = optim.Adam(head_params, lr=1e-3, weight_decay=1e-4)
all_params = model.parameters()
opt_full = optim.SGD(all_params, lr=1e-4, momentum=0.9, weight_decay=1e-4)
sched    = optim.lr_scheduler.StepLR(opt_full, step_size=7, gamma=0.1)

# 5. Training & validation loops
def run_epoch(loader, model, criterion, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        loader: iterable of ``(inputs, targets)`` batches.
        model: network to train or evaluate.
        criterion: loss called as ``criterion(logits, targets)``; its value
            is weighted by the batch size when averaged.
        optimizer: when given, the pass is a training pass and the optimizer
            is stepped after every batch; when ``None`` the pass only
            evaluates and no gradients are tracked.

    Returns:
        Tuple ``(loss, accuracy)`` averaged over the samples actually seen.

    Note:
        The model is left in training mode after a training pass and in
        eval mode after an evaluation pass.
    """
    is_train = optimizer is not None
    # Switch layers such as BatchNorm/Dropout to the matching mode; without
    # this a validation pass would run with training-mode behaviour.
    model.train(is_train)

    loop = tqdm(loader, desc='Train' if is_train else ' Val ')
    running_loss, running_corrects, seen = 0.0, 0, 0
    for x, y in loop:
        x, y = x.to(device), y.to(device)
        with torch.set_grad_enabled(is_train):
            logits = model(x)
            loss   = criterion(logits, y)
            preds  = torch.argmax(logits, dim=1)

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        batch = x.size(0)
        seen             += batch
        running_loss     += loss.item() * batch
        running_corrects += (preds == y).sum().item()

    # Average over the samples that were iterated, which stays correct for
    # loaders that drop the last batch; guard against an empty loader.
    denom = max(seen, 1)
    epoch_loss = running_loss / denom
    epoch_acc  = running_corrects / denom
    return epoch_loss, epoch_acc

# 6. Stage 1: Train head only
print("\n=== Stage 1: training head ===")
# Only opt_head steps in this stage, so backbone gradients would be computed
# and never used; freeze the backbone to skip that work.
for p in model.parameters():
    p.requires_grad = False
for p in model.fc.parameters():
    p.requires_grad = True
for epoch in range(3):
    tl, ta = run_epoch(train_loader, model, criterion, opt_head)
    vl, va = run_epoch(val_loader,   model, criterion, None)
    print(f"[Head] Epoch {epoch+1}/3  train_loss={tl:.3f} train_acc={ta:.3f}  val_loss={vl:.3f} val_acc={va:.3f}")

# 7. Stage 2: Fine-tune all layers
for p in model.parameters():
    p.requires_grad = True
print("\n=== Stage 2: fine-tuning full model ===")
checkpoint_path = 'rice_resnet50_finetuned.pt'
best_val_acc = None
for epoch in range(num_epochs):
    tl, ta = run_epoch(train_loader, model, criterion, opt_full)
    sched.step()
    vl, va = run_epoch(val_loader,   model, criterion, None)
    print(f"[Full] Epoch {epoch+1}/{num_epochs}  train_loss={tl:.3f} train_acc={ta:.3f}  val_loss={vl:.3f} val_acc={va:.3f}")
    # Checkpoint whenever validation accuracy improves, so the file on disk
    # holds the best epoch rather than whichever epoch happened to be last.
    if best_val_acc is None or va > best_val_acc:
        best_val_acc = va
        torch.save(model.state_dict(), checkpoint_path)

# 8. Save best model
if best_val_acc is None:
    # No fine-tuning epoch ran (num_epochs == 0): keep the stage-1 weights.
    torch.save(model.state_dict(), checkpoint_path)
else:
    # Restore the best checkpoint so later evaluation describes the weights
    # that were actually saved.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))

# 9. Evaluation Metrics
print("\n=== Evaluation Metrics on Validation Set ===")
model.eval()

# Per-batch NumPy chunks, joined once after the loop.
pred_chunks, target_chunks, prob_chunks = [], [], []

with torch.no_grad():
    for x, y in tqdm(val_loader, desc='Eval metrics'):
        x, y = x.to(device), y.to(device)
        batch_probs = torch.softmax(model(x), dim=1)
        batch_preds = torch.argmax(batch_probs, dim=1)

        pred_chunks.append(batch_preds.cpu().numpy())
        target_chunks.append(y.cpu().numpy())
        prob_chunks.append(batch_probs.cpu().numpy())

# Concatenate results
all_preds, all_targets, all_probs = (
    np.concatenate(chunks)
    for chunks in (pred_chunks, target_chunks, prob_chunks)
)

# Classification Report
print("\nClassification Report:")
report = classification_report(all_targets, all_preds, digits=4)
print(report)

# Confusion Matrix
cm = confusion_matrix(all_targets, all_preds)
plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.tight_layout()
plt.show()

# AUC Score (One-vs-Rest); scikit-learn raises ValueError when it cannot
# compute the score, which is reported instead of aborting the cell.
try:
    auc_score = roc_auc_score(all_targets, all_probs, multi_class='ovr')
    print(f"\nAUC Score (macro OVR): {auc_score:.4f}")
except ValueError as e:
    print(f"\nAUC Score could not be computed: {e}")


BYOL implementation 

In [None]:
import os
import copy
import math
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from PIL import Image
from sklearn.metrics import classification_report
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

# Configuration shared by the BYOL pretraining and linear-evaluation code below
base_dir = '/kaggle/input/riceds-original/Original'  # dataset root passed to the ImageFolder datasets
BATCH_SIZE = 32  # batch size of every DataLoader in this cell
NUM_WORKERS = 2  # DataLoader worker processes
NUM_CLASSES = 38  # output size of the linear classifier
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # GPU when available, else CPU

# Augmentations for BYOL
class CustomBYOLTransform:
    """Callable that turns one image into two independently augmented views.

    Args:
        size: side length passed to ``RandomResizedCrop``.
    """

    def __init__(self, size=224):
        pipeline = [
            transforms.RandomResizedCrop(size),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(0.8, 0.8, 0.8, 0.2),
            transforms.RandomGrayscale(p=0.2),
            transforms.GaussianBlur(kernel_size=9),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225]),
        ]
        # Both Compose objects wrap the same list of transform instances;
        # the two views differ only through the random draws of each call.
        self.transform1 = transforms.Compose(pipeline)
        self.transform2 = transforms.Compose(pipeline)

    def __call__(self, x):
        """Return ``(view1, view2)`` for the input image ``x``."""
        first_view = self.transform1(x)
        second_view = self.transform2(x)
        return first_view, second_view

# Dataset class with dual views
class ImageFolderBYOL(datasets.ImageFolder):
    """ImageFolder variant that ignores labels and returns only the transform output.

    With ``CustomBYOLTransform`` as ``transform`` each item is a pair of
    augmented views of the same image.
    """

    def __getitem__(self, index):
        """Load sample ``index`` as RGB and return ``self.transform(image)``."""
        path, _ = self.samples[index]
        # The context manager releases the file handle even if decoding
        # fails; convert() returns a new, fully loaded image that remains
        # valid after the file is closed.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        return self.transform(img)

# Load BYOL training data
# Both random_split calls below use a generator seeded with the same value.
# The two datasets have the same length and split sizes, so they receive the
# same index partition; the encoder is therefore never pretrained on images
# that later land in the linear-evaluation test subset. Assumes both
# ImageFolder instances enumerate the files in the same order.
SPLIT_SEED = 42
full_dataset = ImageFolderBYOL(base_dir, transform=CustomBYOLTransform())
train_size = int(0.8 * len(full_dataset))
train_dataset_byol, _ = random_split(
    full_dataset,
    [train_size, len(full_dataset) - train_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
train_loader_byol = DataLoader(train_dataset_byol, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)

# Evaluation transform and data (deterministic resize + centre crop)
eval_tf = transforms.Compose([
    transforms.Resize(256), transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
eval_dataset = datasets.ImageFolder(base_dir, transform=eval_tf)
train_eval_size = int(0.8 * len(eval_dataset))
train_dataset_eval, test_dataset_eval = random_split(
    eval_dataset,
    [train_eval_size, len(eval_dataset) - train_eval_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# Worker processes keep image decoding off the main process during the
# repeated feature-extraction passes.
train_loader_eval = DataLoader(train_dataset_eval, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
test_loader_eval = DataLoader(test_dataset_eval, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Model components
class BYOLResNet(nn.Module):
    """Randomly initialised ResNet50 without its final fully-connected layer.

    ``forward`` returns one flat feature vector per input sample;
    ``feature_dim`` records the vector length used by downstream heads.
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet50(pretrained=False)
        trunk = list(backbone.children())
        trunk.pop()  # discard the last child (the classification layer)
        self.encoder = nn.Sequential(*trunk)
        self.feature_dim = 2048

    def forward(self, x):
        pooled = self.encoder(x)
        batch = x.size(0)
        # Collapse everything after the batch axis into one feature axis.
        return pooled.view(batch, -1)

class ProjectionHead(nn.Module):
    """Two-layer MLP: Linear -> BatchNorm1d -> ReLU -> Linear.

    Args:
        in_dim: size of the incoming feature vectors.
        hidden: width of the hidden layer.
        out_dim: size of the produced vectors.
    """

    def __init__(self, in_dim, hidden, out_dim):
        super().__init__()
        layers = [
            nn.Linear(in_dim, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Linear(hidden, out_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the MLP to a batch of feature vectors."""
        return self.net(x)

class PredictionHead(nn.Module):
    """Two-layer MLP: Linear -> BatchNorm1d -> ReLU -> Linear.

    Args:
        in_dim: size of the incoming vectors.
        hidden: width of the hidden layer.
        out_dim: size of the produced vectors.
    """

    def __init__(self, in_dim, hidden, out_dim):
        super().__init__()
        layers = [
            nn.Linear(in_dim, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Linear(hidden, out_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the MLP to a batch of vectors."""
        return self.net(x)

class BYOL(nn.Module):
    """Online/target network pair for BYOL-style pretraining.

    Args:
        encoder: feature extractor exposing a ``feature_dim`` attribute; it
            becomes the online encoder and is deep-copied for the target.
        proj_dim: output size of the projector and predictor.
        hidden_dim: hidden width of the projector and predictor.
    """

    def __init__(self, encoder, proj_dim, hidden_dim):
        super().__init__()
        self.online_encoder = encoder
        self.online_projector = ProjectionHead(encoder.feature_dim, hidden_dim, proj_dim)
        self.online_predictor = PredictionHead(proj_dim, hidden_dim, proj_dim)

        # The target branch starts as an exact copy of the online branch and
        # is excluded from gradient computation.
        self.target_encoder = copy.deepcopy(encoder)
        self.target_projector = copy.deepcopy(self.online_projector)
        for frozen_module in (self.target_encoder, self.target_projector):
            for weight in frozen_module.parameters():
                weight.requires_grad = False

    def forward(self, x1, x2):
        """Return ``(p1, p2, t1, t2)``: online predictions and target projections of both views."""
        p1 = self.online_predictor(self.online_projector(self.online_encoder(x1)))
        p2 = self.online_predictor(self.online_projector(self.online_encoder(x2)))
        with torch.no_grad():
            t1 = self.target_projector(self.target_encoder(x1))
            t2 = self.target_projector(self.target_encoder(x2))
        return p1, p2, t1, t2

    def update_target_network(self, momentum=0.996):
        """Move target parameters toward the online ones.

        Each target parameter becomes
        ``momentum * target + (1 - momentum) * online``.
        """
        branch_pairs = (
            (self.online_encoder, self.target_encoder),
            (self.online_projector, self.target_projector),
        )
        for online_module, target_module in branch_pairs:
            for p_o, p_t in zip(online_module.parameters(), target_module.parameters()):
                p_t.data = p_t.data * momentum + p_o.data * (1. - momentum)

# BYOL loss
def byol_loss(p, t):
    """Return ``2 - 2 * mean cosine similarity`` between ``p`` and ``t``.

    Both inputs are L2-normalised along the last axis first, so the value is
    0 for identical directions and 4 for opposite ones.
    """
    p_unit = F.normalize(p, dim=-1)
    t_unit = F.normalize(t, dim=-1)
    cosine = (p_unit * t_unit).sum(dim=-1)
    return 2 - 2 * cosine.mean()

# Pretrain: self-supervised BYOL training of the encoder (labels are not used)
# BYOL(encoder, proj_dim=128, hidden_dim=512)
byol_model = BYOL(BYOLResNet().to(DEVICE), 128, 512).to(DEVICE)
# Target-branch parameters have requires_grad=False, so they never receive
# gradients; they change only through update_target_network().
optimizer = optim.Adam(byol_model.parameters(), lr=1e-3)
print("Pretraining BYOL...")
for epoch in range(10):
    byol_model.train()
    total_loss = 0  # sum of per-batch losses for this epoch
    for x1, x2 in tqdm(train_loader_byol):
        # x1, x2: two augmented views of the same batch of images
        x1, x2 = x1.to(DEVICE), x2.to(DEVICE)
        optimizer.zero_grad()
        p1, p2, t1, t2 = byol_model(x1, x2)
        # Symmetric loss: each view's online prediction is matched against
        # the target projection of the other view.
        loss = byol_loss(p1, t2) + byol_loss(p2, t1)
        loss.backward()
        optimizer.step()
        # Moving-average update of the target branch after the online step.
        byol_model.update_target_network()
        total_loss += loss.item()
    # Mean loss per batch.
    print(f"Epoch {epoch+1} Loss: {total_loss / len(train_loader_byol):.4f}")

# Only the online encoder is kept; projector and predictor are discarded.
torch.save(byol_model.online_encoder.state_dict(), "byol_encoder.pth")

# Linear evaluation
class LinearClassifier(nn.Module):
    """Single fully-connected layer mapping feature vectors to class logits.

    Args:
        feature_dim: length of the incoming feature vectors.
        num_classes: number of output logits.
    """

    def __init__(self, feature_dim, num_classes):
        super().__init__()
        self.fc = nn.Linear(in_features=feature_dim, out_features=num_classes)

    def forward(self, x):
        logits = self.fc(x)
        return logits

# Rebuild the encoder and load the pretrained weights; map_location keeps
# the load working when the checkpoint was written on a different device.
encoder = BYOLResNet().to(DEVICE)
encoder.load_state_dict(torch.load("byol_encoder.pth", map_location=DEVICE))
for param in encoder.parameters():
    param.requires_grad = False
# A freshly constructed module is in training mode. In that mode BatchNorm
# normalises with per-batch statistics and keeps updating its running
# statistics even under torch.no_grad(), so the "frozen" encoder would still
# drift and its features would depend on batch composition. Eval mode makes
# feature extraction deterministic for both classifier training and testing.
encoder.eval()

# Linear probe trained on top of the frozen features.
classifier = LinearClassifier(encoder.feature_dim, NUM_CLASSES).to(DEVICE)
optimizer = optim.Adam(classifier.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Train the linear classifier on encoder features for 10 epochs.
print("Training classifier...")
for epoch in range(10):
    classifier.train()
    for images, labels in train_loader_eval:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        # Features are computed without gradient tracking, so only the
        # classifier takes part in backpropagation.
        with torch.no_grad():
            features = encoder(images)
        outputs = classifier(features)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Evaluation
classifier.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for images, labels in test_loader_eval:
        # Labels stay on the CPU; only the images go through the networks.
        logits = classifier(encoder(images.to(DEVICE)))
        batch_preds = logits.argmax(dim=1).cpu().numpy()
        all_preds.extend(batch_preds)
        all_labels.extend(labels.numpy())

print("\n Classification Report:")
report = classification_report(all_labels, all_preds, digits=4)
print(report)


SimCLR

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader, random_split
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import classification_report
import numpy as np

# Configuration shared by the SimCLR pretraining and linear-evaluation code below
base_dir = '/kaggle/input/riceds-original/Original'  # dataset root passed to the ImageFolder datasets
BATCH_SIZE = 32  # batch size of every DataLoader in this cell
NUM_CLASSES = 38  # output size of the linear classifier
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # GPU when available, else CPU

# SimCLR dual augmentations
class SimCLRTransform:
    """Callable that turns one image into two independently augmented views.

    Args:
        size: side length passed to ``RandomResizedCrop``.
    """

    def __init__(self, size=224):
        # Colour jitter is applied with probability 0.8, grayscale with 0.2.
        jitter = transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8)
        steps = [
            transforms.RandomResizedCrop(size),
            transforms.RandomHorizontalFlip(),
            jitter,
            transforms.RandomGrayscale(p=0.2),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225]),
        ]
        self.base_transform = transforms.Compose(steps)

    def __call__(self, x):
        """Return two views of ``x``, each from a separate pass through the pipeline."""
        first_view = self.base_transform(x)
        second_view = self.base_transform(x)
        return first_view, second_view

# Dataset wrapper
class ImageFolderSimCLR(datasets.ImageFolder):
    """ImageFolder variant that ignores labels and returns only the transform output.

    With ``SimCLRTransform`` as ``transform`` each item is a pair of
    augmented views of the same image.
    """

    def __getitem__(self, index):
        """Load sample ``index`` as RGB and return ``self.transform(image)``."""
        path, _ = self.samples[index]
        # The context manager releases the file handle even if decoding
        # fails; convert() returns a new, fully loaded image that remains
        # valid after the file is closed.
        with Image.open(path) as raw:
            image = raw.convert("RGB")
        return self.transform(image)

# Load dataset
dataset = ImageFolderSimCLR(base_dir, transform=SimCLRTransform())
train_size = int(0.8 * len(dataset))
# Fixed seed (same value as the supervised baseline split) so the 80%
# pretraining subset is identical on every run; an unseeded split makes it
# impossible to know afterwards which images the encoder has seen.
train_dataset, _ = random_split(
    dataset,
    [train_size, len(dataset) - train_size],
    generator=torch.Generator().manual_seed(42),
)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# ResNet50 encoder with projection head
class ResNetSimCLR(nn.Module):
    """Randomly initialised ResNet50 trunk followed by an MLP projection head.

    Args:
        out_dim: size of the projected, L2-normalised embedding.
    """

    def __init__(self, out_dim=128):
        super().__init__()
        resnet = models.resnet50(pretrained=False)
        # Everything except the final fully-connected layer.
        self.encoder = nn.Sequential(*list(resnet.children())[:-1])
        self.projector = nn.Sequential(
            nn.Linear(resnet.fc.in_features, 512),
            nn.ReLU(),
            nn.Linear(512, out_dim)
        )

    def forward(self, x):
        """Return unit-length embeddings, one row per input sample."""
        # flatten(1) merges only the trailing axes. A bare squeeze() would
        # also remove the batch axis for a batch of one, after which
        # normalising along dim=1 fails.
        h = self.encoder(x).flatten(1)
        z = self.projector(h)
        return F.normalize(z, dim=1)

# NT-Xent Loss
def nt_xent_loss(z1, z2, temperature=0.5):
    """Normalised temperature-scaled cross-entropy loss over two views.

    Args:
        z1: embeddings of the first view, one row per sample.
        z2: embeddings of the second view; row ``i`` is the positive
            partner of row ``i`` in ``z1``.
        temperature: divisor applied to the cosine similarities.

    Returns:
        Scalar tensor: mean over all ``2N`` rows of
        ``logsumexp(similarities to every other row) - similarity to the
        positive partner``.
    """
    n = z1.size(0)
    # Normalising once and taking a matrix product yields the same pairwise
    # cosine similarities as a broadcast cosine_similarity call, without
    # expanding the inputs to a (2N, 2N, D) shape.
    z = F.normalize(torch.cat([z1, z2], dim=0), dim=1)
    sim = (z @ z.t()) / temperature

    # A sample must not count as its own negative. -inf removes the diagonal
    # from logsumexp exactly and stays representable in reduced precision.
    # The out-of-place fill leaves tensors saved for backward untouched.
    self_mask = torch.eye(2 * n, dtype=torch.bool, device=z.device)
    sim = sim.masked_fill(self_mask, float('-inf'))

    # Row i pairs with row i + N (upper diagonal), and row i + N pairs with
    # row i (lower diagonal).
    pos = torch.cat([torch.diag(sim, n), torch.diag(sim, -n)], dim=0)
    loss = torch.logsumexp(sim, dim=1) - pos
    return loss.mean()

# Train SimCLR encoder: self-supervised training of encoder + projector (labels are not used)
model = ResNetSimCLR().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

print("Training SimCLR encoder...")
for epoch in range(10):
    model.train()
    total_loss = 0  # sum of per-batch losses for this epoch
    for x1, x2 in tqdm(train_loader):
        # x1, x2: two augmented views of the same batch of images
        x1, x2 = x1.to(DEVICE), x2.to(DEVICE)
        # Both views go through the same network, in two separate forward passes.
        z1, z2 = model(x1), model(x2)
        loss = nt_xent_loss(z1, z2)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Mean loss per batch.
    print(f"Epoch {epoch+1} Loss: {total_loss / len(train_loader):.4f}")

# Save encoder: only the trunk's weights are stored; the projector is discarded.
torch.save(model.encoder.state_dict(), "simclr_encoder.pth")

# Linear evaluation
class LinearClassifier(nn.Module):
    """Single fully-connected layer mapping feature vectors to class logits.

    Args:
        in_dim: length of the incoming feature vectors.
        num_classes: number of output logits.
    """

    def __init__(self, in_dim, num_classes):
        super().__init__()
        self.fc = nn.Linear(in_features=in_dim, out_features=num_classes)

    def forward(self, x):
        logits = self.fc(x)
        return logits

# Load frozen encoder
# map_location keeps the load working when the checkpoint was written on a
# different device.
encoder = ResNetSimCLR().encoder.to(DEVICE)
encoder.load_state_dict(torch.load("simclr_encoder.pth", map_location=DEVICE))
for p in encoder.parameters(): p.requires_grad = False
# A freshly constructed module is in training mode. In that mode BatchNorm
# normalises with per-batch statistics and keeps updating its running
# statistics even under torch.no_grad(), so the "frozen" encoder would still
# drift and its features would depend on batch composition. Eval mode makes
# feature extraction deterministic for both classifier training and testing.
encoder.eval()

# Linear probe on the 2048-dimensional encoder features.
classifier = LinearClassifier(2048, NUM_CLASSES).to(DEVICE)
optimizer = optim.Adam(classifier.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Evaluation data (deterministic resize + centre crop)
eval_tf = transforms.Compose([
    transforms.Resize(256), transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
eval_dataset = datasets.ImageFolder(base_dir, transform=eval_tf)
train_size = int(0.8 * len(eval_dataset))
# Fixed seed (same value as the supervised baseline split) so the held-out
# 20% is the same on every run and reported metrics are comparable.
# NOTE(review): for the test images to stay unseen by the encoder, the
# pretraining split must be drawn with the same seed -- verify.
train_ds, test_ds = random_split(
    eval_dataset,
    [train_size, len(eval_dataset) - train_size],
    generator=torch.Generator().manual_seed(42),
)
train_loader_eval = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_loader_eval = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)

# Train classifier
print("Training linear classifier...")
for epoch in range(10):
    classifier.train()
    for images, labels in train_loader_eval:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        # Features are computed without gradient tracking, so only the
        # classifier takes part in backpropagation.
        with torch.no_grad():
            # flatten(1) merges only the trailing axes; squeeze() would also
            # drop the batch axis for a batch of one and break the loss call.
            features = encoder(images).flatten(1)
        outputs = classifier(features)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Evaluate
classifier.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for images, labels in test_loader_eval:
        images = images.to(DEVICE)
        # flatten(1) merges only the trailing axes; squeeze() would also
        # drop the batch axis for a final batch of one, and the argmax over
        # dim=1 below would then fail.
        features = encoder(images).flatten(1)
        outputs = classifier(features)
        preds = torch.argmax(outputs, dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.numpy())

print("\n📊 SimCLR Evaluation:")
print(classification_report(all_labels, all_preds, digits=4))
