In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# ✅ Step 1: Import Libraries
import os
import shutil
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from collections import Counter

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, random_split

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# ✅ Step 2: Define Dataset Paths and Combine Original + Augmented Data
base_dir = "/kaggle/input/classification-of-rice-varieties-in-bangladesh/An Extensive Image Dataset for Classifying Rice Varieties in Bangladesh"
original_dir = os.path.join(base_dir, "Original", "Original")
augmented_dir = os.path.join(base_dir, "Augmented", "Augmented")
combined_path = "/kaggle/working/train_data"

# Always start from an empty target folder so files from a previous run
# cannot leak into this one.
if os.path.exists(combined_path):
    shutil.rmtree(combined_path)
os.makedirs(combined_path)

# Merge all images from Original and Augmented into one folder per class.
# Every copied file gets an "orig_" / "aug_" prefix so that two files sharing
# a name across the two sources do not silently overwrite each other; the
# prefix also keeps the origin of each image recoverable later.
for class_name in sorted(os.listdir(original_dir)):
    orig_class = os.path.join(original_dir, class_name)
    if not os.path.isdir(orig_class):
        # Ignore stray files sitting next to the class folders.
        continue

    target_class = os.path.join(combined_path, class_name)
    os.makedirs(target_class, exist_ok=True)

    for prefix, source_root in (("orig", original_dir), ("aug", augmented_dir)):
        source_class = os.path.join(source_root, class_name)
        if not os.path.isdir(source_class):
            # A class without an augmented counterpart is still usable;
            # report it instead of crashing in os.listdir().
            print(f"Warning: no folder '{source_class}', skipping")
            continue

        for file in os.listdir(source_class):
            source_file = os.path.join(source_class, file)
            if os.path.isfile(source_file):
                shutil.copy(source_file, os.path.join(target_class, f"{prefix}_{file}"))

# ✅ Step 3: Transforms and Load Dataset
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each channel with the given mean / std values.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# One sub-folder per class under combined_path; ImageFolder derives the
# class list from those folder names.
dataset = datasets.ImageFolder(root=combined_path, transform=transform)
class_names = dataset.classes
num_classes = len(class_names)
print("Classes:", class_names)

# ✅ Step 4: Split into Train and Validation Sets
# 80 % of the samples go to training, the remainder to validation.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so the same images end up in train / validation on every
# run; otherwise metrics from different runs are not comparable.
# NOTE(review): the dataset mixes original and augmented images, so augmented
# variants of a training image may presumably land in the validation set and
# make validation accuracy optimistic — confirm how the augmented files were
# derived.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# ✅ Step 5: Load Pretrained ResNet50
# `pretrained=True` is deprecated in torchvision; the explicit weights enum
# below selects the same ImageNet checkpoint.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
for param in model.parameters():
    param.requires_grad = False  # freeze all layers

# Replace the classifier head with a small trainable MLP. The new layers are
# created after the freeze loop, so their parameters keep requires_grad=True.
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(512, num_classes)
)

model = model.to(device)

# ✅ Step 6: Define Loss and Optimizer
# Cross-entropy over the class logits; only the parameters of the new
# classifier head (model.fc) are handed to the optimizer.
head_parameters = model.fc.parameters()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=head_parameters, lr=1e-3)

# ✅ Step 7: Training Loop
def _run_epoch(loader, training):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``training`` is true the model is put in train mode and the
    optimizer is stepped after every batch; otherwise the model is put in
    eval mode and gradients are disabled.

    Raises:
        ValueError: if ``loader`` yields no samples, since the averages
            would be undefined.
    """
    # NOTE(review): train mode applies to the whole network, including the
    # frozen backbone; its normalization layers presumably still update their
    # running statistics — confirm whether that is intended.
    model.train(training)
    total_loss, total_correct, total_seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            batch_count = images.size(0)
            # criterion returns the batch mean, so weight it by the batch
            # size to get a correct per-sample average at the end.
            total_loss += loss.item() * batch_count
            _, preds = torch.max(outputs, 1)
            # Accumulate plain Python ints so the counters have one type
            # regardless of how many batches were processed.
            total_correct += int((preds == labels).sum().item())
            total_seen += batch_count

    if total_seen == 0:
        raise ValueError("Data loader produced no samples")
    return total_loss / total_seen, total_correct / total_seen


# Per-epoch history, consumed by the plotting step.
train_acc, val_acc = [], []
train_loss, val_loss = [], []

epochs = 10
for epoch in range(epochs):
    epoch_loss, epoch_acc = _run_epoch(train_loader, training=True)
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)

    # Validation
    val_epoch_loss, val_epoch_acc = _run_epoch(val_loader, training=False)
    val_loss.append(val_epoch_loss)
    val_acc.append(val_epoch_acc)

    print(f"Epoch {epoch+1}/{epochs} | "
          f"Train Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f} | "
          f"Val Loss: {val_epoch_loss:.4f}, Acc: {val_epoch_acc:.4f}")

# ✅ Step 8: Plot Accuracy and Loss Curves
plt.figure(figsize=(14, 5))

# One entry per side-by-side panel:
# (subplot position, train series, val series, train label, val label,
#  panel title, y-axis label)
curve_panels = [
    (1, train_acc, val_acc, 'Train Acc', 'Val Acc',
     "Accuracy over Epochs", "Accuracy"),
    (2, train_loss, val_loss, 'Train Loss', 'Val Loss',
     "Loss over Epochs", "Loss"),
]

for position, train_series, val_series, train_label, val_label, title, y_label in curve_panels:
    plt.subplot(1, 2, position)
    plt.plot(train_series, label=train_label)
    plt.plot(val_series, label=val_label)
    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# ✅ Step 9: Confusion Matrix + Classification Report
model.eval()
all_preds = []
all_labels = []

# Collect predictions and ground-truth labels for the whole validation set.
with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.numpy())

# Pass the full list of class indices explicitly. Without it, a class that
# happens to be absent from the validation split shrinks the matrix (so the
# tick labels no longer line up) and makes classification_report raise
# because target_names has more entries than the classes it found.
label_ids = list(range(len(class_names)))

cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

print("\nClassification Report:\n")
print(classification_report(all_labels, all_preds,
                            labels=label_ids, target_names=class_names))
