In [1]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.utils.data import random_split


class CarsFolderDataset(Dataset):
    """
    Dataset for folder-based structure where each folder is a class.
    
    Expected structure:
        root_dir/
            car1/
                image1.jpg
                image2.jpg
                ...
            car2/
                image1.jpg
                image2.jpg
                ...
    """
    def __init__(self, root_dir, transform=None, num_classes=20):
        self.root_dir = root_dir
        self.transform = transform
        self.num_classes = num_classes
        
        self.files = []
        self.labels = []
        
        # Get all class folders (sorted for consistency)
        all_class_folders = sorted([d for d in os.listdir(root_dir) 
                                    if os.path.isdir(os.path.join(root_dir, d))])
        
        # Take only first num_classes
        class_folders = all_class_folders[:num_classes]
        
        # Create class to index mapping
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(class_folders)}
        self.idx_to_class = {idx: cls_name for cls_name, idx in self.class_to_idx.items()}
        
        # Load all images
        for class_name in class_folders:
            class_path = os.path.join(root_dir, class_name)
            class_idx = self.class_to_idx[class_name]
            
            # Get all image files in this class folder
            for img_name in os.listdir(class_path):
                img_path = os.path.join(class_path, img_name)
                # Check if it's a file (not a subdirectory)
                if os.path.isfile(img_path):
                    # Optional: filter by image extensions
                    if img_name.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
                        self.files.append(img_path)
                        self.labels.append(class_idx)
        
        print(f"Loaded {len(self.files)} images from {len(class_folders)} classes.")
        print(f"Classes: {class_folders}")
    
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        img_path = self.files[idx]
        img = Image.open(img_path).convert("RGB")
        
        if self.transform:
            img = self.transform(img)
        
        label = self.labels[idx]
        return img, label

In [2]:
class AugmentedDataset(Dataset):
    """
    Dataset wrapper that creates and saves augmented versions of images.
    Expands dataset by 4x: original + 3 augmentations per image.
    """
    def __init__(self, base_dataset, save_dir, augment_transform, transform=None):
        self.base = base_dataset
        self.save_dir = save_dir
        self.transform = transform
        self.augment_transform = augment_transform
        os.makedirs(self.save_dir, exist_ok=True)
        self.aug_paths = []
        self._prepare_augmented_images()
    
    def _prepare_augmented_images(self):
        print("Preparing augmented images...")
        for idx in range(len(self.base)):
            # Use the full file path and sanitize it for filename
            fname = self.base.files[idx].replace("/", "_").replace("\\", "_")
            orig_path = os.path.join(self.save_dir, f"{fname}_orig.jpg")
            aug_paths = [
                os.path.join(self.save_dir, f"{fname}_aug1.jpg"),
                os.path.join(self.save_dir, f"{fname}_aug2.jpg"),
                os.path.join(self.save_dir, f"{fname}_aug3.jpg"),
            ]
            
            # Only create if missing
            if not all(os.path.exists(p) for p in aug_paths):
                # Load original image
                img = Image.open(self.base.files[idx]).convert("RGB")
                
                # Save original
                if not os.path.exists(orig_path):
                    img.save(orig_path)
                
                # Create and save 3 augmentations
                for p in aug_paths:
                    aug_img = self.augment_transform(img)
                    aug_img.save(p)
            
            self.aug_paths.append([orig_path] + aug_paths)
        
        print(f"Augmented dataset ready: {len(self.aug_paths)} base images × 4 = {len(self)} total samples")
    
    def __len__(self):
        return len(self.base) * 4  # original + 3 augmentations
    
    def __getitem__(self, idx):
        base_idx = idx // 4
        aug_idx = idx % 4  # select which augmentation (0=orig, 1-3=augs)
        img_path = self.aug_paths[base_idx][aug_idx]
        label = self.base.labels[base_idx]
        
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        
        return img, label

In [3]:
augment_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1.0),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
])
    
# Training transform (applied when loading)
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                       std=[0.229, 0.224, 0.225])
])
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                       std=[0.229, 0.224, 0.225])
])

In [4]:
base_train_dataset = CarsFolderDataset(
    root_dir='/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train',
    transform=None  # No transform for base dataset
)

aug_dataset = AugmentedDataset(
    base_dataset=base_train_dataset,
    save_dir='augmented_train',
    augment_transform=augment_transform,
    transform=train_transform
)
# Test dataset WITHOUT augmentation
test_dataset = CarsFolderDataset(
    root_dir='/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/test',
    transform=test_transform
)


# 80/20 split
train_size = int(0.8 * len(aug_dataset))
val_size = len(aug_dataset) - train_size

train_dataset, val_dataset = random_split(aug_dataset, [train_size, val_size])

Loaded 819 images from 20 classes.
Classes: ['AM General Hummer SUV 2000', 'Acura Integra Type R 2001', 'Acura RL Sedan 2012', 'Acura TL Sedan 2012', 'Acura TL Type-S 2008', 'Acura TSX Sedan 2012', 'Acura ZDX Hatchback 2012', 'Aston Martin V8 Vantage Convertible 2012', 'Aston Martin V8 Vantage Coupe 2012', 'Aston Martin Virage Convertible 2012', 'Aston Martin Virage Coupe 2012', 'Audi 100 Sedan 1994', 'Audi 100 Wagon 1994', 'Audi A5 Coupe 2012', 'Audi R8 Coupe 2012', 'Audi RS 4 Convertible 2008', 'Audi S4 Sedan 2007', 'Audi S4 Sedan 2012', 'Audi S5 Convertible 2012', 'Audi S5 Coupe 2012']
Preparing augmented images...
Augmented dataset ready: 819 base images × 4 = 3276 total samples
Loaded 811 images from 20 classes.
Classes: ['AM General Hummer SUV 2000', 'Acura Integra Type R 2001', 'Acura RL Sedan 2012', 'Acura TL Sedan 2012', 'Acura TL Type-S 2008', 'Acura TSX Sedan 2012', 'Acura ZDX Hatchback 2012', 'Aston Martin V8 Vantage Convertible 2012', 'Aston Martin V8 Vantage Coupe 2012', 

In [5]:
print(f"Dataset size before augmentation: {len(base_train_dataset)}")
print(f"Dataset size after augmentation: {len(aug_dataset)}")

Dataset size before augmentation: 819
Dataset size after augmentation: 3276


In [6]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True
)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=False
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False
)

In [7]:
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet34

In [8]:
# Define the model
model = resnet34(pretrained=True)

# Replace the last layer
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 20)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 181MB/s]


In [9]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Move the model to the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [10]:
# Define the number of epochs
num_epochs = 10

# Train the model
for epoch in range(num_epochs):

    # ---- TRAIN ----
    model.train()
    train_loss = 0.0
    train_acc = 0.0

    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * inputs.size(0)

        # ---- TRAIN ACCURACY ----
        _, train_preds = torch.max(outputs, 1)
        train_acc += torch.sum(train_preds == labels.data)

    # ---- EVAL ----
    model.eval()
    test_loss = 0.0
    test_acc = 0.0

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(val_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item() * inputs.size(0)

            _, preds = torch.max(outputs, 1)
            test_acc += torch.sum(preds == labels.data)

    # Normalize metrics
    train_loss /= len(train_dataset)
    test_loss /= len(val_dataset)

    train_acc = train_acc.double() / len(train_dataset)
    test_acc = test_acc.double() / len(val_dataset)

    # ---- PRINT ----
    print(
        f"Epoch [{epoch + 1}/{num_epochs}] "
        f"Train Loss: {train_loss:.4f} "
        f"Train Acc: {train_acc:.4f} "
        f"val Loss: {test_loss:.4f} "
        f"val Acc: {test_acc:.4f}"
    )


Epoch [1/10] Train Loss: 2.7945 Train Acc: 0.1782 val Loss: 2.3155 val Acc: 0.3765
Epoch [2/10] Train Loss: 1.8434 Train Acc: 0.5714 val Loss: 1.6349 val Acc: 0.5823
Epoch [3/10] Train Loss: 1.2214 Train Acc: 0.7775 val Loss: 1.2220 val Acc: 0.6982
Epoch [4/10] Train Loss: 0.8026 Train Acc: 0.8874 val Loss: 0.9350 val Acc: 0.7866
Epoch [5/10] Train Loss: 0.5334 Train Acc: 0.9450 val Loss: 0.7484 val Acc: 0.8216
Epoch [6/10] Train Loss: 0.3526 Train Acc: 0.9782 val Loss: 0.6089 val Acc: 0.8704
Epoch [7/10] Train Loss: 0.2348 Train Acc: 0.9916 val Loss: 0.5118 val Acc: 0.9085
Epoch [8/10] Train Loss: 0.1645 Train Acc: 0.9962 val Loss: 0.4416 val Acc: 0.9223
Epoch [9/10] Train Loss: 0.1182 Train Acc: 0.9985 val Loss: 0.3977 val Acc: 0.9329
Epoch [10/10] Train Loss: 0.0915 Train Acc: 0.9985 val Loss: 0.3634 val Acc: 0.9314


In [11]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import label_binarize
import seaborn as sns
import os

os.makedirs("/kaggle/working/metrics", exist_ok=True)

model.eval()
all_labels = []
all_preds = []
all_probs = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        probs = torch.softmax(outputs, dim=1)
        preds = outputs.argmax(dim=1)

        all_labels.append(labels.cpu().numpy())
        all_preds.append(preds.cpu().numpy())
        all_probs.append(probs.cpu().numpy())

all_labels = np.concatenate(all_labels)
all_preds = np.concatenate(all_preds)
all_probs = np.concatenate(all_probs)

# ---- Confusion Matrix as PNG ----
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(10,8))
sns.heatmap(cm, annot=False, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.savefig("/kaggle/working/metrics/confusion_matrix.png")
plt.close()

# ---- Classification Report as TXT ----
report = classification_report(all_labels, all_preds, zero_division=0)
with open("/kaggle/working/metrics/classification_report.txt", "w") as f:
    f.write(report)

print("Classification Report:\n", report)

# ---- Smart ROC & AUC ----
MAX_ROC_SAMPLES = 1500  # optional to speed up

if len(all_labels) > MAX_ROC_SAMPLES:
    idx = np.random.choice(len(all_labels), MAX_ROC_SAMPLES, replace=False)
    y_true = all_labels[idx]
    y_prob = all_probs[idx]
else:
    y_true = all_labels
    y_prob = all_probs

NUM_CLASSES = y_prob.shape[1]
y_true_bin = label_binarize(y_true, classes=list(range(NUM_CLASSES)))

# Micro-average ROC
fpr_micro, tpr_micro, _ = roc_curve(y_true_bin.ravel(), y_prob.ravel())
roc_auc_micro = auc(fpr_micro, tpr_micro)

# Macro-average ROC
fpr_macro = np.linspace(0, 1, 100)
tpr_macro = np.zeros_like(fpr_macro)
for i in range(NUM_CLASSES):
    fpr_i, tpr_i, _ = roc_curve(y_true_bin[:, i], y_prob[:, i])
    tpr_macro += np.interp(fpr_macro, fpr_i, tpr_i)
tpr_macro /= NUM_CLASSES
roc_auc_macro = auc(fpr_macro, tpr_macro)

# Save ROC AUC in TXT
with open("/kaggle/working/metrics/roc_auc.txt", "w") as f:
    f.write(f"Micro-average AUC: {roc_auc_micro:.4f}\n")
    f.write(f"Macro-average AUC: {roc_auc_macro:.4f}\n")

# Optional: Save ROC curves as image
plt.figure(figsize=(8,6))
plt.plot(fpr_micro, tpr_micro,
         label=f'Micro-average ROC (AUC = {roc_auc_micro:.4f})', linewidth=2)
plt.plot(fpr_macro, tpr_macro,
         label=f'Macro-average ROC (AUC = {roc_auc_macro:.4f})', linewidth=2)
plt.plot([0,1], [0,1], 'k--', linewidth=1)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves')
plt.legend(loc='lower right')
plt.savefig("/kaggle/working/metrics/roc_curves.png")
plt.close()

print(f"Micro AUC: {roc_auc_micro:.4f}, Macro AUC: {roc_auc_macro:.4f}")

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        44
           1       0.83      0.89      0.86        44
           2       0.53      0.53      0.53        32
           3       0.59      0.70      0.64        43
           4       0.76      0.76      0.76        42
           5       0.71      0.68      0.69        40
           6       0.94      0.74      0.83        39
           7       0.70      0.51      0.59        45
           8       0.66      0.56      0.61        41
           9       0.66      0.76      0.70        33
          10       0.87      0.89      0.88        38
          11       0.80      0.80      0.80        40
          12       0.77      0.71      0.74        42
          13       0.61      0.68      0.64        41
          14       0.71      0.84      0.77        43
          15       0.62      0.78      0.69        36
          16       0.55      0.51      0.53        45
   