In [1]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.utils.data import random_split


class CarsFolderDataset(Dataset):
    """
    Dataset for folder-based structure where each folder is a class.
    
    Expected structure:
        root_dir/
            car1/
                image1.jpg
                image2.jpg
                ...
            car2/
                image1.jpg
                image2.jpg
                ...
    """
    def __init__(self, root_dir, transform=None, num_classes=20):
        self.root_dir = root_dir
        self.transform = transform
        self.num_classes = num_classes
        
        self.files = []
        self.labels = []
        
        # Get all class folders (sorted for consistency)
        all_class_folders = sorted([d for d in os.listdir(root_dir) 
                                    if os.path.isdir(os.path.join(root_dir, d))])
        
        # Take only first num_classes
        class_folders = all_class_folders[:num_classes]
        
        # Create class to index mapping
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(class_folders)}
        self.idx_to_class = {idx: cls_name for cls_name, idx in self.class_to_idx.items()}
        
        # Load all images
        for class_name in class_folders:
            class_path = os.path.join(root_dir, class_name)
            class_idx = self.class_to_idx[class_name]
            
            # Get all image files in this class folder
            for img_name in os.listdir(class_path):
                img_path = os.path.join(class_path, img_name)
                # Check if it's a file (not a subdirectory)
                if os.path.isfile(img_path):
                    # Optional: filter by image extensions
                    if img_name.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
                        self.files.append(img_path)
                        self.labels.append(class_idx)
        
        print(f"Loaded {len(self.files)} images from {len(class_folders)} classes.")
        print(f"Classes: {class_folders}")
    
    def __len__(self):
        return len(self.files)
    
    def __getitem__(self, idx):
        img_path = self.files[idx]
        img = Image.open(img_path).convert("RGB")
        
        if self.transform:
            img = self.transform(img)
        
        label = self.labels[idx]
        return img, label

In [2]:
class AugmentedDataset(Dataset):
    """
    Dataset wrapper that creates and saves augmented versions of images.
    Expands dataset by 4x: original + 3 augmentations per image.
    """
    def __init__(self, base_dataset, save_dir, augment_transform, transform=None):
        self.base = base_dataset
        self.save_dir = save_dir
        self.transform = transform
        self.augment_transform = augment_transform
        os.makedirs(self.save_dir, exist_ok=True)
        self.aug_paths = []
        self._prepare_augmented_images()
    
    def _prepare_augmented_images(self):
        print("Preparing augmented images...")
        for idx in range(len(self.base)):
            # Use the full file path and sanitize it for filename
            fname = self.base.files[idx].replace("/", "_").replace("\\", "_")
            orig_path = os.path.join(self.save_dir, f"{fname}_orig.jpg")
            aug_paths = [
                os.path.join(self.save_dir, f"{fname}_aug1.jpg"),
                os.path.join(self.save_dir, f"{fname}_aug2.jpg"),
                os.path.join(self.save_dir, f"{fname}_aug3.jpg"),
            ]
            
            # Only create if missing
            if not all(os.path.exists(p) for p in aug_paths):
                # Load original image
                img = Image.open(self.base.files[idx]).convert("RGB")
                
                # Save original
                if not os.path.exists(orig_path):
                    img.save(orig_path)
                
                # Create and save 3 augmentations
                for p in aug_paths:
                    aug_img = self.augment_transform(img)
                    aug_img.save(p)
            
            self.aug_paths.append([orig_path] + aug_paths)
        
        print(f"Augmented dataset ready: {len(self.aug_paths)} base images × 4 = {len(self)} total samples")
    
    def __len__(self):
        return len(self.base) * 4  # original + 3 augmentations
    
    def __getitem__(self, idx):
        base_idx = idx // 4
        aug_idx = idx % 4  # select which augmentation (0=orig, 1-3=augs)
        img_path = self.aug_paths[base_idx][aug_idx]
        label = self.base.labels[base_idx]
        
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        
        return img, label

In [3]:
augment_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=1.0),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
])
    
# Training transform (applied when loading)
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                       std=[0.229, 0.224, 0.225])
])
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                       std=[0.229, 0.224, 0.225])
])

In [4]:
base_train_dataset = CarsFolderDataset(
    root_dir='/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/train',
    transform=None  # No transform for base dataset
)

aug_dataset = AugmentedDataset(
    base_dataset=base_train_dataset,
    save_dir='augmented_train',
    augment_transform=augment_transform,
    transform=train_transform
)
# Test dataset WITHOUT augmentation
test_dataset = CarsFolderDataset(
    root_dir='/kaggle/input/stanford-car-dataset-by-classes-folder/car_data/car_data/test',
    transform=test_transform
)


# 80/20 split
train_size = int(0.8 * len(aug_dataset))
val_size = len(aug_dataset) - train_size

train_dataset, val_dataset = random_split(aug_dataset, [train_size, val_size])

Loaded 819 images from 20 classes.
Classes: ['AM General Hummer SUV 2000', 'Acura Integra Type R 2001', 'Acura RL Sedan 2012', 'Acura TL Sedan 2012', 'Acura TL Type-S 2008', 'Acura TSX Sedan 2012', 'Acura ZDX Hatchback 2012', 'Aston Martin V8 Vantage Convertible 2012', 'Aston Martin V8 Vantage Coupe 2012', 'Aston Martin Virage Convertible 2012', 'Aston Martin Virage Coupe 2012', 'Audi 100 Sedan 1994', 'Audi 100 Wagon 1994', 'Audi A5 Coupe 2012', 'Audi R8 Coupe 2012', 'Audi RS 4 Convertible 2008', 'Audi S4 Sedan 2007', 'Audi S4 Sedan 2012', 'Audi S5 Convertible 2012', 'Audi S5 Coupe 2012']
Preparing augmented images...
Augmented dataset ready: 819 base images × 4 = 3276 total samples
Loaded 811 images from 20 classes.
Classes: ['AM General Hummer SUV 2000', 'Acura Integra Type R 2001', 'Acura RL Sedan 2012', 'Acura TL Sedan 2012', 'Acura TL Type-S 2008', 'Acura TSX Sedan 2012', 'Acura ZDX Hatchback 2012', 'Aston Martin V8 Vantage Convertible 2012', 'Aston Martin V8 Vantage Coupe 2012', 

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [6]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1000, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

In [13]:
from torch import nn
import torchvision.models as models
from torch.optim import Adam
from torch.nn import Identity
from torch.nn import CrossEntropyLoss

class Inception(nn.Module):
    def __init__(self, num_output) -> None:
        super().__init__()
        # Load pretrained GoogLeNet (Inception v1)
        inception = models.googlenet(weights=models.GoogLeNet_Weights.DEFAULT)

        # Freeze all pretrained layers
        # for param in inception.parameters():
        #     param.requires_grad = False

        # Replace the final fully-connected layer
        num_filters = inception.fc.in_features
        inception.fc = nn.Linear(num_filters, num_output)

        self.inception_v1 = inception

    def forward(self, x):
        return self.inception_v1(x)

In [14]:
model = Inception(20).to(device)

In [15]:
EPOCHS = 10
learning_rate = 1e-3
criterion = CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [16]:
import datetime as dt
from tqdm import tqdm

total_loss_train_plot = []
total_acc_train_plot = []
total_loss_test_plot = []
total_acc_test_plot = []

start = dt.datetime.now()

for epoch in range(EPOCHS):
    model.train()
    total_train_loss = 0
    correct_train = 0
    total_train = 0

    print(f"\nEpoch {epoch + 1}/{EPOCHS}")
    train_loop = tqdm(train_loader, desc="Training", leave=False)

    for inputs, labels in train_loop:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        total_train_loss += loss.item()

        # --- Accuracy ---
        preds = torch.argmax(outputs, dim=1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)

        loss.backward()
        optimizer.step()

        train_loop.set_postfix({"Batch Loss": f"{loss.item():.4f}"})

    # --- Validation ---
    model.eval()
    total_val_loss = 0
    correct_val = 0
    total_val = 0

    val_loop = tqdm(val_loader, desc="Validating", leave=False)
    with torch.no_grad():
        for inputs, labels in val_loop:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_val_loss += loss.item()

            # --- Accuracy ---
            preds = torch.argmax(outputs, dim=1)
            correct_val += (preds == labels).sum().item()
            total_val += labels.size(0)

            val_loop.set_postfix({"Batch Loss": f"{loss.item():.4f}"})

    # --- Compute averages ---
    avg_train_loss = total_train_loss / len(train_loader)
    avg_val_loss = total_val_loss / len(val_loader)
    train_acc = correct_train / total_train
    val_acc = correct_val / total_val

    total_loss_train_plot.append(avg_train_loss)
    total_acc_train_plot.append(train_acc)
    total_loss_test_plot.append(avg_val_loss)
    total_acc_test_plot.append(val_acc)

    print(f"Train Loss (MAE): {avg_train_loss:.4f} | Train Acc: {train_acc*100:.2f}%")
    print(f"Val Loss (MAE):   {avg_val_loss:.4f} | Val Acc:   {val_acc*100:.2f}%")
    print("=" * 60)

end = dt.datetime.now()
print(f"Training completed in: {end - start}")


Epoch 1/10


                                                                            

Train Loss (MAE): 1.5655 | Train Acc: 53.70%
Val Loss (MAE):   1.0467 | Val Acc:   63.11%

Epoch 2/10


                                                                            

Train Loss (MAE): 0.3261 | Train Acc: 91.15%
Val Loss (MAE):   1.0292 | Val Acc:   66.77%

Epoch 3/10


                                                                            

Train Loss (MAE): 0.1003 | Train Acc: 97.71%
Val Loss (MAE):   0.4702 | Val Acc:   83.23%

Epoch 4/10


                                                                            

Train Loss (MAE): 0.0698 | Train Acc: 98.05%
Val Loss (MAE):   0.3699 | Val Acc:   87.96%

Epoch 5/10


                                                                            

Train Loss (MAE): 0.0458 | Train Acc: 98.93%
Val Loss (MAE):   0.3416 | Val Acc:   90.40%

Epoch 6/10


                                                                            

Train Loss (MAE): 0.0615 | Train Acc: 98.44%
Val Loss (MAE):   0.5694 | Val Acc:   84.30%

Epoch 7/10


                                                                            

Train Loss (MAE): 0.1141 | Train Acc: 96.72%
Val Loss (MAE):   1.0381 | Val Acc:   69.05%

Epoch 8/10


                                                                            

Train Loss (MAE): 0.1394 | Train Acc: 95.92%
Val Loss (MAE):   0.5672 | Val Acc:   83.69%

Epoch 9/10


                                                                            

Train Loss (MAE): 0.1080 | Train Acc: 97.10%
Val Loss (MAE):   0.9873 | Val Acc:   71.80%

Epoch 10/10


                                                                            

Train Loss (MAE): 0.0747 | Train Acc: 97.90%
Val Loss (MAE):   0.6738 | Val Acc:   82.62%
Training completed in: 1:17:08.419771




In [17]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import label_binarize
import seaborn as sns
import os

os.makedirs("/kaggle/working/metrics", exist_ok=True)

model.eval()
all_labels = []
all_preds = []
all_probs = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        probs = torch.softmax(outputs, dim=1)
        preds = outputs.argmax(dim=1)

        all_labels.append(labels.cpu().numpy())
        all_preds.append(preds.cpu().numpy())
        all_probs.append(probs.cpu().numpy())

all_labels = np.concatenate(all_labels)
all_preds = np.concatenate(all_preds)
all_probs = np.concatenate(all_probs)

# ---- Confusion Matrix as PNG ----
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(10,8))
sns.heatmap(cm, annot=False, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.savefig("/kaggle/working/metrics/Inception_confusion_matrix.png")
plt.close()

# ---- Classification Report as TXT ----
report = classification_report(all_labels, all_preds, zero_division=0)
with open("/kaggle/working/metrics/Inception_classification_report.txt", "w") as f:
    f.write(report)

print("Classification Report:\n", report)

# ---- Smart ROC & AUC ----
MAX_ROC_SAMPLES = 1500  # optional to speed up

if len(all_labels) > MAX_ROC_SAMPLES:
    idx = np.random.choice(len(all_labels), MAX_ROC_SAMPLES, replace=False)
    y_true = all_labels[idx]
    y_prob = all_probs[idx]
else:
    y_true = all_labels
    y_prob = all_probs

NUM_CLASSES = y_prob.shape[1]
y_true_bin = label_binarize(y_true, classes=list(range(NUM_CLASSES)))

# Micro-average ROC
fpr_micro, tpr_micro, _ = roc_curve(y_true_bin.ravel(), y_prob.ravel())
roc_auc_micro = auc(fpr_micro, tpr_micro)

# Macro-average ROC
fpr_macro = np.linspace(0, 1, 100)
tpr_macro = np.zeros_like(fpr_macro)
for i in range(NUM_CLASSES):
    fpr_i, tpr_i, _ = roc_curve(y_true_bin[:, i], y_prob[:, i])
    tpr_macro += np.interp(fpr_macro, fpr_i, tpr_i)
tpr_macro /= NUM_CLASSES
roc_auc_macro = auc(fpr_macro, tpr_macro)

# Save ROC AUC in TXT
with open("/kaggle/working/metrics/Inception_roc_auc.txt", "w") as f:
    f.write(f"Micro-average AUC: {roc_auc_micro:.4f}\n")
    f.write(f"Macro-average AUC: {roc_auc_macro:.4f}\n")

# Optional: Save ROC curves as image
plt.figure(figsize=(8,6))
plt.plot(fpr_micro, tpr_micro,
         label=f'Micro-average ROC (AUC = {roc_auc_micro:.4f})', linewidth=2)
plt.plot(fpr_macro, tpr_macro,
         label=f'Macro-average ROC (AUC = {roc_auc_macro:.4f})', linewidth=2)
plt.plot([0,1], [0,1], 'k--', linewidth=1)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves')
plt.legend(loc='lower right')
plt.savefig("/kaggle/working/metrics/Inception_roc_curves.png")
plt.close()

print(f"Micro AUC: {roc_auc_micro:.4f}, Macro AUC: {roc_auc_macro:.4f}")

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.95      0.95        44
           1       1.00      0.57      0.72        44
           2       0.47      0.22      0.30        32
           3       0.94      0.37      0.53        43
           4       0.63      0.86      0.73        42
           5       1.00      0.42      0.60        40
           6       0.47      0.72      0.57        39
           7       1.00      0.09      0.16        45
           8       0.32      0.83      0.47        41
           9       0.67      0.61      0.63        33
          10       0.63      0.87      0.73        38
          11       0.81      0.62      0.70        40
          12       0.74      0.55      0.63        42
          13       0.49      0.90      0.63        41
          14       0.86      0.84      0.85        43
          15       0.78      0.50      0.61        36
          16       0.61      0.76      0.67        45
   