In [2]:
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import timm

import matplotlib.pyplot as plt
import numpy as pd
import sys
from tqdm.notebook import tqdm

In [None]:
#PyTorch Dataset

In [3]:
class CardClassifierDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data = ImageFolder(data_dir, transform = transform)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    @property
    def classes(self):
        return self.data.classes
        

In [4]:
#Dataset and Dataloader
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_folder = '/aul/homes/amaha038/DeepLearning/Datasets/Card_Dataset_Kaggle/train/'
test_folder = '/aul/homes/amaha038/DeepLearning/Datasets/Card_Dataset_Kaggle/test/'
valid_folder = '/aul/homes/amaha038/DeepLearning/Datasets/Card_Dataset_Kaggle/valid/'


train_dataset = CardClassifierDataset(data_dir = train_folder, transform = transform)
test_dataset = CardClassifierDataset(data_dir = test_folder, transform = transform)
valid_dataset = CardClassifierDataset(data_dir = valid_folder, transform = transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [5]:
#printing the number of features_out for any model
model_name = 'vit_base_patch16_224'
base_model = timm.create_model(model_name, pretrained=True)

# Removing the classification head (usually the last layer)
features = nn.Sequential(*list(base_model.children())[:-1])

# Creating a dummy input (batch_size=1, 3 channels, 224x224)
dummy_input = torch.randn(1, 3, 224, 224)

# Passing through the feature extractor
with torch.no_grad():
    output = features(dummy_input)
    print(f"Feature shape: {output.shape}")
    print(f"Flattened feature size: {output.view(1, -1).shape[1]}")

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Feature shape: torch.Size([1, 196, 768])
Flattened feature size: 150528


In [6]:
#Simple Model

class SimpleCardClassifier(nn.Module):
    def __init__(self, num_classes=53):
        super(SimpleCardClassifier, self).__init__()
        # Where we define all the parts of the model
        self.base_model = timm.create_model('vit_base_patch16_224', pretrained=True)
        self.features = nn.Sequential(*list(self.base_model.children())[:-1])

        feature_out = 150528
        # Make a classifier
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(feature_out, num_classes)
        )
    
    def forward(self, x):
        # Connect these parts and return the output
        x = self.features(x)
        output = self.classifier(x)
        return output

In [11]:
model = SimpleCardClassifier(num_classes=53)
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
model.to(device)


SimpleCardClassifier(
  (base_model): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (patch_drop): Identity()
    (norm_pre): Identity()
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (q_norm): Identity()
          (k_norm): Identity()
          (attn_drop): Dropout(p=0.0, inplace=False)
          (norm): Identity()
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): Identity()
        (drop_path1): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)

In [12]:
#loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [15]:
#training loop
num_epochs = 2
train_losses, val_losses = [], []

for epoch in range(num_epochs):
    #Training_Phase
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0
    for images, labels in tqdm(train_loader, desc='Training loop'):
        #moving inputs and labels to the device
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Prediction
        _, preds = torch.max(outputs, 1)
        correct_train += (preds == labels).sum().item()
        total_train += labels.size(0)

        loss.backward()
        optimizer.step()
        running_loss += loss.item() * labels.size(0) #batch loss across all batches
    train_loss = running_loss / len(train_loader.dataset) # average loss per sample across the whole epoch
    train_losses.append(train_loss)
    train_acc = correct_train / total_train


    #validation_phase
    model.eval()
    running_loss = 0.0
    correct_val = 0
    total_val = 0
    with torch.no_grad():
        for images, labels in tqdm(valid_loader, desc='Validation loop'):
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            correct_val += (preds == labels).sum().item()
            total_val += labels.size(0)
            running_loss += loss.item() * labels.size(0)

    val_loss = running_loss / len(valid_loader.dataset)
    val_losses.append(val_loss)
    val_acc = correct_val / total_val


    print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {train_loss}, Validation loss: {val_loss}, Train Accuracy: {train_acc}, Validation Accuracy: {val_acc}")
    
    
    




Training loop:   0%|          | 0/239 [00:00<?, ?it/s]

Validation loop:   0%|          | 0/9 [00:00<?, ?it/s]

Epoch 1/2 - Train loss: 3.2932991210961515, Validation loss: 2.409352890050636, Train Accuracy: 0.3516526757607555, Validation Accuracy: 0.4037735849056604


Training loop:   0%|          | 0/239 [00:00<?, ?it/s]

Validation loop:   0%|          | 0/9 [00:00<?, ?it/s]

Epoch 2/2 - Train loss: 2.6798391734940807, Validation loss: 2.3849118970475107, Train Accuracy: 0.4092339979013641, Validation Accuracy: 0.4339622641509434
