## Cards Image Classification 
### Building a Classification Model from Scratch Using PyTorch

In [2]:
import torch
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

In [3]:
import os, shutil, numpy as np
from glob import glob
from PIL import Image
from torch.utils.data import random_split, Dataset, DataLoader
from torchvision import transforms as T
# Fix PyTorch's global RNG seed so that runs are repeatable
# (e.g. weight initialisation and DataLoader shuffling draw from it).
torch.manual_seed(2024)

class CustomDataset(Dataset):
    """Image dataset whose labels come from each file's parent directory name.

    Files are discovered with the glob pattern ``{root}/{data}/*/*`` and then
    sorted, so class indices are assigned in order of first appearance in
    that sorted list.

    Args:
        root: Directory that contains the split folders.
        data: Name of the split sub-folder to read (e.g. ``"train"``).
        transformations: Optional callable applied to the RGB PIL image
            before it is returned.

    Attributes:
        im_paths: Sorted list of every matched file path.
        cls_names: Mapping of class name -> integer class index.
        cls_counts: Mapping of class name -> number of files in that class.
    """

    def __init__(self, root, data, transformations=None):
        self.transformations = transformations
        self.im_paths = sorted(glob(f"{root}/{data}/*/*"))
        self.cls_names, self.cls_counts = {}, {}

        for im_path in self.im_paths:
            class_name = self.get_class(im_path)
            # setdefault only stores the next free index the first time a
            # class is seen; len() is evaluated before the insertion.
            self.cls_names.setdefault(class_name, len(self.cls_names))
            self.cls_counts[class_name] = self.cls_counts.get(class_name, 0) + 1

    def get_class(self, path):
        """Return the name of the directory that directly contains ``path``."""
        # os.path handles the platform's separator; splitting on a literal
        # "/" returns the wrong component when paths use a different one.
        return os.path.basename(os.path.dirname(path))

    def __len__(self):
        """Return the number of files found for this split."""
        return len(self.im_paths)

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``(image, class_index)``.

        The image is converted to RGB and, when ``transformations`` was
        given, passed through it.
        """
        im_path = self.im_paths[idx]
        # The context manager closes the underlying file as soon as the
        # converted copy has been made.
        with Image.open(im_path) as raw:
            im = raw.convert("RGB")
        gt = self.cls_names[self.get_class(im_path)]
        if self.transformations is not None:
            im = self.transformations(im)
        return im, gt
    
def get_dls(root, transformations, bs, split=(0.9, 0.05, 0.05), ns=4):
    """Build the train / validation / test data loaders for the card dataset.

    The three splits are read from the ``train``, ``valid`` and ``test``
    sub-folders of ``root``; the same ``transformations`` are applied to all.

    Args:
        root: Directory containing the ``train``, ``valid`` and ``test`` folders.
        transformations: Callable applied to every loaded image.
        bs: Batch size for the training and validation loaders.
        split: Unused; kept only so existing callers that pass it keep
            working. The default is an immutable tuple rather than a
            shared mutable list.
        ns: Number of worker processes per loader.

    Returns:
        ``(tr_dl, val_dl, ts_dl, cls_names)`` where ``cls_names`` is the
        class-name -> index mapping built from the training split.
    """
    tr_ds = CustomDataset(root=root, data="train", transformations=transformations)
    vl_ds = CustomDataset(root=root, data="valid", transformations=transformations)
    ts_ds = CustomDataset(root=root, data="test", transformations=transformations)

    # Page-locked host memory only helps host-to-GPU copies, so request it
    # for all three loaders alike, and only when CUDA is actually present.
    pin = torch.cuda.is_available()

    tr_dl = DataLoader(tr_ds, batch_size=bs, shuffle=True, num_workers=ns, pin_memory=pin)
    val_dl = DataLoader(vl_ds, batch_size=bs, shuffle=False, num_workers=ns, pin_memory=pin)
    # The test loader yields one image per batch.
    ts_dl = DataLoader(ts_ds, batch_size=1, shuffle=False, num_workers=ns, pin_memory=pin)

    return tr_dl, val_dl, ts_dl, tr_ds.cls_names

# Dataset location and preprocessing constants.
root = "/kaggle/input/cards-image-datasetclassification"
im_size = 224
# Per-channel statistics used for normalisation (the standard ImageNet values).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Resize -> tensor -> per-channel normalisation; shared by every split.
tfs = T.Compose([
    T.Resize((im_size, im_size)),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])
tr_dl, val_dl, ts_dl, classes = get_dls(root=root, transformations=tfs, bs=36)

# Batches per loader (train, valid, test), then the class-name -> index mapping.
print(len(tr_dl), len(val_dl), len(ts_dl), classes, sep="\n")

212
8
265
{'ace of clubs': 0, 'ace of diamonds': 1, 'ace of hearts': 2, 'ace of spades': 3, 'eight of clubs': 4, 'eight of diamonds': 5, 'eight of hearts': 6, 'eight of spades': 7, 'five of clubs': 8, 'five of diamonds': 9, 'five of hearts': 10, 'five of spades': 11, 'four of clubs': 12, 'four of diamonds': 13, 'four of hearts': 14, 'four of spades': 15, 'jack of clubs': 16, 'jack of diamonds': 17, 'jack of hearts': 18, 'jack of spades': 19, 'joker': 20, 'king of clubs': 21, 'king of diamonds': 22, 'king of hearts': 23, 'king of spades': 24, 'nine of clubs': 25, 'nine of diamonds': 26, 'nine of hearts': 27, 'nine of spades': 28, 'queen of clubs': 29, 'queen of diamonds': 30, 'queen of hearts': 31, 'queen of spades': 32, 'seven of clubs': 33, 'seven of diamonds': 34, 'seven of hearts': 35, 'seven of spades': 36, 'six of clubs': 37, 'six of diamonds': 38, 'six of hearts': 39, 'six of spades': 40, 'ten of clubs': 41, 'ten of diamonds': 42, 'ten of hearts': 43, 'ten of spades': 44, 'three 

In [4]:
class CustomCNN(nn.Module):
    """Five-layer convolutional classifier sized for 3x224x224 inputs.

    Every convolution is 3x3 with padding 1, so only the three 2x2
    max-pools change the spatial size (224 -> 112 -> 56 -> 28). The first
    fully connected layer therefore expects ``256 * 28 * 28`` features
    per sample.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.pool_2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(256 * 28 * 28, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Raises:
            RuntimeError: from ``fc1`` when the input's spatial size does
                not reduce to 28x28 after the three pooling stages.
        """
        # Pooling follows conv1, conv3 and conv5 only.
        x = self.pool_2(self.relu(self.conv1(x)))
        x = self.relu(self.conv2(x))
        x = self.pool_2(self.relu(self.conv3(x)))
        x = self.relu(self.conv4(x))
        x = self.pool_2(self.relu(self.conv5(x)))
        # Flatten per sample. view(-1, 256 * 28 * 28) would instead fold any
        # surplus spatial elements into the batch dimension, silently turning
        # one wrongly sized image into several fake samples.
        x = torch.flatten(x, start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [5]:
# Pick the compute device, then build the from-scratch CNN with its loss and optimizer.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = CustomCNN(num_classes=53)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Module.to() moves the parameters in place, so the optimizer built above
# keeps referring to the same parameter objects.
model.to(device)

CustomCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=200704, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=53, bias=True)
  (relu): ReLU()
)

In [None]:
from tqdm import tqdm

# Train the from-scratch CNN; validation accuracy is measured after every epoch.
num_epochs = 30
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0

    # The bar is advanced by hand so its label can show the running mean loss.
    with tqdm(total=len(tr_dl)) as pbar:
        for i, (inputs, labels) in enumerate(tr_dl):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            pbar.update(1)
            pbar.set_description(f"Epoch [{epoch + 1}/{num_epochs}], Training Loss: {running_loss / (i + 1):.4f}")

    # ---- validation pass (no gradients needed) ----
    model.eval()
    correct, total = 0, 0

    with torch.no_grad():
        for inputs, labels in val_dl:
            labels = labels.to(device)
            # Index of the largest logit is the predicted class.
            predicted = model(inputs.to(device)).max(dim=1).indices
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_accuracy = correct / total

    # Per-epoch summary: mean batch loss and validation accuracy in percent.
    print(f'Epoch [{epoch + 1}/{num_epochs}], '
          f'Training Loss: {running_loss / len(tr_dl):.4f}, '
          f'Validation Accuracy: {100 * val_accuracy:.2f}%')

print('Finished Training')


Epoch [1/30], Training Loss: 1.9433: 100%|██████████| 212/212 [19:08<00:00,  5.42s/it]


Epoch [1/30], Training Loss: 1.9433, Validation Accuracy: 69.06%


Epoch [2/30], Training Loss: 1.0802: 100%|██████████| 212/212 [19:30<00:00,  5.52s/it]


Epoch [2/30], Training Loss: 1.0802, Validation Accuracy: 80.38%


Epoch [3/30], Training Loss: 0.5576:  66%|██████▌   | 139/212 [14:10<07:49,  6.44s/it]

In [None]:
# Test the model on the held-out test split.
# Switch to evaluation mode explicitly: if training was interrupted part-way
# through an epoch, the model is still in training mode at this point.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in ts_dl:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Index of the largest logit is the predicted class; under no_grad
        # the legacy `.data` detour is unnecessary.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# %d truncates the percentage to a whole number.
print('Accuracy on the test images: %d %%' % (100 * correct / total))

### Transfer Learning Using a Pre-trained ResNet152 Model

Transfer learning is a machine learning technique where a model trained on one task is reused as the starting point for a model on a second task. Instead of training a model from scratch, which can be time-consuming and computationally expensive, transfer learning leverages knowledge gained from solving one problem and applies it to a different but related problem.

1. **Pre-trained Model**: Start with a model that has been trained on a large dataset for a specific task, such as image classification. This model has learned to extract general features from the data that are useful for solving that task.

2. **Task-Specific Adaptation**: Instead of training the model from scratch, you adapt the pre-trained model to your specific task by fine-tuning it on a smaller dataset related to the new task. This process involves adjusting the model's parameters to better fit the new data while retaining the valuable knowledge learned during the initial training.

Transfer learning is especially useful when you have a limited amount of data for your target task or when training a model from scratch would be impractical due to resource constraints.

ResNet, short for Residual Network, is a specific type of deep neural network architecture that was introduced to address the problem of vanishing gradients during training. The vanishing gradient problem occurs when gradients become extremely small as they propagate backward through many layers of a neural network, making it difficult to train deep networks effectively.

**ResNet** introduces a concept called residual learning, where each layer in the network learns to predict the residual between the input and the output of the layer, rather than directly trying to learn the desired underlying mapping. This is achieved through the use of skip connections, also known as shortcut connections or identity mappings, which allow gradients to bypass certain layers. These connections enable the network to learn residual functions effectively, making it easier to train very deep neural networks.

The key innovation of ResNet is the residual block, which consists of a set of layers with a skip connection that adds the input to the output of the block. By adding these skip connections, ResNet can effectively train extremely deep networks with hundreds or even thousands of layers while avoiding the vanishing gradient problem. This architecture has been highly successful in various computer vision tasks, including image classification, object detection, and image segmentation.

In [None]:
# Transfer-learning setup: ImageNet-pretrained ResNet-152 with a new output layer.
import torchvision.models as models

num_classes = 53
pretrained_model = models.resnet152(weights='IMAGENET1K_V2')
# Replace the final fully connected layer so the network emits one logit per card class.
pretrained_model.fc = nn.Linear(
    in_features=pretrained_model.fc.in_features,
    out_features=num_classes,
)

# Loss and optimizer. All parameters are handed to SGD, so the whole network is updated.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=pretrained_model.parameters(), lr=0.001, momentum=0.9)


In [None]:
# Training
def train(model, train_loader, criterion, optimizer, num_epochs=14):
    """Train ``model`` in place and print the mean per-sample loss of each epoch.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network to optimise.
        train_loader: Iterable of ``(inputs, labels)`` batches that also
            exposes ``.dataset`` (its length is the averaging denominator).
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.
    """
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()

            # Weight by batch size so a smaller final batch does not skew the mean.
            running_loss += batch_loss.item() * batch_x.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

In [None]:
def evaluate(model, test_loader):
    """Print the fraction of correctly classified samples in ``test_loader``.

    The model is put into evaluation mode and gradients are disabled.
    Batches are moved to the module-level ``device``. The accuracy is
    printed as a fraction with two decimals (not a percentage); nothing
    is returned.

    Args:
        model: Network to evaluate.
        test_loader: Iterable of ``(inputs, labels)`` batches.
    """
    model.eval()
    hits, seen = 0, 0

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            # Index of the largest logit is the predicted class.
            predicted = model(batch_x).max(dim=1).indices
            seen += batch_y.size(0)
            hits += (predicted == batch_y).sum().item()

    accuracy = hits / seen
    print(f"Accuracy: {accuracy:.2f}")

In [None]:
# Move the pretrained network to the device chosen earlier in the file,
# fine-tune it on the training split, then report accuracy on the test split.
pretrained_model.to(device)
train(model=pretrained_model, train_loader=tr_dl, criterion=criterion, optimizer=optimizer)
evaluate(model=pretrained_model, test_loader=ts_dl)

### Accuracy Comparison

#### Accuracy of the model trained from scratch: 80–85 %

#### Accuracy using the pre-trained ResNet152 model: > 95 %