In [38]:
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from tqdm.notebook import tqdm


In [39]:
class CardDataset(Dataset):
    """Thin ``Dataset`` wrapper that delegates everything to an ``ImageFolder``.

    Args:
        dir_path: Directory handed to ``ImageFolder`` as its root.
        transform: Optional callable forwarded to ``ImageFolder``; ``None``
            means no transform is passed on.
    """

    def __init__(self, dir_path, transform=None):
        # All loading work is delegated to the wrapped folder dataset.
        self.data = ImageFolder(root=dir_path, transform=transform)

    def __len__(self):
        """Return the number of samples held by the wrapped dataset."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, idx):
        """Return whatever the wrapped dataset yields at position ``idx``."""
        sample = self.data[idx]
        return sample
    

In [40]:
# Preprocessing shared by both splits: resize every image to 224x224,
# then convert it to a tensor.
transform = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor()]
)

In [41]:
# Root folder of the card dataset. Set the CARD_DATASET_DIR environment
# variable to run the notebook on another machine; when it is unset the
# original hard-coded location is used, so the resulting paths are unchanged.
dataset_root = os.environ.get(
    'CARD_DATASET_DIR',
    '/aul/homes/amaha038/DeepLearning/Datasets/Card_Dataset_Kaggle',
)

train_path = os.path.join(dataset_root, 'train')
val_path = os.path.join(dataset_root, 'valid/')  # trailing slash kept from the original path

# Both splits go through the same preprocessing pipeline.
train_data = CardDataset(train_path, transform = transform)
val_data = CardDataset(val_path, transform = transform)

In [42]:
# Mini-batches of 32 samples. Only the training split is reshuffled each epoch;
# the validation split is iterated in a fixed order.
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_data, batch_size=32, shuffle=False)

In [None]:
#Formula for Convolution Layer Operation
"""
Output size of nn.Conv2d = floor((W - F + 2P) / S) + 1,
where W = width (or height) of the input image
F = filter (kernel) size
P = padding
S = stride
"""
#In our case
"""
Width = 224
F = 3
S = 1
P = 0

Hence, the output width (and height) of self.conv1 = (224 - 3 + 2*0)/1 + 1 = 222
"""
#Note By Default: torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)

In [43]:
num_classes = 53  # number of output classes requested from the classifier


class SimpleCNNClassifier(nn.Module):
    """Small CNN: two (conv 3x3 -> ReLU -> 2x2 max-pool) stages, then two linear layers.

    Args:
        num_classes: Number of logits produced by the final linear layer.
        input_size: Spatial size of the input images, either an int for square
            images or a ``(height, width)`` pair. The default of 224 yields the
            original flattened feature size of ``16 * 54 * 54``.

    Raises:
        ValueError: If ``input_size`` is too small to survive both
            convolution/pooling stages.
    """

    def __init__(self, num_classes, input_size=224):
        super().__init__()

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        self.conv1 = nn.Conv2d(3, 6, 3)   # in_channels=3, out_channels=6, kernel_size=3
        self.pool = nn.MaxPool2d(2, 2)    # halves each spatial dimension (floor division)
        self.conv2 = nn.Conv2d(6, 16, 3)  # in_channels=6, out_channels=16, kernel_size=3

        # Derive the flattened feature size instead of hard-coding it, so the
        # first linear layer always matches the configured input size.
        feat_h = self._size_after_features(height)
        feat_w = self._size_after_features(width)
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for two conv(3x3) + pool(2x2) stages"
            )

        self.fc1 = nn.Linear(16 * feat_h * feat_w, 256)  # hidden width 256 was picked manually
        self.fc2 = nn.Linear(256, num_classes)

    @staticmethod
    def _size_after_features(size):
        """Return one spatial dimension after both conv(3x3, no padding) + pool(2x2) stages."""
        for _ in range(2):
            # Conv: size - 3 + 1; pool with kernel 2 / stride 2: floor-divide by 2.
            size = (size - 3 + 1) // 2
        return size

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to logits ``(N, num_classes)``.

        Shape comments below are for the default 224x224 input.
        """
        x = self.pool(F.relu(self.conv1(x)))  # (N, 6, 111, 111)
        x = self.pool(F.relu(self.conv2(x)))  # (N, 16, 54, 54)
        x = x.view(x.size(0), -1)             # flatten to (N, 16*54*54)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)                       # raw logits, no softmax applied here
        return x
        

In [44]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network and move its parameters onto the selected device.
model = SimpleCNNClassifier(num_classes).to(device)

# Cross-entropy on the raw logits, optimised with Adam at a 1e-3 learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [45]:
def _run_epoch(model, loader, criterion, device, optimizer=None, desc=''):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: Network to run; switched to train or eval mode by this function.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device every batch is moved to before the forward pass.
        optimizer: When given, the pass is a training pass and the model is
            updated after every batch. When ``None``, the pass is evaluation
            only and gradient tracking is disabled.
        desc: Label shown on the progress bar.

    Returns:
        Tuple of the per-sample mean loss and the fraction of correct
        predictions over all samples seen in this pass.

    Raises:
        ValueError: If the loader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)  # train(False) is the same as eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(is_training):
        for images, labels in tqdm(loader, desc=desc):
            images, labels = images.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            # Predicted class = index of the largest logit.
            _, preds = torch.max(outputs, 1)
            batch_size = labels.size(0)

            # loss.item() is treated as the batch average (the original code did
            # the same), so weight it by the batch size before accumulating.
            loss_sum += loss.item() * batch_size
            correct += (preds == labels).sum().item()
            seen += batch_size

    if seen == 0:
        raise ValueError(f"{desc or 'Data loader'} produced no samples")

    # Normalise loss and accuracy by the same count of samples actually seen.
    return loss_sum / seen, correct / seen


# Training configuration and per-epoch history.
epochs = 2
train_losses, val_losses = [], []
train_accs, val_accs = [], []

for epoch in range(epochs):
    train_loss, train_acc = _run_epoch(
        model, train_loader, criterion, device, optimizer=optimizer, desc='Training loop'
    )
    val_loss, val_acc = _run_epoch(
        model, val_loader, criterion, device, desc='Validation loop'
    )

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    print(f"Epoch {epoch+1}/{epochs} - Train loss: {train_loss}, Val Loss: {val_loss}. Train Acc.: {train_acc}, Val Acc.: {val_acc}")
        
    

Training loop:   0%|          | 0/239 [00:00<?, ?it/s]

Validation loop:   0%|          | 0/9 [00:00<?, ?it/s]

Epoch 1/2 - Train loss: 3.2338026398001287, Val Loss: 2.1965630972160484. Train Acc.: 0.16448058761804826, Val Acc.: 0.3433962264150943


Training loop:   0%|          | 0/239 [00:00<?, ?it/s]

Validation loop:   0%|          | 0/9 [00:00<?, ?it/s]

Epoch 2/2 - Train loss: 2.018478906917672, Val Loss: 1.4396156153588924. Train Acc.: 0.4425498426023085, Val Acc.: 0.5773584905660377


In [None]:
#