In [1]:
import torch as t
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms 
from torch.utils.data import DataLoader, random_split, Dataset

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

import cv2 

from PIL import Image
import os

from collections import Counter
from torch.utils.data import WeightedRandomSampler


In [2]:
transform = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.ColorJitter(0.2, 0.2),
    transforms.RandomHorizontalFlip(),
    transforms.Resize((416, 416)),
    transforms.ToTensor()
])

dev_transform = transforms.Compose([
    transforms.Resize((416, 416)),
    transforms.ToTensor()
])

In [3]:
root = "/Users/abhin-zstch1563/Documents/AI/DL/CNN/mudra_project/Mudras/Green_mudras_10/images/train/Asamyukta_Hastas"

dataset = datasets.ImageFolder(root)

dataset.classes

# 1. Count images in each folder
counts = {}
for cls in os.listdir(root):
    cls_path = os.path.join(root, cls)
    if os.path.isdir(cls_path):
        counts[cls] = len(os.listdir(cls_path))
        print(cls, counts[cls])


Pathaka 1140
Suchi 1110
nothing_dataset_green_only 1115
Hamsasyam 2260
Alapadmam 2148
Mushti 1110
Trishulam 1184
Mrigasirsha 1120
Sikharam 1121
Ardhapathaka 2308
Katakamukha 1110


In [4]:
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
generator = t.Generator().manual_seed(42)
train_ds, val_ds = random_split(dataset, [train_size, val_size], generator=generator)

train_ds.dataset.transform = transform
val_ds.dataset.transform = dev_transform

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32)


In [5]:

class MudraCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()

        def block(inp, out):
            return nn.Sequential(
                nn.Conv2d(inp, out, 3, padding=1),
                nn.ReLU(),
                nn.BatchNorm2d(out),
                nn.Conv2d(out, out, 3, padding=1),
                nn.ReLU(),
                nn.BatchNorm2d(out)
            )
        
        self.net = nn.Sequential(
            block(3, 32),
            nn.MaxPool2d(2),

            block(32, 64),
            nn.MaxPool2d(2),

            block(64, 128),
            nn.MaxPool2d(3),

            block(128, 256),
            nn.MaxPool2d(3),

            block(256, 256),
            nn.MaxPool2d(3),

            # nn.AdaptiveAvgPool2d(3)
        )
        self.fc = nn.Linear(256*3*3, num_classes)

    def forward(self, x):
        x = self.net(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)


In [6]:
model = MudraCNN(num_classes=len(dataset.classes))
criterion = nn.CrossEntropyLoss()
optimizer = t.optim.Adam(model.parameters(), lr=1e-4)

device = "mps" if t.mps.is_available() else "cpu"
model.to(device)
device
# dataset.classes

'mps'

In [7]:
model.net(t.rand([32, 3, 416, 416]).to(device)).shape

torch.Size([32, 256, 3, 3])

In [8]:

for epoch in range(10):
    model.train()
    total_loss = 0
    correct_train, total_train = 0, 0

    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() 

        _, predicted = t.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()
        train_acc = (correct_train / total_train) * 100


    model.eval()
    correct_val, total_val = 0, 0

    with t.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            _, preds = t.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (preds == labels).sum().item()

    val_acc = 100 * correct_val / total_val
    print(f"Epoch {epoch+1} | Loss = {total_loss / len(train_loader)} | Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")



Epoch 1 | Loss = 0.591509780907994 | Train Acc: 81.64% | Val Acc: 93.96%
Epoch 2 | Loss = 0.11515180071636202 | Train Acc: 96.95% | Val Acc: 96.53%
Epoch 3 | Loss = 0.05742784823955221 | Train Acc: 98.43% | Val Acc: 97.33%
Epoch 4 | Loss = 0.06653741583568493 | Train Acc: 98.08% | Val Acc: 98.31%


KeyboardInterrupt: 

In [None]:
t.save({
    "state_dict": model.state_dict(),
    "classes": dataset.classes,
    "class_to_idx": dataset.class_to_idx
}, "hand_model_mudra_green_10_me.pth")
