In [1]:
import os
import cv2
import numpy as np
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split
from torchvision import transforms, models
from tqdm import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
# Dataset loader for Tonji
class PalmROIDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.img_list = sorted([f for f in os.listdir(root_dir) if f.endswith(".bmp")])
        self.transform = transform

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        img_name = self.img_list[idx]
        img_path = os.path.join(self.root_dir, img_name)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError(f"Failed to load image: {img_path}")
        image = cv2.resize(image, (128, 128))  
        image = image[..., np.newaxis]  
        image = image.astype(np.float32) / 255.0  
        image = torch.from_numpy(image).permute(2, 0, 1) 
        if self.transform:
            image = self.transform(image)
        # Extract person ID (e.g., '00001.bmp' to '00020.bmp' -> person 1)
        try:
            img_number = int(img_name.split(".")[0])  
            person_id = ((img_number - 1) // 20) + 1 
            label = person_id - 1  
            if not (0 <= label <= 299):
                raise ValueError(f"Label {label + 1} out of range (1 to 300) for {img_name}")
        except (ValueError, IndexError):
            raise ValueError(f"Invalid filename format for label: {img_name}")
        return image, label, img_name

In [3]:
# VGG-16 with Batch Normalization (pretrained, modified for grayscale and 128x128 input)
class VGG16(nn.Module):
    def __init__(self, num_classes=300):
        super(VGG16, self).__init__()
        # Load pretrained VGG-16 with batch normalization
        vgg16_bn = models.vgg16_bn(pretrained=True)
        # Modify first convolutional layer for 1-channel input
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),  
            *vgg16_bn.features[1:]  
        )
        # Copy pretrained weights for conv1 (average RGB channels)
        with torch.no_grad():
            self.features[0].weight.copy_(vgg16_bn.features[0].weight.mean(dim=1, keepdim=True))
            self.features[0].bias.copy_(vgg16_bn.features[0].bias)
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))  
        self.classifier = vgg16_bn.classifier
        # Modify classifier for 300 classes (input size adjusted for 4x4 pooling)
        self.classifier[0] = nn.Linear(512 * 4 * 4, 4096)  
        self.classifier[-1] = nn.Linear(4096, num_classes)
        # Initialize new classifier layers
        nn.init.xavier_uniform_(self.classifier[0].weight)
        nn.init.zeros_(self.classifier[0].bias)
        nn.init.xavier_uniform_(self.classifier[-1].weight)
        nn.init.zeros_(self.classifier[-1].bias)

    def forward(self, x, return_features=False):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        if return_features:
            x = self.classifier[:6](x)  
            return x
        x = self.classifier(x)
        return x

In [4]:
# Function to extract features from a dataloader
def extract_features(model, dataloader, device):
    model.eval()
    features_list = []
    labels_list = []
    img_names = []
    with torch.no_grad():
        for x, y, names in tqdm(dataloader, desc="Extracting features"):
            x = x.to(device)
            f = model(x, return_features=True).cpu().numpy()
            features_list.append(f)
            labels_list.extend(y.numpy())
            img_names.extend(names)
    features = np.concatenate(features_list, axis=0)
    labels = np.array(labels_list)
    return features, labels, img_names

In [5]:
# Function to fine-tune VGG-16 on Tonji
def fine_tune_vgg16(model, train_loader, device, epochs=50):
    criterion = nn.CrossEntropyLoss()
    # Differential learning rates for pretrained and new layers
    optimizer = optim.Adam([
        {'params': model.features[0].parameters(), 'lr': 0.001},  
        {'params': model.features[1:].parameters(), 'lr': 0.0001},  
        {'params': model.classifier.parameters(), 'lr': 0.001},  
    ], lr=0.001)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
    model.train()
    prev_lr = [group['lr'] for group in optimizer.param_groups]
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for x, y, _ in tqdm(train_loader, desc=f"Fine-Tuning Epoch {epoch+1}"):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
        avg_loss = running_loss / len(train_loader)
        accuracy = 100 * correct / total
        print(f"Fine-Tuning Epoch {epoch+1}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
        scheduler.step(avg_loss)
        # Log learning rate changes
        current_lr = [group['lr'] for group in optimizer.param_groups]
        if any(lr != prev_lr[i] for i, lr in enumerate(current_lr)):
            print(f"Epoch {epoch+1}: Learning rates updated to {current_lr}")
        prev_lr = current_lr

In [6]:
# Compute metrics (Top-1, Top-5, ROC AUC, EER, FAR, FRR)
def compute_metrics(features, labels, clf):
    decision_scores = clf.decision_function(features)  
    topk_acc = {}
    for k in [1, 5]:
        top_k_indices = np.argsort(decision_scores, axis=1)[:, -k:]
        correct = 0
        for i, top_k in enumerate(top_k_indices):
            if labels[i] in top_k:
                correct += 1
        topk_acc[k] = correct / len(labels)
    y_true, y_score = [], []
    for i in range(len(labels)):
        for j in range(i + 1, len(labels)):
            score_i = decision_scores[i, labels[i]]
            score_j = decision_scores[j, labels[j]]
            sim = score_i + score_j
            y_score.append(sim)
            y_true.append(1 if labels[i] == labels[j] else 0)
    if len(set(y_true)) <= 1:
        print("Warning: y_true contains only one class. Setting ROC AUC, EER, FAR, FRR to 0.")
        roc_auc = 0.1
        eer = 0.1
        far = 0.1
        frr = 0.1
    else:
        fpr, tpr, thresholds = roc_curve(y_true, y_score)
        roc_auc = auc(fpr, tpr)
        fnr = 1 - tpr
        diff = np.abs(fnr - fpr)
        if np.all(np.isnan(diff)):
            print("Warning: All-NaN slice in EER calculation. Setting EER, FAR, FRR to 0.")
            eer = 0.1
            far = 0.1
            frr = 0.1
        else:
            eer_idx = np.nanargmin(diff)
            eer = (fpr[eer_idx] + fnr[eer_idx]) / 2
            far = fpr[eer_idx]
            frr = fnr[eer_idx]
    return topk_acc, roc_auc, eer, far, frr

In [7]:
# Main code
session1_root = r"C:\Users\hiteshk\Desktop\Deep Learning Approaches for roi extraction and using same for palm print recognisation\Tonji\ROI\session1"
session2_root = r"C:\Users\hiteshk\Desktop\Deep Learning Approaches for roi extraction and using same for palm print recognisation\Tonji\ROI\session2"

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
# Data augmentation for Tongji
transform = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
])

In [10]:
# Create Tongji datasets
dataset1 = PalmROIDataset(session1_root, transform=transform)
dataset2 = PalmROIDataset(session2_root, transform=transform)

In [11]:
# Verify dataset sizes
print(f"Session 1 images: {len(dataset1)}")
print(f"Session 2 images: {len(dataset2)}")

Session 1 images: 6000
Session 2 images: 6000


In [12]:
# Combine datasets
full_dataset = ConcatDataset([dataset1, dataset2])
print(f"Total images: {len(full_dataset)}")

Total images: 12000


In [13]:
# Split into train and test (80/20)
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

In [14]:
# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)  
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [15]:
# Initialize and fine-tune VGG-16
model = VGG16(num_classes=300).to(device)
fine_tune_vgg16(model, train_loader, device, epochs=10)

Fine-Tuning Epoch 1: 100%|██████████| 300/300 [01:59<00:00,  2.50it/s]


Fine-Tuning Epoch 1, Loss: 5.7191, Accuracy: 0.36%


Fine-Tuning Epoch 2: 100%|██████████| 300/300 [01:46<00:00,  2.80it/s]


Fine-Tuning Epoch 2, Loss: 5.0985, Accuracy: 2.84%


Fine-Tuning Epoch 3: 100%|██████████| 300/300 [01:49<00:00,  2.74it/s]


Fine-Tuning Epoch 3, Loss: 3.6099, Accuracy: 17.55%


Fine-Tuning Epoch 4: 100%|██████████| 300/300 [01:49<00:00,  2.74it/s]


Fine-Tuning Epoch 4, Loss: 1.7448, Accuracy: 54.20%


Fine-Tuning Epoch 5: 100%|██████████| 300/300 [01:47<00:00,  2.78it/s]


Fine-Tuning Epoch 5, Loss: 0.6476, Accuracy: 81.50%


Fine-Tuning Epoch 6: 100%|██████████| 300/300 [01:48<00:00,  2.78it/s]


Fine-Tuning Epoch 6, Loss: 0.2919, Accuracy: 91.78%


Fine-Tuning Epoch 7: 100%|██████████| 300/300 [01:48<00:00,  2.77it/s]


Fine-Tuning Epoch 7, Loss: 0.1949, Accuracy: 94.89%


Fine-Tuning Epoch 8: 100%|██████████| 300/300 [01:51<00:00,  2.69it/s]


Fine-Tuning Epoch 8, Loss: 0.1835, Accuracy: 94.82%


Fine-Tuning Epoch 9: 100%|██████████| 300/300 [01:49<00:00,  2.74it/s]


Fine-Tuning Epoch 9, Loss: 0.1186, Accuracy: 96.65%


Fine-Tuning Epoch 10: 100%|██████████| 300/300 [01:48<00:00,  2.76it/s]

Fine-Tuning Epoch 10, Loss: 0.1324, Accuracy: 96.51%





In [16]:
# Extract features
train_features, train_labels, _ = extract_features(model, train_loader, device)
test_features, test_labels, _ = extract_features(model, test_loader, device)

Extracting features: 100%|██████████| 300/300 [00:51<00:00,  5.80it/s]
Extracting features: 100%|██████████| 75/75 [00:15<00:00,  4.97it/s]


In [17]:
# Verify number of classes
num_classes = len(np.unique(np.concatenate((train_labels, test_labels))))
print(f"Number of classes: {num_classes}")
if num_classes != 300:
    raise ValueError(f"Expected 300 classes, but found {num_classes}. Verify filename format (e.g., '00001.bmp' to '12000.bmp').")

Number of classes: 300


In [18]:
# Normalize features
train_features = train_features / np.linalg.norm(train_features, axis=1, keepdims=True)
test_features = test_features / np.linalg.norm(test_features, axis=1, keepdims=True)

In [19]:
# Train SVM with RBF kernel and hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto', 0.01, 0.1]
}

In [20]:
svm_clf = SVC(kernel='rbf', decision_function_shape='ovr', probability=False)
grid_search = GridSearchCV(svm_clf, param_grid, cv=3, n_jobs=-1)
grid_search.fit(train_features, train_labels)
print(f"Best SVM parameters: {grid_search.best_params_}")

Best SVM parameters: {'C': 10, 'gamma': 'scale'}


In [21]:
# Use best estimator
svm_clf = grid_search.best_estimator_

In [22]:
# Compute metrics
topk_acc, roc_auc, eer, far, frr = compute_metrics(test_features, test_labels, svm_clf)

In [23]:
#Print metrics
print(f"Top-1 Identification Accuracy: {topk_acc[1]*100:.2f}%")
print(f"Top-5 Identification Accuracy: {topk_acc[5]*100:.2f}%")
print(f"ROC AUC: {roc_auc:.4f}")
print(f"EER: {eer:.4f}")
print(f"FAR: {far:.4f}")
print(f"FRR: {frr:.4f}")

Top-1 Identification Accuracy: 99.54%
Top-5 Identification Accuracy: 99.79%
ROC AUC: 0.4984
EER: 0.5030
FAR: 0.5029
FRR: 0.5030
