In [1]:
import os
import cv2
import numpy as np
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split
from torchvision import models, transforms
from tqdm import tqdm

In [2]:
# Dataset loader for Tongji
class PalmROIDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.img_list = sorted([f for f in os.listdir(root_dir) if f.endswith(".bmp")])
        self.transform = transform

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        img_name = self.img_list[idx]
        img_path = os.path.join(self.root_dir, img_name)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError(f"Failed to load image: {img_path}")
        image = cv2.resize(image, (224, 224))  
        image = image[..., np.newaxis]  
        image = image.astype(np.float32) / 255.0  
        image = torch.from_numpy(image).permute(2, 0, 1)  
        if self.transform:
            image = self.transform(image)
        try:
            img_number = int(img_name.split(".")[0])  
            person_id = ((img_number - 1) // 20) + 1  
            label = person_id - 1  
            if not (0 <= label <= 299):
                raise ValueError(f"Label {label + 1} out of range (1 to 300) for {img_name}")
        except (ValueError, IndexError):
            raise ValueError(f"Invalid filename format for label: {img_name}")
        return image, label, img_name

In [3]:
# Modified AlexNet for grayscale input
class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        original_model = models.alexnet(pretrained=True)
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2), 
            *original_model.features[1:]  
        )
        # Copy weights for conv1 (average RGB weights for grayscale)
        with torch.no_grad():
            self.features[0].weight.copy_(original_model.features[0].weight.mean(dim=1, keepdim=True))
            self.features[0].bias.copy_(original_model.features[0].bias)
        self.avgpool = original_model.avgpool
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x, return_features=False):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        if return_features:
            x = self.classifier[:6](x) 
            return x
        x = self.classifier(x)
        return x

In [4]:
# Function to fine-tune AlexNet on Tonji
def finetune_alexnet(model, train_loader, device, epochs=12):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)  
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for x, y, _ in tqdm(train_loader, desc=f"Fine-tuning Epoch {epoch+1}"):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
        print(f"Fine-tuning Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {100 * correct/total:.2f}%")

In [5]:
# Function to extract features from a dataloader
def extract_features(model, dataloader, device):
    model.eval()
    features_list = []
    labels_list = []
    img_names = []
    with torch.no_grad():
        for x, y, names in tqdm(dataloader, desc="Extracting features"):
            x = x.to(device)
            f = model(x, return_features=True).cpu().numpy()
            features_list.append(f)
            labels_list.extend(y.numpy())
            img_names.extend(names)
    features = np.concatenate(features_list, axis=0)
    labels = np.array(labels_list)
    return features, labels, img_names

In [6]:
# Compute metrics (Top-1, Top-5, ROC AUC, EER, FAR, FRR)
def compute_metrics(features, labels, clf):
    decision_scores = clf.decision_function(features)  
    topk_acc = {}
    for k in [1, 5]:
        top_k_indices = np.argsort(decision_scores, axis=1)[:, -k:]
        correct = 0
        for i, top_k in enumerate(top_k_indices):
            if labels[i] in top_k:
                correct += 1
        topk_acc[k] = correct / len(labels)
    y_true, y_score = [], []
    for i in range(len(labels)):
        for j in range(i + 1, len(labels)):
            score_i = decision_scores[i, labels[i]]
            score_j = decision_scores[j, labels[j]]
            sim = score_i + score_j
            y_score.append(sim)
            y_true.append(1 if labels[i] == labels[j] else 0)
    if len(set(y_true)) <= 1:
        print("Warning: y_true contains only one class. Setting ROC AUC, EER, FAR, FRR to 0.")
        roc_auc = 0.1
        eer = 0.1
        far = 0.1
        frr = 0.1
    else:
        fpr, tpr, thresholds = roc_curve(y_true, y_score)
        roc_auc = auc(fpr, tpr)
        fnr = 1 - tpr
        diff = np.abs(fnr - fpr)
        if np.all(np.isnan(diff)):
            print("Warning: All-NaN slice in EER calculation. Setting EER, FAR, FRR to 0.")
            eer = 0.1
            far = 0.1
            frr = 0.1
        else:
            eer_idx = np.nanargmin(diff)
            eer = (fpr[eer_idx] + fnr[eer_idx]) / 2
            far = fpr[eer_idx]
            frr = fnr[eer_idx]
    return topk_acc, roc_auc, eer, far, frr

In [7]:
session1_root = r"C:\Users\hiteshk\Desktop\Deep Learning Approaches for roi extraction and using same for palm print recognisation\Tonji\ROI\session1"
session2_root = r"C:\Users\hiteshk\Desktop\Deep Learning Approaches for roi extraction and using same for palm print recognisation\Tonji\ROI\session2"

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
# Data augmentation for Tonji
transform = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
])

In [10]:
# Create Tongji datasets
dataset1 = PalmROIDataset(session1_root, transform=transform)
dataset2 = PalmROIDataset(session2_root, transform=transform)

In [11]:
# Verify dataset sizes
print(f"Session 1 images: {len(dataset1)}")
print(f"Session 2 images: {len(dataset2)}")

Session 1 images: 6000
Session 2 images: 6000


In [12]:
# Combine datasets
full_dataset = ConcatDataset([dataset1, dataset2])
print(f"Total images: {len(full_dataset)}")

Total images: 12000


In [13]:
#Split into train and test (80/20)
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

In [14]:
# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [15]:
model = AlexNet(num_classes=1000).to(device)  
model.classifier[6] = nn.Linear(4096, 300).to(device)  



In [16]:
# Fine-tune AlexNet on Tonji
finetune_alexnet(model, train_loader, device, epochs=30)

Fine-tuning Epoch 1: 100%|██████████| 150/150 [00:55<00:00,  2.71it/s]


Fine-tuning Epoch 1, Loss: 5.7009, Accuracy: 0.27%


Fine-tuning Epoch 2: 100%|██████████| 150/150 [00:50<00:00,  2.97it/s]


Fine-tuning Epoch 2, Loss: 5.5978, Accuracy: 0.55%


Fine-tuning Epoch 3: 100%|██████████| 150/150 [00:50<00:00,  2.97it/s]


Fine-tuning Epoch 3, Loss: 5.4198, Accuracy: 1.41%


Fine-tuning Epoch 4: 100%|██████████| 150/150 [00:59<00:00,  2.52it/s]


Fine-tuning Epoch 4, Loss: 5.1325, Accuracy: 2.20%


Fine-tuning Epoch 5: 100%|██████████| 150/150 [01:02<00:00,  2.42it/s]


Fine-tuning Epoch 5, Loss: 4.8107, Accuracy: 4.05%


Fine-tuning Epoch 6: 100%|██████████| 150/150 [01:01<00:00,  2.43it/s]


Fine-tuning Epoch 6, Loss: 4.4459, Accuracy: 7.34%


Fine-tuning Epoch 7: 100%|██████████| 150/150 [01:01<00:00,  2.42it/s]


Fine-tuning Epoch 7, Loss: 4.0195, Accuracy: 11.69%


Fine-tuning Epoch 8: 100%|██████████| 150/150 [01:01<00:00,  2.44it/s]


Fine-tuning Epoch 8, Loss: 3.5054, Accuracy: 18.03%


Fine-tuning Epoch 9: 100%|██████████| 150/150 [01:01<00:00,  2.44it/s]


Fine-tuning Epoch 9, Loss: 2.8638, Accuracy: 28.30%


Fine-tuning Epoch 10: 100%|██████████| 150/150 [01:01<00:00,  2.44it/s]


Fine-tuning Epoch 10, Loss: 2.2477, Accuracy: 41.50%


Fine-tuning Epoch 11: 100%|██████████| 150/150 [01:01<00:00,  2.43it/s]


Fine-tuning Epoch 11, Loss: 1.6720, Accuracy: 54.07%


Fine-tuning Epoch 12: 100%|██████████| 150/150 [01:01<00:00,  2.43it/s]


Fine-tuning Epoch 12, Loss: 1.2453, Accuracy: 64.45%


Fine-tuning Epoch 13: 100%|██████████| 150/150 [01:01<00:00,  2.42it/s]


Fine-tuning Epoch 13, Loss: 0.9464, Accuracy: 72.60%


Fine-tuning Epoch 14: 100%|██████████| 150/150 [01:02<00:00,  2.39it/s]


Fine-tuning Epoch 14, Loss: 0.7492, Accuracy: 78.04%


Fine-tuning Epoch 15: 100%|██████████| 150/150 [01:01<00:00,  2.43it/s]


Fine-tuning Epoch 15, Loss: 0.5805, Accuracy: 82.68%


Fine-tuning Epoch 16: 100%|██████████| 150/150 [01:01<00:00,  2.44it/s]


Fine-tuning Epoch 16, Loss: 0.4939, Accuracy: 85.03%


Fine-tuning Epoch 17: 100%|██████████| 150/150 [01:01<00:00,  2.42it/s]


Fine-tuning Epoch 17, Loss: 0.3900, Accuracy: 87.96%


Fine-tuning Epoch 18: 100%|██████████| 150/150 [01:01<00:00,  2.45it/s]


Fine-tuning Epoch 18, Loss: 0.3339, Accuracy: 89.76%


Fine-tuning Epoch 19: 100%|██████████| 150/150 [01:01<00:00,  2.43it/s]


Fine-tuning Epoch 19, Loss: 0.2899, Accuracy: 90.94%


Fine-tuning Epoch 20: 100%|██████████| 150/150 [01:01<00:00,  2.43it/s]


Fine-tuning Epoch 20, Loss: 0.2415, Accuracy: 92.40%


Fine-tuning Epoch 21: 100%|██████████| 150/150 [01:01<00:00,  2.43it/s]


Fine-tuning Epoch 21, Loss: 0.2371, Accuracy: 92.52%


Fine-tuning Epoch 22: 100%|██████████| 150/150 [00:49<00:00,  3.05it/s]


Fine-tuning Epoch 22, Loss: 0.1916, Accuracy: 94.16%


Fine-tuning Epoch 23: 100%|██████████| 150/150 [00:49<00:00,  3.01it/s]


Fine-tuning Epoch 23, Loss: 0.1955, Accuracy: 94.19%


Fine-tuning Epoch 24: 100%|██████████| 150/150 [00:51<00:00,  2.89it/s]


Fine-tuning Epoch 24, Loss: 0.1732, Accuracy: 94.46%


Fine-tuning Epoch 25: 100%|██████████| 150/150 [00:53<00:00,  2.80it/s]


Fine-tuning Epoch 25, Loss: 0.1504, Accuracy: 95.11%


Fine-tuning Epoch 26: 100%|██████████| 150/150 [00:50<00:00,  2.99it/s]


Fine-tuning Epoch 26, Loss: 0.1580, Accuracy: 95.19%


Fine-tuning Epoch 27: 100%|██████████| 150/150 [00:51<00:00,  2.92it/s]


Fine-tuning Epoch 27, Loss: 0.1301, Accuracy: 95.84%


Fine-tuning Epoch 28: 100%|██████████| 150/150 [00:50<00:00,  2.96it/s]


Fine-tuning Epoch 28, Loss: 0.1258, Accuracy: 96.03%


Fine-tuning Epoch 29: 100%|██████████| 150/150 [00:50<00:00,  2.98it/s]


Fine-tuning Epoch 29, Loss: 0.1220, Accuracy: 96.07%


Fine-tuning Epoch 30: 100%|██████████| 150/150 [00:51<00:00,  2.93it/s]

Fine-tuning Epoch 30, Loss: 0.0999, Accuracy: 96.83%





In [17]:
# Extract features
train_features, train_labels, _ = extract_features(model, train_loader, device)
test_features, test_labels, _ = extract_features(model, test_loader, device)

Extracting features: 100%|██████████| 150/150 [00:46<00:00,  3.24it/s]
Extracting features: 100%|██████████| 38/38 [00:14<00:00,  2.66it/s]


In [18]:
# Verify number of classes
num_classes = len(np.unique(np.concatenate((train_labels, test_labels))))
print(f"Number of classes: {num_classes}")
if num_classes != 300:
    raise ValueError(f"Expected 300 classes, but found {num_classes}. Verify filename format (e.g., '00001.bmp' to '12000.bmp').")

Number of classes: 300


In [19]:
# Normalize features
train_features = train_features / np.linalg.norm(train_features, axis=1, keepdims=True)
test_features = test_features / np.linalg.norm(test_features, axis=1, keepdims=True)

In [20]:
# Train SVM with RBF kernel and hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto', 0.01, 0.1]
}

In [22]:
svm_clf = SVC(kernel='rbf', decision_function_shape='ovr', probability=False)
grid_search = GridSearchCV(svm_clf, param_grid, cv=3, n_jobs=-1)
grid_search.fit(train_features, train_labels)
print(f"Best SVM parameters: {grid_search.best_params_}")

Best SVM parameters: {'C': 10, 'gamma': 'scale'}


In [23]:
# Use best estimator
svm_clf = grid_search.best_estimator_

In [24]:
#Compute metrics
topk_acc, roc_auc, eer, far, frr = compute_metrics(test_features, test_labels, svm_clf)

In [25]:
# Print metrics
print(f"Top-1 Identification Accuracy: {topk_acc[1]*100:.2f}%")
print(f"Top-5 Identification Accuracy: {topk_acc[5]*100:.2f}%")
print(f"ROC AUC: {roc_auc:.4f}")
print(f"EER: {eer:.4f}")
print(f"FAR: {far:.4f}")
print(f"FRR: {frr:.4f}")

Top-1 Identification Accuracy: 95.54%
Top-5 Identification Accuracy: 99.38%
ROC AUC: 0.4958
EER: 0.5039
FAR: 0.5039
FRR: 0.5039
