In [1]:
import os
import cv2
import numpy as np
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split
from torchvision import transforms, models
from tqdm import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
# Dataset loader for Tonji
class PalmROIDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.img_list = sorted([f for f in os.listdir(root_dir) if f.endswith(".bmp")])
        self.transform = transform

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, idx):
        img_name = self.img_list[idx]
        img_path = os.path.join(self.root_dir, img_name)
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError(f"Failed to load image: {img_path}")
        image = cv2.resize(image, (224, 224))  # GoogLeNet expects 224x224 input
        image = image[..., np.newaxis]
        image = image.astype(np.float32) / 255.0
        image = torch.from_numpy(image).permute(2, 0, 1)
        if self.transform:
            image = self.transform(image)
        # Extract person ID
        try:
            img_number = int(img_name.split(".")[0])
            person_id = ((img_number - 1) // 20) + 1
            label = person_id - 1
            if not (0 <= label <= 299):
                raise ValueError(f"Label {label + 1} out of range (1 to 300) for {img_name}")
        except (ValueError, IndexError):
            raise ValueError(f"Invalid filename format for label: {img_name}")
        return image, label, img_name

In [3]:
class GoogLeNetFeature(nn.Module):
    def __init__(self, num_classes=300):
        super(GoogLeNetFeature, self).__init__()
        # Load pretrained GoogLeNet
        self.googlenet = models.googlenet(weights='IMAGENET1K_V1')
        # Average pretrained conv1 weights for grayscale adaptation
        with torch.no_grad():
            pretrained_weight = self.googlenet.conv1.conv.weight  
            averaged_weight = pretrained_weight.mean(dim=1, keepdim=True)  
            replicated_weight = averaged_weight.repeat(1, 3, 1, 1)  
            self.googlenet.conv1.conv.weight.copy_(replicated_weight)
        # Create feature extractor (all layers except final fc)
        self.feature_extractor = nn.Sequential(*list(self.googlenet.children())[:-1])
        self.feature_extractor.add_module('dropout', self.googlenet.dropout)
        # Custom classifier
        self.classifier = nn.Linear(1024, num_classes)
        nn.init.xavier_uniform_(self.classifier.weight)
        nn.init.zeros_(self.classifier.bias)

    def forward(self, x, return_features=False):
        # Replicate 1-channel to 3 channels
        x = x.repeat(1, 3, 1, 1)
        # Extract features
        x = self.feature_extractor(x)
        # Flatten to [batch_size, 1024]
        x = torch.flatten(x, 1)
        if return_features:
            return x  
        # Apply classifier
        x = self.classifier(x)  
        return x

In [4]:
def fine_tune_googlenet(model, train_loader, device, epochs=40):
    criterion = nn.CrossEntropyLoss()
    # Optimizer with differential learning rates (no overlap)
    optimizer = optim.Adam([
        {'params': model.feature_extractor.parameters(), 'lr': 0.0001},  
        {'params': model.classifier.parameters(), 'lr': 0.001},         
    ])
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
    model.train()
    prev_lr = [group['lr'] for group in optimizer.param_groups]
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for x, y, _ in tqdm(train_loader, desc=f"Fine-Tuning Epoch {epoch+1}"):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
        avg_loss = running_loss / len(train_loader)
        accuracy = 100 * correct / total
        print(f"Fine-Tuning Epoch {epoch+1}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
        scheduler.step(avg_loss)
        current_lr = [group['lr'] for group in optimizer.param_groups]
        if any(lr != prev_lr[i] for i, lr in enumerate(current_lr)):
            print(f"Epoch {epoch+1}: Learning rates updated to {current_lr}")
        prev_lr = current_lr

In [5]:
# Function to extract features
def extract_features(model, dataloader, device):
    model.eval()
    features_list = []
    labels_list = []
    img_names = []
    with torch.no_grad():
        for x, y, names in tqdm(dataloader, desc="Extracting features"):
            x = x.to(device)
            f = model(x, return_features=True).cpu().numpy()
            features_list.append(f)
            labels_list.extend(y.numpy())
            img_names.extend(names)
    features = np.concatenate(features_list, axis=0)
    labels = np.array(labels_list)
    return features, labels, img_names

In [6]:
# Compute metrics
def compute_metrics(features, labels, clf):
    decision_scores = clf.decision_function(features)
    topk_acc = {}
    for k in [1, 5]:
        top_k_indices = np.argsort(decision_scores, axis=1)[:, -k:]
        correct = 0
        for i, top_k in enumerate(top_k_indices):
            if labels[i] in top_k:
                correct += 1
        topk_acc[k] = correct / len(labels)
    y_true, y_score = [], []
    for i in range(len(labels)):
        for j in range(i + 1, len(labels)):
            score_i = decision_scores[i, labels[i]]
            score_j = decision_scores[j, labels[j]]
            sim = score_i + score_j
            y_score.append(sim)
            y_true.append(1 if labels[i] == labels[j] else 0)
    if len(set(y_true)) <= 1:
        print("Warning: y_true contains only one class. Setting ROC AUC, EER, FAR, FRR to 0.1.")
        roc_auc = 0.1
        eer = 0.1
        far = 0.1
        frr = 0.1
    else:
        fpr, tpr, thresholds = roc_curve(y_true, y_score)
        roc_auc = auc(fpr, tpr)
        fnr = 1 - tpr
        diff = np.abs(fnr - fpr)
        if np.all(np.isnan(diff)):
            print("Warning: All-NaN slice in EER calculation. Setting EER, FAR, FRR to 0.1.")
            eer = 0.1
            far = 0.1
            frr = 0.1
        else:
            eer_idx = np.nanargmin(diff)
            eer = (fpr[eer_idx] + fnr[eer_idx]) / 2
            far = fpr[eer_idx]
            frr = fnr[eer_idx]
    return topk_acc, roc_auc, eer, far, frr

In [7]:
# Main code
session1_root = r"C:\Users\hiteshk\Desktop\Deep Learning Approaches for roi extraction and using same for palm print recognisation\Tonji\ROI\session1"
session2_root = r"C:\Users\hiteshk\Desktop\Deep Learning Approaches for roi extraction and using same for palm print recognisation\Tonji\ROI\session2"

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [9]:
# Data augmentation for Tonji
transform = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
])

In [10]:
# Create Tongji datasets
dataset1 = PalmROIDataset(session1_root, transform=transform)
dataset2 = PalmROIDataset(session2_root, transform=transform)


In [11]:
# Verify dataset sizes
print(f"Session 1 images: {len(dataset1)}")
print(f"Session 2 images: {len(dataset2)}")

Session 1 images: 6000
Session 2 images: 6000


In [12]:
# Combine datasets
full_dataset = ConcatDataset([dataset1, dataset2])
print(f"Total images: {len(full_dataset)}")

Total images: 12000


In [13]:
# Split into train and test (80/20)
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

In [14]:
# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [15]:
model = GoogLeNetFeature(num_classes=300).to(device)
fine_tune_googlenet(model, train_loader, device, epochs=15)

Fine-Tuning Epoch 1: 100%|██████████| 300/300 [01:42<00:00,  2.94it/s]


Fine-Tuning Epoch 1, Loss: 5.0340, Accuracy: 8.67%


Fine-Tuning Epoch 2: 100%|██████████| 300/300 [01:46<00:00,  2.80it/s]


Fine-Tuning Epoch 2, Loss: 1.5430, Accuracy: 72.35%


Fine-Tuning Epoch 3: 100%|██████████| 300/300 [01:40<00:00,  2.98it/s]


Fine-Tuning Epoch 3, Loss: 0.3165, Accuracy: 97.10%


Fine-Tuning Epoch 4: 100%|██████████| 300/300 [01:36<00:00,  3.10it/s]


Fine-Tuning Epoch 4, Loss: 0.1082, Accuracy: 99.34%


Fine-Tuning Epoch 5: 100%|██████████| 300/300 [01:37<00:00,  3.08it/s]


Fine-Tuning Epoch 5, Loss: 0.0515, Accuracy: 99.86%


Fine-Tuning Epoch 6: 100%|██████████| 300/300 [01:37<00:00,  3.07it/s]


Fine-Tuning Epoch 6, Loss: 0.0326, Accuracy: 99.95%


Fine-Tuning Epoch 7: 100%|██████████| 300/300 [01:36<00:00,  3.11it/s]


Fine-Tuning Epoch 7, Loss: 0.0217, Accuracy: 99.92%


Fine-Tuning Epoch 8: 100%|██████████| 300/300 [01:37<00:00,  3.08it/s]


Fine-Tuning Epoch 8, Loss: 0.0146, Accuracy: 99.99%


Fine-Tuning Epoch 9: 100%|██████████| 300/300 [01:37<00:00,  3.07it/s]


Fine-Tuning Epoch 9, Loss: 0.0097, Accuracy: 100.00%


Fine-Tuning Epoch 10: 100%|██████████| 300/300 [01:38<00:00,  3.05it/s]


Fine-Tuning Epoch 10, Loss: 0.0100, Accuracy: 99.95%


Fine-Tuning Epoch 11: 100%|██████████| 300/300 [01:37<00:00,  3.09it/s]


Fine-Tuning Epoch 11, Loss: 0.0062, Accuracy: 100.00%


Fine-Tuning Epoch 12: 100%|██████████| 300/300 [01:36<00:00,  3.11it/s]


Fine-Tuning Epoch 12, Loss: 0.0076, Accuracy: 99.96%


Fine-Tuning Epoch 13: 100%|██████████| 300/300 [01:38<00:00,  3.03it/s]


Fine-Tuning Epoch 13, Loss: 0.0068, Accuracy: 99.97%


Fine-Tuning Epoch 14: 100%|██████████| 300/300 [01:37<00:00,  3.07it/s]


Fine-Tuning Epoch 14, Loss: 0.0057, Accuracy: 99.99%


Fine-Tuning Epoch 15: 100%|██████████| 300/300 [01:36<00:00,  3.10it/s]

Fine-Tuning Epoch 15, Loss: 0.0036, Accuracy: 99.99%





In [16]:
# Extract features
train_features, train_labels, _ = extract_features(model, train_loader, device)
test_features, test_labels, _ = extract_features(model, test_loader, device)

Extracting features: 100%|██████████| 300/300 [01:02<00:00,  4.80it/s]
Extracting features: 100%|██████████| 75/75 [00:18<00:00,  4.08it/s]


In [17]:
# Verify number of classes
num_classes = len(np.unique(np.concatenate((train_labels, test_labels))))
print(f"Number of classes: {num_classes}")
if num_classes != 300:
    raise ValueError(f"Expected 300 classes, but found {num_classes}. Verify filename format (e.g., '00001.bmp' to '12000.bmp').")

Number of classes: 300


In [18]:
# Normalize features
train_features = train_features / np.linalg.norm(train_features, axis=1, keepdims=True)
test_features = test_features / np.linalg.norm(test_features, axis=1, keepdims=True)

In [19]:
# Train SVM with RBF kernel and hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 'auto', 0.01, 0.1]
}

In [20]:
svm_clf = SVC(kernel='rbf', decision_function_shape='ovr', probability=False)
grid_search = GridSearchCV(svm_clf, param_grid, cv=3, n_jobs=-1)
grid_search.fit(train_features, train_labels)
print(f"Best SVM parameters: {grid_search.best_params_}")

Best SVM parameters: {'C': 1, 'gamma': 'scale'}


In [21]:
# Use best estimator
svm_clf = grid_search.best_estimator_

In [22]:
# Compute metrics
topk_acc, roc_auc, eer, far, frr = compute_metrics(test_features, test_labels, svm_clf)

In [23]:
# Print metrics
print(f"Top-1 Identification Accuracy: {topk_acc[1]*100:.2f}%")
print(f"Top-5 Identification Accuracy: {topk_acc[5]*100:.2f}%")
print(f"ROC AUC: {roc_auc:.4f}")
print(f"EER: {eer:.4f}")
print(f"FAR: {far:.4f}")
print(f"FRR: {frr:.4f}")

Top-1 Identification Accuracy: 99.88%
Top-5 Identification Accuracy: 99.96%
ROC AUC: 0.4929
EER: 0.5058
FAR: 0.5059
FRR: 0.5058
