# 1. Setup and Configuration

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, models, transforms
from sklearn.metrics import accuracy_score
from PIL import Image
import os
import copy
import random
import numpy as np

# --------------------------
# Set random seeds for reproducibility
# --------------------------
def set_seed(seed=42):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU plus all CUDA devices), then configures cuDNN for deterministic
    kernel selection.

    Args:
        seed: Integer seed applied to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    # Disable the cuDNN autotuner and force deterministic algorithms.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

set_seed(42)

# --------------------------
# Paths and device setup
# --------------------------
# Source dataset location. Defaults to the Kaggle input path; set the
# FINGERPRINT_DATA_DIR environment variable to run elsewhere without editing
# the notebook.
data_dir = os.environ.get(
    "FINGERPRINT_DATA_DIR",
    "/kaggle/input/finger-prints-1-10/Finger print Dataset (1-10)",
)
# Working directory that receives the originals plus their augmented copies.
augmented_dir = "augmented_data"

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✅ Using device: {device}")


✅ Using device: cuda


# 2. Data Augmentation (10× Each Image)

In [3]:
# Offline augmentation applied to PIL images (no tensor conversion here, the
# results are written straight back to disk).
augment = transforms.Compose([
    transforms.RandomResizedCrop(300, scale=(0.8, 1.0)),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),
    transforms.RandomAffine(degrees=0, shear=10, translate=(0.1, 0.1)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomPerspective(distortion_scale=0.4, p=0.7),
    transforms.RandomAdjustSharpness(sharpness_factor=2),
])

if not os.path.exists(augmented_dir):
    # Validate the source first: otherwise a wrong path would leave an empty
    # output folder behind and every later run would skip creation.
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"Source dataset directory not found: {data_dir}")

    print("🧬 Creating augmented dataset...")

    # Build into a staging folder and rename it only once everything has been
    # written, so an interrupted run is never mistaken for a complete dataset.
    # A leftover staging folder is simply reused; file names are deterministic,
    # so its contents get overwritten.
    staging_dir = os.path.normpath(augmented_dir) + ".partial"
    os.makedirs(staging_dir, exist_ok=True)

    # Sorted listings keep the order in which random augmentations are drawn
    # independent of the filesystem, so the seeded run is repeatable.
    for class_name in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        dest_dir = os.path.join(staging_dir, class_name)
        os.makedirs(dest_dir, exist_ok=True)

        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            if not os.path.isfile(img_path):
                continue

            # convert() returns an independent in-memory copy, so the file
            # handle can be released immediately.
            with Image.open(img_path) as source_img:
                img = source_img.convert("RGB")

            # Save original
            img.save(os.path.join(dest_dir, img_name))

            # Generate 10 augmented copies
            stem = os.path.splitext(img_name)[0]
            for i in range(10):
                aug_img = augment(img)
                aug_img.save(os.path.join(dest_dir, f"{stem}_aug{i}.jpg"))

    # Publish the finished dataset under its final name.
    os.rename(staging_dir, augmented_dir)
    print("✅ Augmented dataset created successfully!")
else:
    print("🔁 Augmented dataset already exists — skipping creation.")


🧬 Creating augmented dataset...
✅ Augmented dataset created successfully!


# 3. Data Transforms (Train / Validation)

In [4]:
train_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.RandomResizedCrop(300, scale=(0.8, 1.0)),
    transforms.RandomRotation(25),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomAffine(degrees=15, shear=8),
    transforms.RandomHorizontalFlip(),
    transforms.RandomErasing(p=0.3, scale=(0.02, 0.15)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

val_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])


# 4. Dataset and DataLoader

In [5]:
dataset = datasets.ImageFolder(root=augmented_dir, transform=train_transforms)

# Split into train/val (85/15)
train_size = int(0.85 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Apply val transforms
val_dataset.dataset.transform = val_transforms

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=2)

class_names = dataset.classes
print(f"🧾 Classes: {class_names}")
print(f"📊 Train samples: {len(train_dataset)}, Validation samples: {len(val_dataset)}")


🧾 Classes: ['001', '002', '003', '004', '005', '006', '007', '008', '009', '010']
📊 Train samples: 561, Validation samples: 99


# 5. Model Definition (EfficientNet-B0)

In [6]:
# ImageNet-pretrained EfficientNet-B0. The explicit `weights=` enum selects
# the same checkpoint the deprecated `pretrained=True` flag used to load.
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# Freeze feature extractor: only the classifier head receives gradients.
for param in model.features.parameters():
    param.requires_grad = False

# Replace classifier head with dropout + a linear layer sized to our classes.
# NOTE(review): only classifier[1] is swapped; torchvision's stock
# classifier[0] is presumably already a Dropout layer, which would stack two
# dropouts. Confirm this is intended.
num_features = model.classifier[1].in_features
model.classifier[1] = nn.Sequential(
    nn.Dropout(0.6),
    nn.Linear(num_features, len(class_names))
)

# Multi-GPU support: replicate the model across all visible GPUs.
if torch.cuda.device_count() > 1:
    print(f"🚀 Using {torch.cuda.device_count()} GPUs!")
    model = nn.DataParallel(model)

model = model.to(device)


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-3dd342df.pth


  0%|          | 0.00/20.5M [00:00<?, ?B/s]

🚀 Using 2 GPUs!


# 6. Loss, Optimizer, Scheduler, Early Stopping

In [7]:
# Cross-entropy with label smoothing to soften the one-hot targets.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
# Halve the LR when validation accuracy (mode='max') has not improved for
# 3 epochs. The deprecated `verbose` flag is omitted: it triggers a warning on
# recent PyTorch, and the training loop prints the learning rate every epoch.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

# Early-stopping bookkeeping.
best_acc = 0.0          # best validation accuracy seen so far
epochs_no_improve = 0   # consecutive epochs without a new best
patience = 6            # stop after this many epochs without improvement
best_model_wts = copy.deepcopy(model.state_dict())


# 7. Training Loop

In [8]:
epochs = 50


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: Network to evaluate or train.
        loader: DataLoader yielding ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.
        optimizer: When given, the model is put in train mode and updated
            after every batch; when ``None`` the pass is evaluation only
            (eval mode, gradients disabled).

    Returns:
        Tuple of Python floats: sample-weighted mean loss and accuracy.

    Raises:
        ValueError: If the loader yields no samples.
    """
    training = optimizer is not None
    # NOTE(review): train mode also applies to the frozen feature extractor,
    # so any BatchNorm layers in it presumably keep updating their running
    # statistics. Confirm that is intended.
    model.train(training)

    total_loss, total_correct, total_seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # Accumulate plain Python numbers so the metrics stay off the GPU
            # and keep one type from the first batch to the last.
            batch_count = inputs.size(0)
            total_loss += loss.item() * batch_count
            total_correct += (outputs.argmax(dim=1) == labels).sum().item()
            total_seen += batch_count

    if total_seen == 0:
        raise ValueError("DataLoader yielded no samples")
    return total_loss / total_seen, total_correct / total_seen


for epoch in range(epochs):
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)

    # ---- Validation ----
    val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)
    scheduler.step(val_acc)

    print(f"Epoch [{epoch+1}/{epochs}] | "
          f"Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f} | LR: {optimizer.param_groups[0]['lr']:.6f}")

    # Save best model
    if val_acc > best_acc:
        best_acc = val_acc
        best_model_wts = copy.deepcopy(model.state_dict())
        # NOTE: if `model` is wrapped in nn.DataParallel, the saved keys carry
        # a "module." prefix and must be loaded into a wrapped model (or have
        # the prefix stripped).
        torch.save(model.state_dict(), "best_fingerprint_model_final.pth")
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= patience:
        print("🛑 Early stopping triggered!")
        break

print(f"✅ Best Validation Accuracy: {best_acc:.4f}")


Epoch [1/50] | Train Acc: 0.1301 | Val Acc: 0.1616 | LR: 0.000100
Epoch [2/50] | Train Acc: 0.1711 | Val Acc: 0.3232 | LR: 0.000100
Epoch [3/50] | Train Acc: 0.1925 | Val Acc: 0.4646 | LR: 0.000100
Epoch [4/50] | Train Acc: 0.2406 | Val Acc: 0.5051 | LR: 0.000100
Epoch [5/50] | Train Acc: 0.2692 | Val Acc: 0.6061 | LR: 0.000100
Epoch [6/50] | Train Acc: 0.3387 | Val Acc: 0.6263 | LR: 0.000100
Epoch [7/50] | Train Acc: 0.3886 | Val Acc: 0.6667 | LR: 0.000100
Epoch [8/50] | Train Acc: 0.3922 | Val Acc: 0.6465 | LR: 0.000100
Epoch [9/50] | Train Acc: 0.4599 | Val Acc: 0.7273 | LR: 0.000100
Epoch [10/50] | Train Acc: 0.4617 | Val Acc: 0.7273 | LR: 0.000100
Epoch [11/50] | Train Acc: 0.4742 | Val Acc: 0.7677 | LR: 0.000100
Epoch [12/50] | Train Acc: 0.5455 | Val Acc: 0.7879 | LR: 0.000100
Epoch [13/50] | Train Acc: 0.5740 | Val Acc: 0.7778 | LR: 0.000100
Epoch [14/50] | Train Acc: 0.5722 | Val Acc: 0.7778 | LR: 0.000100
Epoch [15/50] | Train Acc: 0.5971 | Val Acc: 0.7576 | LR: 0.000100
Epoc

# 8. Evaluation and Final Accuracy

In [9]:
# Restore the best weights kept in memory and switch to inference mode.
model.load_state_dict(best_model_wts)
model.eval()

all_preds = []
all_labels = []

# Collect predictions and ground-truth labels over the validation loader.
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        # Index of the highest logit per sample is the predicted class.
        preds = torch.max(outputs, 1).indices

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())

# Fraction of validation samples whose prediction matches the label.
final_acc = accuracy_score(all_labels, all_preds)
print(f"🔥 Final Validation Accuracy: {final_acc:.4f}")


🔥 Final Validation Accuracy: 0.8384
