# Finetuning Previously Built Model with Augmented Images

In [None]:
import kagglehub

# Fetch the latest SOCOFing snapshot through kagglehub, then point `path`
# at the "Real" sub-folder inside the downloaded dataset.
path = kagglehub.dataset_download("ruizgara/socofing")
path += "/SOCOFing/Real/"
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/socofing/SOCOFing/Real/


In [None]:
# Retraining: fine-tune the previously trained model on altered fingerprints

import os
from pathlib import Path
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import torch.nn.functional as F

# --------------------------------------
# Dataset (recursively reads all .BMPs)
# --------------------------------------
class SOCOFingDataset(Dataset):
    """Grayscale fingerprint dataset built from a directory tree of BMP files.

    Every file below ``image_folder`` whose name ends in ``.bmp``
    (case-insensitive) becomes one sample. The class label comes from the
    filename: the text before the first ``"__"`` is taken as the person ID,
    and person IDs are mapped to consecutive integers in sorted string order.

    Args:
        image_folder: Root directory to scan recursively.
        transform: Optional callable applied to the PIL image in
            ``__getitem__``.

    Attributes:
        image_paths: Sorted list of sample file paths.
        label_map: Mapping from person-ID string to integer class index.
        labels: Integer class index for each entry of ``image_paths``; lets
            callers group samples by class without decoding any image.
    """

    def __init__(self, image_folder, transform=None):
        self.image_folder = image_folder
        self.transform = transform

        # os.walk yields entries in filesystem order, which is not stable
        # across machines; sorting keeps sample indices reproducible.
        self.image_paths = sorted(
            os.path.join(root, fname)
            for root, _, files in os.walk(self.image_folder)
            for fname in files
            if fname.lower().endswith('.bmp')
        )

        self.label_map = self._build_label_map()
        # Resolve every label once here instead of re-parsing the filename
        # on each __getitem__ call.
        self.labels = [
            self.label_map[self._person_id(p)] for p in self.image_paths
        ]

    @staticmethod
    def _person_id(path):
        """Return the person ID encoded in a file name (text before '__')."""
        return os.path.basename(path).split("__")[0]

    def _build_label_map(self):
        """Map each distinct person ID to an index in sorted string order."""
        person_ids = {self._person_id(p) for p in self.image_paths}
        return {pid: idx for idx, pid in enumerate(sorted(person_ids))}

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is opened with PIL, converted to single-channel ('L') and
        passed through ``transform`` when one was supplied.
        """
        image = Image.open(self.image_paths[idx]).convert('L')  # grayscale

        if self.transform:
            image = self.transform(image)

        return image, self.labels[idx]

# ---------------------------------------------------------
# Triplet loss wrapper (distance-based, no TensorFlow)
# ---------------------------------------------------------
class TripletLoss(nn.Module):
    """Hinge-style triplet loss on Euclidean distances between embeddings.

    For each triplet the loss is ``max(0, d(a, p) - d(a, n) + margin)``;
    the per-triplet values are averaged over the batch.

    Args:
        margin: Amount by which the anchor-negative distance should exceed
            the anchor-positive distance before a triplet stops contributing.
    """

    def __init__(self, margin=0.3):
        super().__init__()
        self.margin = margin
        self.euc = nn.PairwiseDistance(p=2)

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss for a batch of embedding triplets."""
        d_anchor_pos = self.euc(anchor, positive)
        d_anchor_neg = self.euc(anchor, negative)
        violation = d_anchor_pos - d_anchor_neg + self.margin
        return F.relu(violation).mean()

# -----------------------------------------
# Model using ResNet18 + embedding layer
# -----------------------------------------
class FingerprintNet(nn.Module):
    """ResNet-18 backbone that maps a 1-channel image to a unit-length embedding.

    The stock first convolution is replaced by a 1-input-channel one and the
    classification head by a linear layer of size ``embedding_dim``. Both
    replacement layers start from fresh initialisation; only the remaining
    backbone layers keep the downloaded weights.

    Args:
        embedding_dim: Size of the output embedding vector.
    """

    def __init__(self, embedding_dim=128):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the `weights=` enum
        # is the replacement and matches the other FingerprintNet definitions
        # in this file.
        self.backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        # Accept grayscale input instead of RGB.
        self.backbone.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Swap the classifier for an embedding head.
        num_ftrs = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(num_ftrs, embedding_dim)

    def forward(self, x):
        """Return L2-normalised embeddings (normalised along dim 1)."""
        x = self.backbone(x)
        x = F.normalize(x, p=2, dim=1)
        return x

# ----------------------------------------------
# Generate Triplets from Dataset on-the-fly
# ----------------------------------------------
import random

class TripletDataset(Dataset):
    """Wrap a labelled dataset so that each item is an (anchor, positive, negative) triplet.

    ``base_dataset[i]`` must return ``(sample, label)``. For index ``i`` the
    anchor is sample ``i``, the positive is a random *other* sample with the
    same label, and the negative is a random sample with a different label.

    Attributes:
        base_dataset: The wrapped dataset.
        label_to_indices: Mapping from label to the list of sample indices
            carrying that label.
    """

    def __init__(self, base_dataset):
        self.base_dataset = base_dataset
        self.label_to_indices = self._build_index()
        # Cached once: rebuilding this list on every __getitem__ call costs
        # O(num_classes) per sample.
        self._all_labels = list(self.label_to_indices)

    def _build_index(self):
        """Group sample indices by label.

        If the base dataset exposes a ``labels`` list/tuple with one entry per
        sample it is used directly. Otherwise every sample is fetched through
        ``__getitem__``, which for image datasets means decoding (and
        transforming) every image once.
        """
        label_to_indices = {}
        labels = getattr(self.base_dataset, "labels", None)
        if isinstance(labels, (list, tuple)) and len(labels) == len(self.base_dataset):
            indexed_labels = enumerate(labels)
        else:
            indexed_labels = (
                (idx, label) for idx, (_, label) in enumerate(self.base_dataset)
            )
        for idx, label in indexed_labels:
            label_to_indices.setdefault(label, []).append(idx)
        return label_to_indices

    def __len__(self):
        return len(self.base_dataset)

    def __getitem__(self, idx):
        """Return ``(anchor, positive, negative)`` samples for anchor ``idx``.

        Raises:
            ValueError: If the dataset has fewer than two distinct labels, so
                no negative can be drawn.
        """
        anchor_img, anchor_label = self.base_dataset[idx]

        same_class = self.label_to_indices[anchor_label]
        pos_idx = idx
        # A class with a single sample has no "other" positive. Reuse the
        # anchor index instead of spinning forever; with a random transform on
        # the base dataset this still yields a differently augmented view.
        if len(same_class) > 1:
            while pos_idx == idx:
                pos_idx = random.choice(same_class)
        positive_img, _ = self.base_dataset[pos_idx]

        if len(self._all_labels) < 2:
            raise ValueError(
                "TripletDataset needs at least two distinct labels to sample a negative"
            )
        neg_label = anchor_label
        while neg_label == anchor_label:
            neg_label = random.choice(self._all_labels)
        neg_idx = random.choice(self.label_to_indices[neg_label])
        negative_img, _ = self.base_dataset[neg_idx]

        return anchor_img, positive_img, negative_img

# ----------------------------
# Fine-Tuning Pipeline
# ----------------------------
def fine_tune_model(
    data_dir="/kaggle/input/socofing/SOCOFing/Altered/Altered-Hard/",
    checkpoint_in="/content/fingerprint_model_finetuned.pth",
    checkpoint_out="fingerprint_model_finetuned2.pth",
    embedding_dim=128,
    batch_size=32,
    num_epochs=5,
    margin=0.3,
    lr=1e-5,
    weight_decay=1e-6,
    num_workers=2,
    device=None,
):
    """Fine-tune a saved FingerprintNet on augmented triplets and save the result.

    Loads ``checkpoint_in`` into a fresh ``FingerprintNet``, freezes every
    backbone parameter whose name does not start with ``layer3`` or
    ``layer4``, trains with ``TripletLoss`` and Adam, prints the mean loss per
    epoch and writes the final ``state_dict`` to ``checkpoint_out``.

    All arguments default to the values that were previously hard-coded, so
    calling ``fine_tune_model()`` with no arguments runs the same experiment.

    Args:
        data_dir: Folder scanned recursively for ``.bmp`` training images.
        checkpoint_in: Path of the state dict to start from.
        checkpoint_out: Path the fine-tuned state dict is written to.
        embedding_dim: Embedding size; must match the checkpoint.
        batch_size: Triplets per optimisation step.
        num_epochs: Number of passes over the dataset.
        margin: Triplet-loss margin.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        num_workers: DataLoader worker processes.
        device: Torch device (or device string). Defaults to CUDA when
            available, otherwise CPU.

    Raises:
        FileNotFoundError: If ``data_dir`` contains no ``.bmp`` images.
    """
    # Fall back to CPU instead of crashing on machines without a GPU.
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(device)

    # ---- Data transforms (random augmentation is re-drawn on every access) ----
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomRotation(15),
        transforms.RandomAffine(0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ])

    # ---- Dataset ----
    base_dataset = SOCOFingDataset(data_dir, transform=transform)
    if len(base_dataset) == 0:
        # Without this guard the epoch loop divides by len(dataloader) == 0.
        raise FileNotFoundError(f"No .bmp images found under {data_dir!r}")
    triplet_dataset = TripletDataset(base_dataset)
    dataloader = DataLoader(
        triplet_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )

    # ---- Model ----
    model = FingerprintNet(embedding_dim=embedding_dim).to(device)
    # SECURITY NOTE: weights_only=False lets torch.load unpickle arbitrary
    # objects, which can execute code. Only load checkpoints you created
    # yourself; switch to weights_only=True if the file is a plain state dict.
    state = torch.load(checkpoint_in, map_location=device, weights_only=False)
    model.load_state_dict(state)

    # Train only the last two residual stages.
    # NOTE(review): this rule also freezes the embedding head (`fc.*`), since
    # its name does not start with layer3/layer4 - confirm that is intended.
    for name, param in model.backbone.named_parameters():
        if not name.startswith(("layer3", "layer4")):
            param.requires_grad = False

    # ---- Loss + optimizer (only over parameters that still require grad) ----
    criterion = TripletLoss(margin=margin)
    trainable = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable, lr=lr, weight_decay=weight_decay)

    # ---- Fine-tune ----
    # NOTE(review): model.train() puts every module in training mode,
    # including BatchNorm layers in the frozen stages, so their running
    # statistics still change.
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for anchor, positive, negative in dataloader:
            anchor = anchor.to(device)
            positive = positive.to(device)
            negative = negative.to(device)

            emb_a = model(anchor)
            emb_p = model(positive)
            emb_n = model(negative)

            loss = criterion(emb_a, emb_p, emb_n)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_loss = running_loss / len(dataloader)
        print(f"[Epoch {epoch+1}/{num_epochs}] Loss: {avg_loss:.4f}")

    # ---- Save model ----
    torch.save(model.state_dict(), checkpoint_out)
    print(f"Fine-tuned model saved as {checkpoint_out}")

# --------------------------------
# Run the fine-tuning
# --------------------------------
# Start fine-tuning only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    fine_tune_model()




[Epoch 1/5] Loss: 0.1106
[Epoch 2/5] Loss: 0.1076
[Epoch 3/5] Loss: 0.1071
[Epoch 4/5] Loss: 0.1018
[Epoch 5/5] Loss: 0.1021
Fine-tuned model saved as fingerprint_model_finetuned2.pth


# Prediction

In [None]:
import psycopg2


def connect(dsn=None):
    """Open a PostgreSQL connection and return a cursor on it.

    Args:
        dsn: Connection string / URI passed to ``psycopg2.connect``. When
            omitted, the ``FINGERPRINT_DB_DSN`` environment variable is used.

    Returns:
        A ``(cursor, connection)`` tuple. The caller is responsible for
        closing the connection.

    Raises:
        RuntimeError: If no DSN is supplied and ``FINGERPRINT_DB_DSN`` is
            unset or empty.
    """
    import os  # local import: the cell defining this function does not import os

    # The connection URI, password included, used to be hard-coded on this
    # line. Secrets must not live in source or notebooks; the credential that
    # was committed should be treated as leaked and rotated.
    if dsn is None:
        dsn = os.environ.get("FINGERPRINT_DB_DSN")
    if not dsn:
        raise RuntimeError(
            "No database DSN configured: pass dsn=... or set the "
            "FINGERPRINT_DB_DSN environment variable"
        )

    conn = psycopg2.connect(dsn)
    cur = conn.cursor()
    return cur, conn




In [None]:
import os
import psycopg2
import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
import numpy as np
#from psycopg2.extras import register_vector

# ----------------------------
# 1) Model architecture
# ----------------------------
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

class FingerprintNet(nn.Module):
    """ResNet-18 adapted to 1-channel input with a linear embedding head.

    Args:
        embedding_dim: Length of the embedding vector produced per image.
    """

    def __init__(self, embedding_dim=128):
        super().__init__()
        net = resnet18(weights=ResNet18_Weights.DEFAULT)
        # Single-channel stem in place of the RGB one.
        net.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Embedding head in place of the classifier.
        net.fc = nn.Linear(net.fc.in_features, embedding_dim)
        self.backbone = net

    def forward(self, x):
        """Embed a batch and scale each row to unit L2 norm."""
        features = self.backbone(x)
        return F.normalize(features, p=2, dim=1)

# ----------------------------
# 2) Load your fine-tuned model
# ----------------------------
# Checkpoint to load; inference runs on CUDA when available, otherwise CPU.
MODEL_PATH = "/content/fingerprint_model_finetuned.pth"   # update as needed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network, load the saved state dict onto `device`, and switch to
# eval mode for inference.
# NOTE: weights_only=False unpickles arbitrary objects - only load trusted files.
model = FingerprintNet(embedding_dim=128).to(device)
state = torch.load(MODEL_PATH, map_location=device, weights_only=False)
model.load_state_dict(state)
model.eval()

# ----------------------------
# 3) Image transform
# ----------------------------
# Deterministic preprocessing (no augmentation): resize to 224x224, convert
# to a tensor, then normalise with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# ----------------------------
# 4) DB helper
# ----------------------------
def find_nearest_in_db(query_emb: np.ndarray):
    """Return the stored fingerprint closest to ``query_emb``.

    Opens a fresh connection via ``connect()``, orders
    ``public.fingerprint_embeddings`` by pgvector's ``<->`` distance to the
    query vector and returns the first row.

    Args:
        query_emb: 1-D embedding; it is sent as a Python list and cast to
            ``vector`` on the server.

    Returns:
        ``(file_id, distance)`` of the nearest row.

    Raises:
        LookupError: If the table contains no rows.
    """
    cur, conn = connect()
    try:
        # A plain parameterised query is enough here: the connection is used
        # for a single lookup, so a server-side PREPARE/EXECUTE pair adds
        # nothing.
        cur.execute(
            """
            SELECT file_id, embedding <-> %s::vector AS distance
            FROM public.fingerprint_embeddings
            ORDER BY distance
            LIMIT 1;
            """,
            (query_emb.tolist(),),
        )
        row = cur.fetchone()
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

    if row is None:
        raise LookupError(
            "public.fingerprint_embeddings contains no rows to match against"
        )
    file_id, dist = row
    return file_id, dist

# ----------------------------
# 5) Recognition function
# ----------------------------
def recognize_fingerprint(test_path: str):
    """Embed the image at ``test_path`` and look up its nearest DB entry.

    Prints the test file name and the match, then returns
    ``(match_file, distance)`` as reported by ``find_nearest_in_db``.
    """
    # Preprocess: grayscale -> transform -> add batch dimension -> device.
    grayscale = Image.open(test_path).convert("L")
    batch = transform(grayscale).unsqueeze(0).to(device)

    # Forward pass without autograd bookkeeping, then flatten to 1-D numpy.
    with torch.no_grad():
        embedding = model(batch)
    embedding = embedding.cpu().numpy().flatten()

    # Nearest-neighbour lookup in the database.
    match_file, distance = find_nearest_in_db(embedding)

    print(f"Test file: {os.path.basename(test_path)}")
    print(f"Matched : {match_file}  (distance={distance:.4f})")
    return match_file, distance

# ----------------------------
# 6) Example usage
# ----------------------------

# Run a single lookup for one sample image against the database.
test_path = "/content/RR89.BMP"
recognize_fingerprint(test_path)


Test file: RR89.BMP
Matched : 231__M_Left_ring_finger.BMP  (distance=10.3034)


('231__M_Left_ring_finger.BMP', 10.303426554058742)

In [None]:
import os
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity

# -----------------------------------------
# 1) Recreate your metric‑learning model
# -----------------------------------------
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

class FingerprintNet(nn.Module):
    """Metric-learning network: grayscale ResNet-18 producing unit-norm embeddings.

    Args:
        embedding_dim: Dimensionality of the returned embedding.
    """

    def __init__(self, embedding_dim=128):
        super().__init__()
        self.backbone = resnet18(weights=ResNet18_Weights.DEFAULT)
        # First convolution rebuilt for one input channel.
        self.backbone.conv1 = nn.Conv2d(
            1, 64, kernel_size=7, stride=2, padding=3, bias=False
        )
        # Final fully connected layer rebuilt as the embedding projection.
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_features, embedding_dim)

    def forward(self, x):
        """Return the backbone output normalised to unit L2 length per row."""
        raw = self.backbone(x)
        unit = F.normalize(raw, p=2, dim=1)
        return unit

# -----------------------------------------
# 2) Load your fine‑tuned weights
# -----------------------------------------
# Load the checkpoint on CPU and switch to eval mode for inference.
# NOTE: weights_only=False unpickles arbitrary objects - only load trusted files.
model_path = "/content/fingerprint_model_finetuned2.pth"
model = FingerprintNet(embedding_dim=128).cpu()
state = torch.load(model_path, map_location="cpu", weights_only=False)
model.load_state_dict(state)
model.eval()

# -----------------------------------------
# 3) Build the gallery embeddings
# -----------------------------------------
# The gallery is built from the Real/ folder only; Altered/ images are not
# part of it.
DATA_DIR = "/kaggle/input/socofing/SOCOFing/Real"
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# Match the extension case-insensitively, as SOCOFingDataset does, and sort
# so gallery row order is reproducible.
gallery_paths = sorted(
    os.path.join(DATA_DIR, fname)
    for fname in os.listdir(DATA_DIR)
    if fname.lower().endswith('.bmp')
)
if not gallery_paths:
    # np.stack on an empty list fails with an unhelpful message; fail early.
    raise FileNotFoundError(f"No .bmp images found in {DATA_DIR!r}")

gallery_embeddings = []
gallery_labels     = []
with torch.no_grad():
    for p in gallery_paths:
        img = Image.open(p).convert("L")       # grayscale
        t   = transform(img).unsqueeze(0)      # add batch dimension
        emb = model(t).numpy().flatten()       # 1-D embedding
        gallery_embeddings.append(emb)
        # Person ID is the part of the file name before the first "__".
        gallery_labels.append(os.path.basename(p).split("__")[0])

gallery_embeddings = np.stack(gallery_embeddings, axis=0)  # one row per gallery image

# -----------------------------------------
# 4) Predict function
# -----------------------------------------
def predict_fingerprint(query_path, top_k=1):
    """Rank gallery identities by cosine similarity to the query image.

    Args:
        query_path: Path of the fingerprint image to identify.
        top_k: Number of best matches to return.

    Returns:
        A list of ``(person_id, similarity)`` tuples, most similar first.
    """
    query_img = Image.open(query_path).convert("L")
    query_batch = transform(query_img).unsqueeze(0)
    with torch.no_grad():
        q_emb = model(query_batch).numpy()
    sims = cosine_similarity(q_emb, gallery_embeddings)[0]

    # Indices sorted by descending similarity, truncated to the top_k best.
    ranked = np.argsort(sims)[::-1]
    matches = []
    for i in ranked[:top_k]:
        matches.append((gallery_labels[i], float(sims[i])))
    return matches




In [None]:
# -----------------------------------------
# 5) Example usage
# -----------------------------------------
# Look up the three most similar gallery identities for one query image and
# print each person ID with its cosine-similarity score.
query = "/content/RR89.BMP"  # your test image
results = predict_fingerprint(query, top_k=3)
for pid, score in results:
    print(f"→ Person {pid} (score={score:.4f})")

→ Person 89 (score=1.0000)
→ Person 378 (score=0.9860)
→ Person 476 (score=0.9833)


In [None]:
# Evaluation on the unaltered (Real) fingerprints

import os
import glob
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# ─── 1. Gather all test fingerprints ─────────────────────────────
TEST_DIR = "/kaggle/input/socofing/SOCOFing/Real"   # change to your test folder
pattern = os.path.join(TEST_DIR, "**", "*.BMP")
# glob returns matches in arbitrary order; sort so repeated runs process and
# log the files in the same sequence.
test_paths = sorted(glob.glob(pattern, recursive=True))
if not test_paths:
    # An empty test set would make the accuracy below meaningless.
    raise FileNotFoundError(f"No test images matched {pattern!r}")

# NOTE(review): TEST_DIR is the same folder the gallery embeddings are built
# from (DATA_DIR), so every query image is itself in the gallery. The
# accuracy reported here is therefore not a held-out estimate.

# ─── 2. Run prediction & collect true/pred ───────────────────────
y_true = []
y_pred = []

for path in test_paths:
    # Ground-truth person ID is the part of the file name before "__".
    true_id = os.path.basename(path).split("__")[0]
    y_true.append(true_id)
    print(f"Processing(true ID: {true_id})")

    # predict_fingerprint returns [(pred_id, score)]; keep the top-1 ID.
    top1 = predict_fingerprint(path, top_k=1)[0][0]
    y_pred.append(top1)
    print(f"Predicted ID: {top1}")

# ─── 3. Compute accuracy ──────────────────────────────────────────
acc = accuracy_score(y_true, y_pred)
print(f"Overall Top‑1 Accuracy: {acc*100:.2f}%")



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Predicted ID: 223
Processing(true ID: 477)
Predicted ID: 477
Processing(true ID: 584)
Predicted ID: 584
Processing(true ID: 533)
Predicted ID: 533
Processing(true ID: 550)
Predicted ID: 550
Processing(true ID: 257)
Predicted ID: 257
Processing(true ID: 538)
Predicted ID: 538
Processing(true ID: 92)
Predicted ID: 92
Processing(true ID: 588)
Predicted ID: 588
Processing(true ID: 132)
Predicted ID: 132
Processing(true ID: 63)
Predicted ID: 63
Processing(true ID: 250)
Predicted ID: 250
Processing(true ID: 282)
Predicted ID: 282
Processing(true ID: 480)
Predicted ID: 480
Processing(true ID: 260)
Predicted ID: 260
Processing(true ID: 353)
Predicted ID: 353
Processing(true ID: 419)
Predicted ID: 419
Processing(true ID: 41)
Predicted ID: 41
Processing(true ID: 440)
Predicted ID: 440
Processing(true ID: 58)
Predicted ID: 58
Processing(true ID: 344)
Predicted ID: 344
Processing(true ID: 181)
Predicted ID: 181
Processing(true ID: 46

In [None]:
# Evaluation on the Altered-Hard fingerprints

import os
import glob
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# ─── 1. Gather all test fingerprints ─────────────────────────────
TEST_DIR = "/kaggle/input/socofing/SOCOFing/Altered/Altered-Hard"   # change to your test folder
pattern = os.path.join(TEST_DIR, "**", "*.BMP")
# glob returns matches in arbitrary order; sort so repeated runs process and
# log the files in the same sequence.
test_paths = sorted(glob.glob(pattern, recursive=True))
if not test_paths:
    # An empty test set would make the accuracy below meaningless.
    raise FileNotFoundError(f"No test images matched {pattern!r}")

# NOTE(review): Altered-Hard is also the default fine-tuning folder in
# fine_tune_model, so if the loaded checkpoint came from that run this is an
# evaluation on training data - confirm before reporting the number.

# ─── 2. Run prediction & collect true/pred ───────────────────────
y_true = []
y_pred = []

for path in test_paths:
    # Ground-truth person ID is the part of the file name before "__".
    true_id = os.path.basename(path).split("__")[0]
    y_true.append(true_id)
    print(f"Processing(true ID: {true_id})")

    # predict_fingerprint returns [(pred_id, score)]; keep the top-1 ID.
    top1 = predict_fingerprint(path, top_k=1)[0][0]
    y_pred.append(top1)
    print(f"Predicted ID: {top1}")

# ─── 3. Compute accuracy ──────────────────────────────────────────
acc = accuracy_score(y_true, y_pred)
print(f"Overall Top‑1 Accuracy: {acc*100:.2f}%")



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Predicted ID: 244
Processing(true ID: 579)
Predicted ID: 579
Processing(true ID: 484)
Predicted ID: 484
Processing(true ID: 338)
Predicted ID: 338
Processing(true ID: 117)
Predicted ID: 117
Processing(true ID: 321)
Predicted ID: 321
Processing(true ID: 309)
Predicted ID: 214
Processing(true ID: 534)
Predicted ID: 534
Processing(true ID: 236)
Predicted ID: 21
Processing(true ID: 411)
Predicted ID: 411
Processing(true ID: 325)
Predicted ID: 34
Processing(true ID: 488)
Predicted ID: 129
Processing(true ID: 501)
Predicted ID: 576
Processing(true ID: 530)
Predicted ID: 411
Processing(true ID: 575)
Predicted ID: 575
Processing(true ID: 246)
Predicted ID: 344
Processing(true ID: 312)
Predicted ID: 312
Processing(true ID: 318)
Predicted ID: 318
Processing(true ID: 79)
Predicted ID: 216
Processing(true ID: 320)
Predicted ID: 250
Processing(true ID: 468)
Predicted ID: 468
Processing(true ID: 333)
Predicted ID: 333
Processing(true I

In [None]:
# Evaluation on the Altered-Medium fingerprints

import os
import glob
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# ─── 1. Gather all test fingerprints ─────────────────────────────
TEST_DIR = "/kaggle/input/socofing/SOCOFing/Altered/Altered-Medium"   # change to your test folder
pattern = os.path.join(TEST_DIR, "**", "*.BMP")
# glob returns matches in arbitrary order; sort so repeated runs process and
# log the files in the same sequence.
test_paths = sorted(glob.glob(pattern, recursive=True))
if not test_paths:
    # An empty test set would make the accuracy below meaningless.
    raise FileNotFoundError(f"No test images matched {pattern!r}")

# ─── 2. Run prediction & collect true/pred ───────────────────────
y_true = []
y_pred = []

for path in test_paths:
    # Ground-truth person ID is the part of the file name before "__".
    true_id = os.path.basename(path).split("__")[0]
    y_true.append(true_id)
    print(f"Processing (true ID: {true_id})")

    # predict_fingerprint returns [(pred_id, score)]; keep the top-1 ID.
    top1 = predict_fingerprint(path, top_k=1)[0][0]
    y_pred.append(top1)
    print(f"Predicted ID: {top1}")

# ─── 3. Compute accuracy ──────────────────────────────────────────
acc = accuracy_score(y_true, y_pred)
print(f"Overall Top‑1 Accuracy: {acc*100:.2f}%")



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Predicted ID: 522
Processing (true ID: 479)
Predicted ID: 160
Processing (true ID: 209)
Predicted ID: 209
Processing (true ID: 449)
Predicted ID: 449
Processing (true ID: 411)
Predicted ID: 411
Processing (true ID: 382)
Predicted ID: 382
Processing (true ID: 318)
Predicted ID: 318
Processing (true ID: 256)
Predicted ID: 256
Processing (true ID: 478)
Predicted ID: 450
Processing (true ID: 331)
Predicted ID: 331
Processing (true ID: 224)
Predicted ID: 450
Processing (true ID: 343)
Predicted ID: 343
Processing (true ID: 197)
Predicted ID: 197
Processing (true ID: 388)
Predicted ID: 45
Processing (true ID: 300)
Predicted ID: 300
Processing (true ID: 166)
Predicted ID: 166
Processing (true ID: 439)
Predicted ID: 439
Processing (true ID: 143)
Predicted ID: 489
Processing (true ID: 494)
Predicted ID: 494
Processing (true ID: 383)
Predicted ID: 383
Processing (true ID: 403)
Predicted ID: 403
Processing (true ID: 210)
Predicted ID