# Adding new Real World fingerprint data

In [None]:
!pip install pgvector psycopg2-binary


Collecting pgvector
  Downloading pgvector-0.4.1-py3-none-any.whl.metadata (18 kB)
Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Downloading pgvector-0.4.1-py3-none-any.whl (27 kB)
Downloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: psycopg2-binary, pgvector
Successfully installed pgvector-0.4.1 psycopg2-binary-2.9.10


In [None]:
import kagglehub

# Download the latest version of the "ruizgara/socofing" dataset and point
# `path` at the SOCOFing/Real/ folder inside it.
path = kagglehub.dataset_download("ruizgara/socofing") + "/SOCOFing/Real/"
print(f"Path to dataset files: {path}")

Path to dataset files: /kaggle/input/socofing/SOCOFing/Real/


In [None]:
import psycopg2


def connect(dsn=None):
    """Open a Postgres connection and return a cursor on it.

    Args:
        dsn: Connection string handed to ``psycopg2.connect``. When omitted,
            the ``DATABASE_URL`` environment variable is used if it is set;
            otherwise the placeholder string below is passed, which must be
            replaced with real credentials before the notebook can connect.

    Returns:
        ``(cur, conn)`` - note the order. The caller owns both objects and is
        responsible for closing them.
    """
    if dsn is None:
        # Local import: this cell only imports psycopg2.
        import os
        # Reading the DSN from the environment keeps credentials out of the notebook.
        dsn = os.environ.get("DATABASE_URL", 'Enter your own database credentials here')

    conn = psycopg2.connect(dsn)
    cur = conn.cursor()
    return cur, conn




# Addition of new data

In [None]:
# -----------------------------------------
# 1) Recreate your metric-learning model
# -----------------------------------------
import torch
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

class FingerprintNet(nn.Module):
    """ResNet-18 backbone that maps a 1-channel image batch to unit-length embeddings.

    Args:
        embedding_dim: Length of the embedding vector produced per image
            (default 128).
    """

    def __init__(self, embedding_dim=128):
        super().__init__()
        self.backbone = resnet18(weights=ResNet18_Weights.DEFAULT)
        # Swap the first convolution for a newly created layer with a single
        # input channel so grayscale tensors can be fed in directly.
        self.backbone.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Replace the final fully connected layer with a projection to the
        # embedding size.
        num_ftrs = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(num_ftrs, embedding_dim)

    def forward(self, x):
        """Return the backbone output for ``x``, L2-normalised along dim 1."""
        # Go through ``nn.functional`` instead of the ``F`` alias: this cell
        # never imports ``F``, so the alias only exists after a later cell ran.
        return nn.functional.normalize(self.backbone(x), p=2, dim=1)

# -----------------------------------------
# 2) Load your fine-tuned model
# -----------------------------------------
# Absolute path of the fine-tuned checkpoint that gets loaded below.
MODEL_PATH = "/content/fingerprint_model_finetuned2.pth"
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = FingerprintNet(embedding_dim=128)
model = model.to(device)
# NOTE(review): weights_only=False lets torch.load un-pickle arbitrary
# objects - only point MODEL_PATH at a checkpoint you trust.
state = torch.load(MODEL_PATH, map_location=device, weights_only=False)
model.load_state_dict(state)
# Switch to inference mode; as the last expression this also displays the model.
model.eval()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 178MB/s]


FingerprintNet(
  (backbone): ResNet(
    (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, t

In [None]:
import os
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
import torch.nn.functional as F
import psycopg2
from pgvector.psycopg2 import register_vector
from psycopg2.extras import execute_values

# ────── 1. Assumed globals ──────
# (from earlier code)
# NOTE: the next three lines are bare annotations. They record the expected
# type but do not bind a value, so each name stays undefined until some other
# cell assigns it.
DB_CONFIG: dict             # your Postgres connection dict (not referenced by any code visible in this notebook)
file_ids: list              # current in-memory list of file_id strings
embeddings: np.ndarray      # current in-memory array shape [N,128]
# Preprocessing applied to an image before it is passed to the model:
# resize to 224x224, convert to a tensor, normalise with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# ────── 2. Function to fetch existing IDs from DB ──────
def fetch_existing_file_ids():
    """Return the set of ``file_id`` values stored in ``public.fingerprint_embedding3``.

    Returns:
        A ``set`` containing the first column of every row in the table.

    The cursor and connection obtained from ``connect()`` are closed even when
    the query raises.
    """
    cur, conn = connect()
    try:
        # Only the file_id column is selected, so the pgvector type adapter
        # does not need to be registered on this connection.
        cur.execute("SELECT file_id FROM public.fingerprint_embedding3;")
        return {row[0] for row in cur.fetchall()}
    finally:
        cur.close()
        conn.close()

# ────── 3. Add new fingerprints ──────
def add_new_fingerprints(new_dir: str):
    """Embed BMP files under ``new_dir`` that are not in the DB yet and upsert them.

    A file is identified by its basename, which is stored as ``file_id``.
    Files whose basename is already present in
    ``public.fingerprint_embedding3`` are skipped.

    Args:
        new_dir: Directory searched recursively for files whose name ends in
            ``.bmp`` (case-insensitive).

    Returns:
        None. Progress is reported through ``print``.

    Side effects:
        Inserts/updates rows in ``public.fingerprint_embedding3`` and, when
        the notebook globals ``file_ids`` and ``embeddings`` already exist,
        appends the new entries to them. ``gallery_embeddings`` is a separate
        array and is not touched here.
    """
    global file_ids, embeddings

    # 3a) find all BMPs under new_dir
    new_paths = sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(new_dir)
        for name in names
        if name.lower().endswith(".bmp")
    )
    # Nothing on disk: report and stop before opening a DB connection.
    if not new_paths:
        print("No new fingerprint files to add.")
        return

    # 3b) skip paths already in DB. Keep only the first path per basename:
    # Postgres rejects an INSERT ... ON CONFLICT DO UPDATE statement that
    # would touch the same file_id twice.
    existing_ids = fetch_existing_file_ids()
    to_add = {}
    for candidate in new_paths:
        fid = os.path.basename(candidate)
        if fid not in existing_ids:
            to_add.setdefault(fid, candidate)
    if not to_add:
        print("No new fingerprint files to add.")
        return
    print(f"Adding {len(to_add)} new fingerprint files.")

    # 3c) compute embeddings
    records = []
    with torch.no_grad():
        for fid, img_path in to_add.items():
            # convert() returns a new image, so the file can be closed right away.
            with Image.open(img_path) as raw:
                img = raw.convert("L")
            t = transform(img).unsqueeze(0).to(device)  # [1,1,224,224]
            emb = model(t).cpu().numpy().flatten().tolist()
            records.append((fid, emb))

    # 3d) upsert into DB; always release the cursor and connection
    cur, conn = connect()
    try:
        register_vector(conn)
        execute_values(cur,
            """
        INSERT INTO public.fingerprint_embedding3 (file_id, embedding)
        VALUES %s
        ON CONFLICT (file_id) DO UPDATE
          SET embedding = EXCLUDED.embedding
        """,
            records,
            template="(%s, %s::vector)"
        )
        conn.commit()
    finally:
        cur.close()
        conn.close()
    print(f"Upserted {len(records)} new embeddings into DB.")

    # 3e) update in-memory lists, but only if another cell has created them
    # (the annotations at the top of this cell do not bind the names).
    notebook_globals = globals()
    if "file_ids" in notebook_globals and "embeddings" in notebook_globals:
        file_ids.extend(fid for fid, _ in records)
        # One stack for the whole batch instead of re-allocating per row.
        new_rows = np.array([emb for _, emb in records], dtype=float)
        embeddings = np.vstack([embeddings, new_rows])
    else:
        print("In-memory gallery not loaded; run load_all_embeddings() to pick up the new rows.")

# ────── 4. Usage ──────
# Point this at your “new fingerprints” folder; it is searched recursively
# for files ending in .bmp (any letter case).
add_new_fingerprints("/content/")


No new fingerprint files to add.


In [None]:
# -----------------------------------------
# 5) Fetch all embeddings at once
# -----------------------------------------
def load_all_embeddings(embedding_dim=128):
    """Read every row of ``public.fingerprint_embedding3``, ordered by ``file_id``.

    Args:
        embedding_dim: Width used for the empty result when the table has no
            rows (default 128, matching the rest of this notebook).

    Returns:
        ``(file_ids, embeddings)`` where ``file_ids`` is a list holding the
        first column of each row and ``embeddings`` is a float array with one
        row per entry. For an empty table the array has shape
        ``(0, embedding_dim)``.

    The cursor and connection obtained from ``connect()`` are closed even when
    the query raises.
    """
    cur, conn = connect()
    try:
        register_vector(conn)
        cur.execute("SELECT file_id, embedding FROM public.fingerprint_embedding3 ORDER BY file_id ASC;")
        rows = cur.fetchall()
    finally:
        cur.close()
        conn.close()

    file_ids = [row[0] for row in rows]
    if not rows:
        # np.stack raises on an empty list, so build the empty array explicitly.
        return file_ids, np.empty((0, embedding_dim), dtype=float)
    embeddings = np.stack([np.array(row[1], dtype=float) for row in rows], axis=0)
    return file_ids, embeddings

# Usage: pull every stored embedding into memory and keep a separate NumPy
# copy (`gallery_embeddings`) for the similarity search.
file_ids, embeddings = load_all_embeddings()
print(f"Loaded {len(file_ids)} embeddings from DB")
gallery_embeddings = np.array(embeddings)  # shape [N,128]


Loaded 6001 embeddings from DB


# Embedding and Storing

In [None]:
import os
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
import psycopg2
from pgvector.psycopg2 import register_vector
from psycopg2.extras import execute_values


# -----------------------------------------
# 1) Recreate your metric-learning model
# -----------------------------------------
import torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

class FingerprintNet(nn.Module):
    """ResNet-18 based embedder: 1-channel image batch in, unit-length vectors out.

    Args:
        embedding_dim: Length of the embedding produced per image (default 128).
    """

    def __init__(self, embedding_dim=128):
        super().__init__()
        net = resnet18(weights=ResNet18_Weights.DEFAULT)
        # First convolution is replaced by a new layer with one input channel.
        net.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Final fully connected layer becomes a projection to the embedding size.
        net.fc = nn.Linear(net.fc.in_features, embedding_dim)
        self.backbone = net

    def forward(self, x):
        """Run the backbone on ``x`` and L2-normalise the result along dim 1."""
        features = self.backbone(x)
        return F.normalize(features, p=2, dim=1)

# -----------------------------------------
# 2) Load your fine-tuned model
# -----------------------------------------
# Absolute path of the fine-tuned checkpoint that gets loaded below.
MODEL_PATH = "/content/fingerprint_model_finetuned2.pth"
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = FingerprintNet(embedding_dim=128)
model = model.to(device)
# NOTE(review): weights_only=False lets torch.load un-pickle arbitrary
# objects - only point MODEL_PATH at a checkpoint you trust.
state = torch.load(MODEL_PATH, map_location=device, weights_only=False)
model.load_state_dict(state)
# Inference mode for the embedding pass that follows.
model.eval()

# -----------------------------------------
# 3) Precompute gallery embeddings
# -----------------------------------------
# `path` is the dataset folder set by the kagglehub cell. The loop below uses
# its own variable name so that global is still intact when this cell is re-run.
DATA_DIR = path
# Preprocessing applied to an image before it is passed to the model:
# resize to 224x224, convert to a tensor, normalise with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# gather all BMP paths (extension match is case-insensitive), sorted
gallery_paths = sorted(
    os.path.join(root, name)
    for root, _dirs, names in os.walk(DATA_DIR)
    for name in names
    if name.lower().endswith(".bmp")
)

# compute embeddings
gallery_records = []  # list of (file_id, [128 floats])
with torch.no_grad():
    for img_path in gallery_paths:
        # convert() returns a new image, so the file can be closed right away.
        with Image.open(img_path) as raw:
            img = raw.convert("L")
        t   = transform(img).unsqueeze(0).to(device)
        emb = model(t).cpu().numpy().flatten().tolist()
        # The basename is what gets stored as file_id in the database.
        file_id = os.path.basename(img_path)
        gallery_records.append((file_id, emb))

# -----------------------------------------
# 4) Upsert into Postgres
# -----------------------------------------
cur, conn = connect()
try:
    # The extension has to exist before register_vector() is called, because
    # register_vector() looks the vector type up in the database.
    cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
    register_vector(conn)  # enables pgvector support

    # create table if not exists
    cur.execute("""
    CREATE TABLE IF NOT EXISTS public.fingerprint_embedding3 (
      file_id TEXT PRIMARY KEY,
      embedding VECTOR(128)
    );
    """)

    # upsert all records in one batch
    execute_values(cur,
        """
    INSERT INTO public.fingerprint_embedding3 (file_id, embedding)
    VALUES %s
    ON CONFLICT (file_id) DO UPDATE
      SET embedding = EXCLUDED.embedding
    """,
        gallery_records,
        template="(%s, %s::vector)"  # tell psycopg2 that 2nd field is vector
    )

    conn.commit()
finally:
    # Release the cursor and connection even if one of the statements failed;
    # closing without a commit discards the uncommitted work.
    cur.close()
    conn.close()
print(f"Upserted {len(gallery_records)} embeddings into the database")




Upserted 6000 embeddings into the database


# Prediction

In [None]:
def predict_fingerprint_from_db(test_image_path, top_k=1):
    """Rank the in-memory gallery by cosine similarity to one query image.

    Relies on notebook globals: ``transform``, ``model``, ``device``,
    ``cosine_similarity``, ``gallery_embeddings`` and ``file_ids``.

    Args:
        test_image_path: Path of the image to look up; it is opened with PIL
            and converted to mode "L".
        top_k: Number of best matches to return (default 1).

    Returns:
        A list of ``(file_id, similarity)`` tuples, highest similarity first,
        with ``similarity`` as a Python float.
    """
    query = transform(Image.open(test_image_path).convert("L"))
    query = query.unsqueeze(0).to(device)  # shape [1,1,224,224]

    with torch.no_grad():
        query_vec = model(query).cpu().numpy()  # shape [1,128]

    # One similarity score per gallery row.
    scores = cosine_similarity(query_vec, gallery_embeddings)[0]
    # argsort is ascending, so reverse it to get best-first order.
    ranked = np.argsort(scores)[::-1]
    return [(file_ids[idx], float(scores[idx])) for idx in ranked[:top_k]]

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Look up one query image against the in-memory gallery and list the three
# entries with the highest cosine similarity.
query_img = "/content/test.bmp"
matches = predict_fingerprint_from_db(query_img, top_k=3)

print("Top matches:")
for file_id, score in matches:
    # score is the similarity value returned for this gallery entry
    print(f"→ {file_id} (similarity: {score:.4f})")


Top matches:
→ 515__M_Right_thumb_finger.BMP (similarity: 0.9734)
→ 136__F_Left_little_finger.BMP (similarity: 0.9698)
→ 92__F_Right_thumb_finger.BMP (similarity: 0.9560)
