In [None]:
import os
import cv2
import numpy as np
import random
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import regularizers
from tensorflow.keras.models import load_model
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import label_binarize
from collections import defaultdict

In [None]:
import os
import shutil

# Default location of the original scans. It must be defined before the
# existence check below; otherwise the check itself raises NameError and the
# interactive fallback is never reached.
raw_dir = "../data/pre-raw"
output_dir = "../data/raw"

os.makedirs(output_dir, exist_ok=True)

# Ask for the folder only when the default location is absent.
if not os.path.exists(raw_dir):
    raw_dir = input("Input raw dataset directory:")

# Sorted so that serial numbers are handed out in a stable order.
files = sorted(f for f in os.listdir(raw_dir) if os.path.isfile(os.path.join(raw_dir, f)))

# individu -> {original finger number -> globally unique finger serial}
individu_finger_map = {}
global_finger_serial = 1

for file in files:
    name, ext = os.path.splitext(file)
    parts = name.split("_")

    # Expected stem layout: <individu>_<finger>_<scan>
    if len(parts) != 3:
        print(f"Skipping invalid filename: {file}")
        continue

    individu, finger_field, scan = parts
    try:
        finger_orig = int(finger_field)
    except ValueError:
        # A non-numeric finger field is treated like any other invalid name.
        print(f"Skipping invalid filename: {file}")
        continue

    finger_map = individu_finger_map.setdefault(individu, {})
    if finger_orig not in finger_map:
        # First time this (individu, finger) pair is seen: take the next serial.
        finger_map[finger_orig] = global_finger_serial
        global_finger_serial += 1
    mapped_finger = finger_map[finger_orig]

    new_name = f"{individu}_{mapped_finger}_{scan}{ext}"

    src_file = os.path.join(raw_dir, file)
    dest_file = os.path.join(output_dir, new_name)

    shutil.copy(src_file, dest_file)
    print(f"Copied: {file} → {new_name}")


In [None]:
def skeletonize(img):
    """Morphological skeleton built from core OpenCV calls (no ximgproc).

    On every pass the pixels that a 3x3-cross opening would remove from the
    current image are OR-ed into the skeleton, then the image is eroded with
    the same element. The loop ends once erosion leaves no non-zero pixel.

    Args:
        img: single-channel image array (the caller in this notebook passes a
            0/255 uint8 mask).

    Returns:
        uint8 array with the same shape as ``img`` holding the skeleton.
    """
    cross = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    skeleton = np.zeros(img.shape, np.uint8)
    current = img
    finished = False
    while not finished:
        opened = cv2.morphologyEx(current, cv2.MORPH_OPEN, cross)
        # Whatever the opening removes belongs to the skeleton.
        skeleton = cv2.bitwise_or(skeleton, cv2.subtract(current, opened))
        current = cv2.erode(current, cross)
        finished = cv2.countNonZero(current) == 0
    return skeleton

def process_fingerprint(image_path, output_path, target_size=None, upscale_factor=1.0):
    """Turn one fingerprint scan into an inverted ridge skeleton and save it.

    Pipeline: optional resize -> CLAHE -> bilateral filter -> inverted
    adaptive threshold -> close/open with a 3x3 kernel -> skeletonize ->
    invert -> write to disk.

    Args:
        image_path: path of the image to read (loaded as grayscale).
        output_path: path the result is written to.
        target_size: if truthy, passed to ``cv2.resize`` as the output size;
            takes precedence over ``upscale_factor``.
        upscale_factor: scale applied to both dimensions when ``target_size``
            is not given and the factor differs from 1.0.

    Raises:
        ValueError: the input image could not be read.
        OSError: OpenCV reported that the output image could not be written.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")

    # Resize if needed
    if target_size:
        image = cv2.resize(image, target_size, interpolation=cv2.INTER_LANCZOS4)
    elif upscale_factor != 1.0:
        h, w = image.shape
        image = cv2.resize(image, (int(w*upscale_factor), int(h*upscale_factor)),
                         interpolation=cv2.INTER_LANCZOS4)

    # 1. Contrast enhancement
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    enhanced = clahe.apply(image)

    # 2. Noise reduction
    denoised = cv2.bilateralFilter(enhanced, 9, 75, 75)

    # 3. Adaptive thresholding (inverted: dark input pixels become 255)
    thresh = cv2.adaptiveThreshold(denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY_INV, 21, 7)

    # 4. Morphological clean-up: close first, then open
    kernel = np.ones((3,3), np.uint8)
    morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
    morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel, iterations=1)

    # 5. Skeletonization (using alternative method)
    skeleton = skeletonize(morph)

    # 6. Final inversion and saving
    result = cv2.bitwise_not(skeleton)
    # cv2.imwrite signals failure through its return value rather than an
    # exception; surface it so the caller does not count a missing file as done.
    if not cv2.imwrite(output_path, result):
        raise OSError(f"Could not write image: {output_path}")

def batch_process_fingerprints(input_folder, output_folder, upscale=False):
    """Run ``process_fingerprint`` on every ``.tif`` file of a folder.

    Args:
        input_folder: folder scanned (non-recursively) for ``.tif`` files; the
            extension check ignores letter case.
        output_folder: destination folder, created when missing. Each result
            keeps the name of its source file.
        upscale: when true, every image is processed with ``upscale_factor=2.0``.

    A failure on one file is printed and the batch moves on to the next file.
    """
    os.makedirs(output_folder, exist_ok=True)
    tif_names = [entry for entry in os.listdir(input_folder) if entry.lower().endswith('.tif')]

    # Only the upscaling case overrides a default of process_fingerprint.
    extra_kwargs = {"upscale_factor": 2.0} if upscale else {}

    for filename in tqdm(tif_names, desc="Processing Fingerprints"):
        source = os.path.join(input_folder, filename)
        destination = os.path.join(output_folder, filename)
        try:
            process_fingerprint(source, destination, **extra_kwargs)
        except Exception as e:
            print(f"\nError processing {filename}: {str(e)}")

if __name__ == "__main__":
    # Settings for this preprocessing run
    INPUT_FOLDER = "../data/raw"
    OUTPUT_FOLDER = "../data/processed"
    UPSCALE_IMAGES = True  # True -> images are processed at 2x size

    print("Starting fingerprint processing...")
    batch_process_fingerprints(
        input_folder=INPUT_FOLDER,
        output_folder=OUTPUT_FOLDER,
        upscale=UPSCALE_IMAGES,
    )
    print("\nProcessing completed successfully!")

Starting fingerprint processing...


Processing Fingerprints: 100%|██████████| 2056/2056 [01:01<00:00, 33.22it/s]


Processing completed successfully!





In [6]:
# Seed Python's RNG. The split below is purely index-based (no random draw);
# the seed fixes the sequence produced by later `random` calls in the session.
random.seed(42)

# Input folder and the two output roots
processed_dir  = "../data/processed"
train_dir  = "../data/final/train"
test_dir  = "../data/final/test"

for split_root in (train_dir, test_dir):
    os.makedirs(split_root, exist_ok=True)

# Step 1: group image files by class id, i.e. "<first field>_<second field>"
finger_groups = defaultdict(list)

for filename in sorted(os.listdir(processed_dir)):
    is_image = filename.lower().endswith((".png", ".jpg", ".jpeg", ".tif"))
    parts = filename.split("_")
    if is_image and len(parts) >= 3:
        class_id = f"{parts[0]}_{parts[1]}"
        finger_groups[class_id].append(filename)

# Step 2: copy each class into train/<class_id> and test/<class_id>
for class_id, files in finger_groups.items():
    # Order by the numeric third field of the name (the image index)
    files.sort(key=lambda f: int(f.split("_")[2].split(".")[0]))

    train_class_dir = os.path.join(train_dir, class_id)
    test_class_dir = os.path.join(test_dir, class_id)
    for class_dir in (train_class_dir, test_class_dir):
        os.makedirs(class_dir, exist_ok=True)

    for i, file in enumerate(files):
        # The first six images of a class go to train, the rest to test
        split_name = "train" if i < 6 else "test"
        target_dir = train_class_dir if split_name == "train" else test_class_dir
        shutil.copy(os.path.join(processed_dir, file), os.path.join(target_dir, file))
        print(f"Copied {file} → {split_name}/{class_id}")

print("✅ Dataset split complete with subfolders as classes.")

In [7]:
# Path configuration: root of the test split. The code at the end of this
# cell walks this folder and overwrites every .tif it finds with a damaged copy.
test_dir = "../data/final/test/"

def apply_block_damage_smart(image, block_size=80, num_blocks=7):
    """White-out square blocks placed around randomly chosen non-white pixels.

    Candidate positions are the pixels whose value is below 250. For each
    block one candidate is drawn with ``random.randint``; the block starts
    half a block above/left of it and is clipped to the image borders.

    Args:
        image: 2-D grayscale array; it is not modified.
        block_size: side length of each block in pixels.
        num_blocks: number of blocks to paint.

    Returns:
        A damaged copy of ``image``; an unchanged copy when no pixel is
        below 250.
    """
    height, width = image.shape
    damaged = image.copy()

    ys, xs = np.nonzero(image < 250)
    candidate_count = len(xs)
    if candidate_count == 0:
        return damaged

    half = block_size // 2
    for _ in range(num_blocks):
        pick = random.randint(0, candidate_count - 1)
        left = max(0, xs[pick] - half)
        top = max(0, ys[pick] - half)
        right = min(width, left + block_size)
        bottom = min(height, top + block_size)
        damaged[top:bottom, left:right] = 255
    return damaged

def apply_blur_damage(image, block_size=60, num_blocks=5):
    """Gaussian-blur randomly placed square regions of a grayscale image.

    Args:
        image: 2-D grayscale array; it is not modified.
        block_size: side length of each blurred region. It is clamped to the
            image dimensions, so images smaller than the block are handled
            (the region then spans the full width and/or height).
        num_blocks: number of regions to blur.

    Returns:
        A copy of ``image`` with the selected regions blurred using an 11x11
        Gaussian kernel. An unchanged copy is returned when the clamped block
        has no area.
    """
    damaged = np.copy(image)
    height, width = image.shape

    # random.randint raises ValueError for an empty range, which happened
    # whenever the image was smaller than block_size. Clamp the block instead.
    block_w = min(block_size, width)
    block_h = min(block_size, height)
    if block_w <= 0 or block_h <= 0:
        return damaged

    for _ in range(num_blocks):
        x = random.randint(0, width - block_w)
        y = random.randint(0, height - block_h)
        roi = damaged[y:y+block_h, x:x+block_w]
        blurred = cv2.GaussianBlur(roi, (11, 11), 0)
        damaged[y:y+block_h, x:x+block_w] = blurred
    return damaged

def apply_elliptical_noise(image, num_ellipses=5):
    """Paint filled white ellipses at random positions on a copy of the image.

    For each ellipse the centre, the two radii (20-60 and 10-30 pixels) and
    the rotation angle (0-180 degrees) are drawn with ``random.randint``.

    Args:
        image: 2-D grayscale array; it is not modified.
        num_ellipses: number of ellipses to draw.

    Returns:
        The damaged copy.
    """
    height, width = image.shape
    damaged = np.copy(image)

    for _ in range(num_ellipses):
        # Draw order matters for seeded runs: centre x, centre y, radii, angle.
        center_x = random.randint(0, width)
        center_y = random.randint(0, height)
        radius_x = random.randint(20, 60)
        radius_y = random.randint(10, 30)
        tilt = random.randint(0, 180)

        # Full 0-360 degree arc, colour 255, thickness -1 (filled)
        cv2.ellipse(damaged, (center_x, center_y), (radius_x, radius_y), tilt, 0, 360, 255, -1)
    return damaged


def apply_combined_damage(image):
    """Chain the three damage functions: white blocks, then blur, then ellipses.

    Args:
        image: 2-D grayscale array.

    Returns:
        The image returned by the last damage step.
    """
    damage_steps = (apply_block_damage_smart, apply_blur_damage, apply_elliptical_noise)
    for damage_step in damage_steps:
        image = damage_step(image)
    return image

# Collect every test image (.tif, any letter case, as in the other cells).
# The list is sorted because directory traversal order is arbitrary, and the
# seeded random damage should land on the same files on every machine.
test_images = []
for root, _, files in os.walk(test_dir):
    for file in files:
        if file.lower().endswith(".tif"):
            test_images.append(os.path.join(root, file))
test_images.sort()
print(f"Found {len(test_images)} test images to process")

# Process and overwrite originals.
# NOTE: this step is not idempotent; running it again damages the already
# damaged files a second time.
overwritten = 0
for image_path in tqdm(test_images, desc="Applying combined damage"):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    if image is None:
        print(f"\nSkipping unreadable image: {image_path}")
        continue

    damaged_image = apply_combined_damage(image)
    # Count only the files OpenCV reports as written.
    if cv2.imwrite(image_path, damaged_image):
        overwritten += 1

print(f"\nOverwritten {overwritten} images with combined damage in {test_dir}")

Found 514 test images to process


Applying combined damage: 100%|██████████| 514/514 [00:08<00:00, 59.46it/s]


Overwritten 514 images with combined damage in ../data/final/test/





In [8]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from albumentations import (
    Compose, OneOf, Rotate, ShiftScaleRotate, RandomBrightnessContrast,
    CLAHE, GaussNoise, CoarseDropout, GridDistortion, ElasticTransform, MotionBlur
)
from PIL import Image

# Root of the training split; the call at the end of this cell passes it to
# augment_and_save_images_in_place.
train_dir = "../data/final/train"

# Augmentation pipeline
def get_augmentation_pipeline():
    """Build the Albumentations ``Compose`` used to augment training images.

    Stages, in order: rotation, shift/scale/rotate, one of
    brightness-contrast or CLAHE, one of motion blur or Gaussian noise, one of
    elastic or grid distortion, and a coarse dropout hole filled with 0.
    """
    geometric = [
        Rotate(limit=10, p=0.7),
        ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=10, p=0.5),
    ]
    contrast = OneOf(
        [
            RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.5),
            CLAHE(clip_limit=2.0, p=0.5),
        ],
        p=0.6,
    )
    degradation = OneOf(
        [
            MotionBlur(blur_limit=(3, 7), p=0.5),
            GaussNoise(var_limit=(10.0, 30.0), p=0.5),
        ],
        p=0.5,
    )
    warping = OneOf(
        [
            ElasticTransform(alpha=1, sigma=50, alpha_affine=30, p=0.3),
            GridDistortion(num_steps=5, distort_limit=0.3, p=0.3),
        ],
        p=0.4,
    )
    occlusion = CoarseDropout(num_holes=1, max_h_size=16, max_w_size=16, fill_value=0, p=0.5)

    return Compose([*geometric, contrast, degradation, warping, occlusion])

def augment_and_save_images_in_place(input_dir, augmentations_per_image=5):
    """Write augmented variants next to every source image of each class folder.

    Each sub-folder of ``input_dir`` is treated as one class. Every image in
    it is read as grayscale and run through ``get_augmentation_pipeline()``
    ``augmentations_per_image`` times; variant ``k`` is saved in the same
    folder as ``<stem>_aug<k>.png``.

    Files whose stem already ends in ``_aug<digits>`` are skipped. They are
    outputs of an earlier run; without this check, re-running the cell would
    augment the augmentations and multiply the dataset on every execution.

    Args:
        input_dir: folder containing one sub-folder per class.
        augmentations_per_image: number of variants written per source image.
    """
    transform = get_augmentation_pipeline()
    image_exts = (".png", ".jpg", ".jpeg", ".tif")

    for class_name in os.listdir(input_dir):
        class_path = os.path.join(input_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        for fname in tqdm(os.listdir(class_path), desc=f"Augmenting {class_name}"):
            if not fname.lower().endswith(image_exts):
                continue

            prefix = os.path.splitext(fname)[0]
            _, aug_marker, aug_index = prefix.rpartition("_aug")
            if aug_marker and aug_index.isdigit():
                # Produced by a previous run of this function: not a source image.
                continue

            fpath = os.path.join(class_path, fname)
            img = cv2.imread(fpath, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue

            # Add an explicit channel axis before handing the image to the pipeline
            h, w = img.shape
            img = img.reshape(h, w, 1)

            for i in range(augmentations_per_image):
                augmented = transform(image=img)
                aug_img = augmented["image"]

                aug_img = np.clip(aug_img, 0, 255).astype('uint8')

                # Save image (channel axis dropped again for the 8-bit grayscale file)
                aug_img_pil = Image.fromarray(aug_img.squeeze(), mode='L')
                save_path = os.path.join(class_path, f"{prefix}_aug{i+1}.png")
                aug_img_pil.save(save_path)

# Augment the training split in place: five variants are written next to each
# source image inside its class folder.
augment_and_save_images_in_place(train_dir, augmentations_per_image=5)
print("✅ Augmentation with Albumentations done.")

Augmenting 001_1: 100%|██████████| 6/6 [00:04<00:00,  1.24it/s]
Augmenting 001_2: 100%|██████████| 6/6 [00:04<00:00,  1.27it/s]
Augmenting 001_3: 100%|██████████| 6/6 [00:05<00:00,  1.18it/s]
Augmenting 001_4: 100%|██████████| 6/6 [00:04<00:00,  1.27it/s]
Augmenting 001_5: 100%|██████████| 6/6 [00:04<00:00,  1.31it/s]
Augmenting 001_6: 100%|██████████| 6/6 [00:05<00:00,  1.01it/s]
Augmenting 002_10: 100%|██████████| 6/6 [00:05<00:00,  1.07it/s]
Augmenting 002_11: 100%|██████████| 6/6 [00:06<00:00,  1.01s/it]
Augmenting 002_12: 100%|██████████| 6/6 [00:06<00:00,  1.00s/it]
Augmenting 002_7: 100%|██████████| 6/6 [00:06<00:00,  1.08s/it]
Augmenting 002_8: 100%|██████████| 6/6 [00:07<00:00,  1.17s/it]
Augmenting 002_9: 100%|██████████| 6/6 [00:06<00:00,  1.07s/it]
Augmenting 003_13: 100%|██████████| 6/6 [00:06<00:00,  1.10s/it]
Augmenting 003_14: 100%|██████████| 6/6 [00:06<00:00,  1.06s/it]
Augmenting 003_15: 100%|██████████| 6/6 [00:05<00:00,  1.08it/s]
Augmenting 003_16: 100%|██████████

✅ Augmentation done.





In [9]:
import os
import cv2
import random
from tqdm import tqdm

train_dir = "../data/final/train"
test_dir  = "../data/final/test"

print("\n🔧 Applying damage per class in training dataset...")

total_clean = 0
total_damaged = 0

# Class folders and image lists are sorted because os.listdir returns entries
# in arbitrary order; with a seeded RNG the same files are then selected on
# every run and every machine.
# NOTE: this cell overwrites files in place, so running it twice damages a
# further 25% sample (possibly re-damaging files).
for class_id in sorted(os.listdir(train_dir)):
    class_path = os.path.join(train_dir, class_id)
    if not os.path.isdir(class_path):
        continue

    # Collect valid image filenames
    images = sorted(
        f for f in os.listdir(class_path)
        if f.lower().endswith(('.tif', '.png', '.jpg', '.jpeg', '.bmp'))
    )
    num_images = len(images)

    # Calculate 25% of images to damage (rounded down)
    num_to_damage = int(num_images * 0.25)
    selected = random.sample(images, num_to_damage)

    # Apply damage, counting only the files that were actually rewritten
    num_damaged = 0
    for fname in selected:
        img_path = os.path.join(class_path, fname)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"Skipping unreadable image: {img_path}")
            continue
        damaged = apply_combined_damage(img)
        if cv2.imwrite(img_path, damaged):
            num_damaged += 1

    total_clean += (num_images - num_damaged)
    total_damaged += num_damaged
    print(f"Class {class_id}: {num_images - num_damaged} clean + {num_damaged} damaged")

print(f"\n✅ Final training set: {total_clean + total_damaged} images total — {total_clean} clean + {total_damaged} damaged.")



🔧 Applying damage per class in training dataset...
Class 001_1: 27 clean + 9 damaged
Class 001_2: 27 clean + 9 damaged
Class 001_3: 27 clean + 9 damaged
Class 001_4: 27 clean + 9 damaged
Class 001_5: 27 clean + 9 damaged
Class 001_6: 27 clean + 9 damaged
Class 002_10: 27 clean + 9 damaged
Class 002_11: 27 clean + 9 damaged
Class 002_12: 27 clean + 9 damaged
Class 002_7: 27 clean + 9 damaged
Class 002_8: 27 clean + 9 damaged
Class 002_9: 27 clean + 9 damaged
Class 003_13: 27 clean + 9 damaged
Class 003_14: 27 clean + 9 damaged
Class 003_15: 27 clean + 9 damaged
Class 003_16: 27 clean + 9 damaged
Class 003_17: 27 clean + 9 damaged
Class 003_18: 27 clean + 9 damaged
Class 004_19: 27 clean + 9 damaged
Class 004_20: 27 clean + 9 damaged
Class 004_21: 27 clean + 9 damaged
Class 004_22: 27 clean + 9 damaged
Class 004_23: 27 clean + 9 damaged
Class 004_24: 27 clean + 9 damaged
Class 005_25: 27 clean + 9 damaged
Class 005_26: 27 clean + 9 damaged
Class 005_27: 27 clean + 9 damaged
Class 005_28