<a href="https://colab.research.google.com/github/abhinaykumar2406/fake_fingerprint_colab/blob/main/fake_fingerprint_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import os

# List all files in the session storage ("/content" is the default)
files = os.listdir("/")
print("Files in session storage:", files)
!unzip -q /kaggle_dataset.zip -d /content/kaggle_dataset
print("Extraction completed!")

Files in session storage: ['boot', 'srv', 'lib64', 'media', 'usr', 'proc', 'opt', 'lib32', 'sbin', 'bin', 'run', 'etc', 'dev', 'libx32', 'sys', 'mnt', 'tmp', 'home', 'lib', 'root', 'var', 'kaggle_dataset.zip', 'kaggle', 'content', '.dockerenv', 'tools', 'datalab', 'python-apt', 'python-apt.tar.xz', 'NGC-DL-CONTAINER-LICENSE', 'cuda-keyring_1.1-1_all.deb']
Extraction completed!


In [23]:
import os
import shutil
import random

# Define dataset paths
dataset_path = "/content/kaggle_dataset/SOCOFing"
output_path = "/content/split_dataset"

# Define train and test paths
train_path = os.path.join(output_path, "train")
test_path = os.path.join(output_path, "test")

# Train-test split ratios
train_ratio_real = 0.8  # 80% for Real images
train_ratio_altered = 0.1  # 30% for Altered images

# Remove existing split_dataset directory to start fresh
if os.path.exists(output_path):
    shutil.rmtree(output_path)  # Deletes the entire split_dataset folder
    print("Old split_dataset folder removed!")
# Create fresh train and test directories
for folder in ["Real", "Altered-Easy", "Altered-Medium", "Altered-Hard"]:
    os.makedirs(os.path.join(train_path, folder), exist_ok=True)
    os.makedirs(os.path.join(test_path, folder), exist_ok=True)

# Function to split files into train and test
def split_files(category, category_path, train_ratio):
    """Splits files into train and test sets based on the given train ratio."""
    files = [f for f in os.listdir(category_path) if os.path.isfile(os.path.join(category_path, f))]
    random.shuffle(files)

    split_index = int(len(files) * train_ratio)
    train_files = files[:split_index]
    test_files = files[split_index:]

    # Copy train files
    for file in train_files:
        shutil.copy(os.path.join(category_path, file), os.path.join(train_path, category, file))

    # Copy test files
    for file in test_files:
        shutil.copy(os.path.join(category_path, file), os.path.join(test_path, category, file))

# Process the "Real" folder (80% train)
split_files("Real", os.path.join(dataset_path, "Real"), train_ratio_real)

# Process all "Altered" subfolders (30% train)
for subfolder in ["Altered-Easy", "Altered-Medium", "Altered-Hard"]:
    split_files(subfolder, os.path.join(dataset_path, "Altered", subfolder), train_ratio_altered)

print("Dataset split completed! ✅")


Old split_dataset folder removed!
Dataset split completed! ✅


In [24]:
import os
import cv2
import numpy as np
import joblib

from tqdm import tqdm
from skimage import feature
from sklearn.svm import LinearSVC
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix

def extract_lbp_features(image_path):
    """Extracts LBP histogram features from a given image path."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: Unable to read image {image_path}")
        return None

    image = cv2.resize(image, (64, 64))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    lbp = feature.local_binary_pattern(image, 24, 8, method="uniform")
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 27), range=(0, 10))
    hist = hist.astype("float") / (hist.sum() + 1e-7)

    return hist

def main():
    train_x, train_y = [], []
    test_x, test_y = [], []

    # Define dataset paths
    train_live_path = "/content/split_dataset/train/Real"
    train_spoof_paths = [
        "/content/split_dataset/train/Altered-Medium",
        "/content/split_dataset/train/Altered-Easy",
        "/content/split_dataset/train/Altered-Hard"
    ]

    test_live_path = "/content/split_dataset/test/Real"
    test_spoof_paths = [
        "/content/split_dataset/test/Altered-Medium",
        "/content/split_dataset/test/Altered-Easy",
        "/content/split_dataset/test/Altered-Hard"
    ]

    # Process training live images
    print("Extracting LBPH features from training images (live)")
    for file in tqdm(sorted(os.listdir(train_live_path))):
        file_path = os.path.join(train_live_path, file)
        features = extract_lbp_features(file_path)
        if features is not None:
            train_x.append(features)
            train_y.append(1)  # Live samples

    # Process training spoof images from multiple directories
    print("Extracting LBPH features from training images (spoof)")
    for spoof_path in train_spoof_paths:
        for file in tqdm(sorted(os.listdir(spoof_path))):
            file_path = os.path.join(spoof_path, file)
            features = extract_lbp_features(file_path)
            if features is not None:
                train_x.append(features)
                train_y.append(0)  # Spoof samples

    # Shuffle training data
    train_x, train_y = shuffle(train_x, train_y, random_state=42)

    # Process testing live images
    print("Extracting LBPH features from testing images (live)")
    for file in tqdm(sorted(os.listdir(test_live_path))):
        file_path = os.path.join(test_live_path, file)
        features = extract_lbp_features(file_path)
        if features is not None:
            test_x.append(features)
            test_y.append(1)  # Live samples

    # Process testing spoof images from multiple directories
    print("Extracting LBPH features from testing images (spoof)")
    for spoof_path in test_spoof_paths:
        for file in tqdm(sorted(os.listdir(spoof_path))):
            file_path = os.path.join(spoof_path, file)
            features = extract_lbp_features(file_path)
            if features is not None:
                test_x.append(features)
                test_y.append(0)  # Spoof samples

    # Shuffle testing data
    test_x, test_y = shuffle(test_x, test_y, random_state=42)

    # Train SVM model
    print("Training SVM Model...")
    model = LinearSVC(C=100, max_iter=10000)
    model.fit(train_x, train_y)
    print("Training Completed!\n\nTesting Now...")

    # Save the trained model
    joblib.dump(model, "/content/fingerprint_spoof_svm.pkl")
    print("Model saved successfully as 'fingerprint_spoof_svm.pkl'")

    # Model evaluation
    pred = model.predict(test_x)
    con_matrix = confusion_matrix(test_y, pred)

    print("Confusion Matrix:\n", con_matrix)

    TP, FN, FP, TN = con_matrix.ravel()

    print("Precision of the SVM:", round((TP / (TP+FP)), 3))
    print("Recall of the SVM:", round((TP / (TP+FN)), 3))
    print("Accuracy of the SVM:", round(((TP + TN) / (TP + TN + FP + FN)), 3))

if __name__ == "__main__":
    print("Fingerprint spoof detection system based on SVM")
    main()


Fingerprint spoof detection system based on SVM
Extracting LBPH features from training images (live)


100%|██████████| 4800/4800 [00:12<00:00, 374.78it/s]


Extracting LBPH features from training images (spoof)


100%|██████████| 1706/1706 [00:04<00:00, 363.00it/s]
100%|██████████| 1793/1793 [00:04<00:00, 435.18it/s]
100%|██████████| 1427/1427 [00:03<00:00, 365.57it/s]


Extracting LBPH features from testing images (live)


100%|██████████| 1200/1200 [00:03<00:00, 335.58it/s]


Extracting LBPH features from testing images (spoof)


100%|██████████| 15361/15361 [00:39<00:00, 385.59it/s]
100%|██████████| 16138/16138 [00:41<00:00, 387.56it/s]
100%|██████████| 12845/12845 [00:33<00:00, 379.75it/s]


Training SVM Model...
Training Completed!

Testing Now...
Model saved successfully as 'fingerprint_spoof_svm.pkl'
Confusion Matrix:
 [[28044 16300]
 [  410   790]]
Precision of the SVM: 0.986
Recall of the SVM: 0.632
Accuracy of the SVM: 0.633


In [25]:
import os
import cv2
import numpy as np
import joblib
from skimage import feature
from sklearn.metrics import confusion_matrix, classification_report

# Load the trained SVM model
model = joblib.load("/content/fingerprint_spoof_svm.pkl")
print("Model loaded successfully!")

def extract_lbp_features(image_path):
    """ Extract LBP features from an input fingerprint image. """
    # Read the image
    image = cv2.imread(image_path)

    # Resize image to match training images
    image = cv2.resize(image, (64, 64))

    # Convert image to grayscale
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Extract LBP features
    lbp = feature.local_binary_pattern(image, 24, 8, method="uniform")

    # Get histogram of LBP
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 27), range=(0, 10))

    # Normalize the histogram
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-7)

    return hist

def predict_fingerprint(image_path):
    """ Predict whether the given fingerprint is LIVE or SPOOF. """
    try:
        # Extract features from the image
        features = extract_lbp_features(image_path)

        # Reshape features to match input format of the model
        features = features.reshape(1, -1)

        # Predict using the trained model
        prediction = model.predict(features)

        return prediction[0]  # Return 1 for LIVE, 0 for SPOOF
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None  # Return None if an error occurs

def predict_from_directory(directory_path, true_label):
    """
    Predict all images in a directory and collect results.
    true_label: 1 for LIVE, 0 for SPOOF
    """
    predictions = []
    actual_labels = []

    if not os.path.exists(directory_path):
        print(f"Directory '{directory_path}' not found!")
        return predictions, actual_labels

    print(f"\nProcessing images in: {directory_path}\n")

    for file in sorted(os.listdir(directory_path)):
        image_path = os.path.join(directory_path, file)

        # Check if the file is an image
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.tiff')):
            pred = predict_fingerprint(image_path)
            if pred is not None:
                predictions.append(pred)
                actual_labels.append(true_label)  # Assign the correct label
                print(f"Prediction for '{file}': {'LIVE' if pred == 1 else 'SPOOF'}")
        else:
            print(f"Skipping non-image file: {file}")

    return predictions, actual_labels

# Define test directories based on Colab paths
test_live_dir = "/content/split_dataset/test/Real"  # LIVE fingerprint images
test_spoof_dirs = [
    "/content/split_dataset/test/Altered-Easy",
    "/content/split_dataset/test/Altered-Medium",
    "/content/split_dataset/test/Altered-Hard"
]  # SPOOF fingerprint images

# Predict for LIVE images
preds_live, labels_live = predict_from_directory(test_live_dir, 1)  # LIVE = 1

# Predict for SPOOF images from multiple directories
preds_spoof, labels_spoof = [], []
for spoof_dir in test_spoof_dirs:
    preds, labels = predict_from_directory(spoof_dir, 0)  # SPOOF = 0
    preds_spoof.extend(preds)
    labels_spoof.extend(labels)

# Combine results
all_preds = preds_live + preds_spoof
all_labels = labels_live + labels_spoof

# Compute Performance Metrics
if len(all_preds) > 0:
    conf_matrix = confusion_matrix(all_labels, all_preds)
    report = classification_report(all_labels, all_preds, target_names=["SPOOF", "LIVE"])

    TN, FP, FN, TP = conf_matrix.ravel()  # Correct order

    precision = round(TP / (TP + FP + 1e-7), 3)  # Precision formula
    recall = round(TP / (TP + FN + 1e-7), 3)  # Recall formula
    accuracy = round((TP + TN) / (TP + TN + FP + FN + 1e-7), 3)  # Accuracy formula

    print("\nConfusion Matrix:\n", conf_matrix)
    print("\nClassification Report:\n", report)
    print("\nPerformance Metrics:")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"Accuracy: {accuracy}")
else:
    print("No valid test images found for evaluation.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Prediction for '433__M_Left_thumb_finger_Zcut.BMP': SPOOF
Prediction for '433__M_Right_index_finger_CR.BMP': SPOOF
Prediction for '433__M_Right_index_finger_Obl.BMP': SPOOF
Prediction for '433__M_Right_index_finger_Zcut.BMP': SPOOF
Prediction for '433__M_Right_little_finger_Obl.BMP': SPOOF
Prediction for '433__M_Right_middle_finger_CR.BMP': SPOOF
Prediction for '433__M_Right_middle_finger_Obl.BMP': SPOOF
Prediction for '433__M_Right_middle_finger_Zcut.BMP': SPOOF
Prediction for '433__M_Right_ring_finger_Obl.BMP': SPOOF
Prediction for '433__M_Right_thumb_finger_CR.BMP': SPOOF
Prediction for '433__M_Right_thumb_finger_Obl.BMP': SPOOF
Prediction for '433__M_Right_thumb_finger_Zcut.BMP': SPOOF
Prediction for '434__M_Left_index_finger_CR.BMP': LIVE
Prediction for '434__M_Left_index_finger_Obl.BMP': LIVE
Prediction for '434__M_Left_index_finger_Zcut.BMP': LIVE
Prediction for '434__M_Left_little_finger_Obl.BMP': SPOOF
Prediction