To execute this notebook, you need to either
 - Download a pre-trained model
 - Train an example model by executing the model_training_tutorial.ipynb notebook

In this tutorial we will learn to:
- Load a previously trained model
- Extract DeepPrint features from fingerprint images
- Evaluate the performance of the extracted fixed-length representations

## Embedding extraction

After training the model, we can extract the DeepPrint features for the fingerprint images. This is done by calling the `extract` method of the `DeepPrintExtractor` class.

In [None]:
import os

from flx.extractor.fixed_length_extractor import get_DeepPrint_Tex, get_DeepPrint_TexMinu, DeepPrintExtractor

# Dimension and number of training subjects must be known to load the pre-trained model

# Build the extractor via get_DeepPrint_TexMinu.
# To load the pre-trained model parameters use num_training_subjects=8000
extractor: DeepPrintExtractor = get_DeepPrint_TexMinu(num_training_subjects=8000, num_dims=256)

# Load the saved parameters from MODEL_DIR (machine-specific absolute path; adjust for your setup).
# NOTE(review): the directory name ends in "_512" while num_dims=256 is passed above — confirm they match.
MODEL_DIR: str = os.path.abspath("/home/mt0/22CS60R42/fixed-length-fingerprint-extractors/notebooks/DeepPrint_TexMinu_512") # Path to the directory containing the model parameters
extractor.load_best_model(MODEL_DIR)

Now we need to specify the dataset for which we want to extract the embeddings.

In [None]:
import os

from flx.data.dataset import *
from flx.data.image_loader import SFingeLoader
from flx.data.transformed_image_loader import TransformedImageLoader
from flx.image_processing.binarization import LazilyAllocatedBinarizer
from flx.data.image_helpers import pad_and_resize_to_deepprint_input_size

# NOTE: If this does not work, enter the absolute path to your dataset directory here!
# NOTE(review): the path below points at FVC2006_DB3_A, not notebooks/example-dataset — confirm which is intended.
DATASET_PATH: str = os.path.abspath("/home/mt0/22CS60R42/fixed-length-fingerprint-extractors/notebooks/FVC2006_DB3_A")

# We will use the SFingeLoader to load the images from the dataset.
# Each image is passed through the listed transforms (binarizer, then pad/resize helper) in order.
image_loader = TransformedImageLoader(
        images=SFingeLoader(DATASET_PATH),
        poses=None,
        transforms=[
            LazilyAllocatedBinarizer(5.0),
            pad_and_resize_to_deepprint_input_size,
        ],
    )

# Dataset over every id the loader exposes
image_dataset: Dataset = Dataset(image_loader, image_loader.ids)

# extract() returns two values: texture embeddings first, minutia embeddings second.
# NOTE(review): the extractor above is built with get_DeepPrint_TexMinu and both values are
# combined later, so the second value is presumably populated here — confirm.
texture_embeddings, minutia_embeddings = extractor.extract(image_dataset)

In [None]:
from flx.benchmarks.matchers import CosineSimilarityMatcher
from flx.data.embedding_loader import EmbeddingLoader

# We concatenate texture and minutia embedding vectors.
# The combined object is reused by later cells (they read embeddings._array).
embeddings = EmbeddingLoader.combine(texture_embeddings, minutia_embeddings)


In [None]:
# Bare expression: shows the combined embeddings object as the cell output
embeddings

In [7]:
import os

def gray_code(n):
    """Return the Gray code of ``n``: ``n`` XOR-ed with ``n`` shifted right by one bit."""
    shifted = n >> 1
    return shifted ^ n

def convert_to_gray_code(x, y):
    """Gray-encode two coordinates and return them as binary digit strings.

    Each value is truncated with ``int()``, passed through ``gray_code`` and
    rendered with ``bin()`` minus its two-character ``0b`` prefix.

    Returns:
        A ``(gray_x, gray_y)`` tuple of strings.
    """
    gray_x, gray_y = (gray_code(int(value)) for value in (x, y))
    bits_x = bin(gray_x)[2:]
    bits_y = bin(gray_y)[2:]
    return bits_x, bits_y

def pad_binary(binary, length):
    """Left-pad ``binary`` with zeros to ``length`` characters (never truncates)."""
    padded = binary.zfill(length)
    return padded

def process_txt_file(txt_file, output_folder, min_length=0):
    """Gray-encode the coordinate pair on the first line of ``txt_file``.

    The first line must hold two whitespace-separated numbers ``x y``. Both
    are Gray-encoded via ``convert_to_gray_code`` and the two bit strings are
    concatenated (x first). The result is written to
    ``<output_folder>/<input file stem>_result.txt``.

    Args:
        txt_file: Path of the input text file.
        output_folder: Directory that receives the result file.
        min_length: Minimum number of characters to write; shorter strings are
            left-padded with zeros. The default of 0 writes the string
            unpadded, which matches the previous behaviour (it padded the
            string to its own length, which had no effect).

    Raises:
        ValueError: If the first line does not contain exactly two fields or
            a field is not a number.
    """
    # Only the first line is used; close the input before the output is opened.
    with open(txt_file, 'r') as f:
        first_line = f.readline()

    fields = first_line.split()
    if len(fields) != 2:
        raise ValueError(
            f"expected two numbers on the first line of {txt_file!r}, "
            f"found {len(fields)} field(s)"
        )
    x, y = map(float, fields)

    gray_x, gray_y = convert_to_gray_code(x, y)
    padded_binary = pad_binary(gray_x + gray_y, min_length)

    stem = os.path.splitext(os.path.basename(txt_file))[0]
    output_file = os.path.join(output_folder, stem + '_result.txt')
    with open(output_file, 'w') as out_f:
        out_f.write(padded_binary)

def process_folder(input_folder, output_folder):
    """Run ``process_txt_file`` on every ``.txt`` entry of ``input_folder``.

    ``output_folder`` is created first when it does not exist yet. Entries
    without a ``.txt`` suffix are ignored.
    """
    output_missing = not os.path.exists(output_folder)
    if output_missing:
        os.makedirs(output_folder)

    txt_names = [name for name in os.listdir(input_folder) if name.endswith('.txt')]
    for name in txt_names:
        process_txt_file(os.path.join(input_folder, name), output_folder)

# Specify input and output folders (machine-specific absolute paths; adjust for your setup)
input_folder = '/home/mt0/22CS60R42/fixed-length-fingerprint-extractors/notebooks/walking_core_points'
output_folder = '/home/mt0/22CS60R42/fixed-length-fingerprint-extractors/notebooks/walking_binary_core'

# Process all .txt files in the input folder; one *_result.txt file is written per input into output_folder
process_folder(input_folder, output_folder)


For Fingerflow: convert the core point coordinates to binary and combine them using XOR.


In [None]:
import os

def convert_to_binary(x, y):
    """Return the binary digit strings of two non-negative coordinates.

    Both values are truncated with ``int()`` before conversion.

    Returns:
        A ``(binary_x, binary_y)`` tuple of strings without a ``0b`` prefix.

    Raises:
        ValueError: If a truncated value is negative. Slicing ``bin()`` output
            of a negative number used to produce strings such as ``'b101'``,
            which are not valid binary digits.
    """
    int_x, int_y = int(x), int(y)
    if int_x < 0 or int_y < 0:
        raise ValueError(f"coordinates must be non-negative, got ({x}, {y})")
    return format(int_x, 'b'), format(int_y, 'b')

def pad_binary(binary, length):
    """Left-pad ``binary`` with zeros to ``length`` characters (never truncates)."""
    padded = binary.zfill(length)
    return padded

def xor_binary(binary1, binary2):
    """Return the bitwise XOR of two binary digit strings as a digit string.

    The result is zero-padded to the length of the longer input, so the width
    of the operands is kept. Previously leading zeros were dropped, e.g. two
    equal inputs gave ``'0'`` regardless of their length.

    Raises:
        ValueError: If an input is not a valid base-2 literal (including the
            empty string), as raised by ``int(..., 2)``.
    """
    width = max(len(binary1), len(binary2))
    xored = int(binary1, 2) ^ int(binary2, 2)
    return format(xored, 'b').zfill(width)

def process_txt_file(txt_file):
    """XOR the binary encodings of the two coordinate pairs in ``txt_file``.

    The first two lines must each hold two whitespace-separated numbers.
    Each pair is converted with ``convert_to_binary`` and concatenated
    (x first); both strings are zero-padded to a common length and combined
    with ``xor_binary``. The result is written next to the input as
    ``<input path without extension>_result.txt``.
    """
    with open(txt_file, 'r') as f:
        lines = f.readlines()

    x1, y1 = map(float, lines[0].split())
    x2, y2 = map(float, lines[1].split())

    bits_first = ''.join(convert_to_binary(x1, y1))
    bits_second = ''.join(convert_to_binary(x2, y2))

    # Bring both operands to the same width before XOR-ing them.
    width = max(len(bits_first), len(bits_second))
    bits_first = pad_binary(bits_first, width)
    bits_second = pad_binary(bits_second, width)
    xored = xor_binary(bits_first, bits_second)

    path_without_ext = os.path.splitext(txt_file)[0]
    with open(path_without_ext + '_result.txt', 'w') as out_f:
        out_f.write(xored)

def process_folder(input_folder):
    """Run ``process_txt_file`` on every input ``.txt`` file in ``input_folder``.

    Results are written into the same folder as ``*_result.txt``. Those files
    are skipped here, so running this again does not treat earlier outputs as
    inputs. Entries are visited in sorted name order.
    """
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith('.txt') or filename.endswith('_result.txt'):
            continue
        process_txt_file(os.path.join(input_folder, filename))

# Specify input folder containing .txt files (machine-specific absolute path; adjust for your setup)
input_folder = '/home/mt0/22CS60R42/fixed-length-fingerprint-extractors/notebooks/fixedlength-core_points2'

# Process all .txt files in the folder; each result is written next to its input as *_result.txt
process_folder(input_folder)


Random projection and all

In [3]:
# import numpy as np
# from sklearn.decomposition import PCA

# # Define z-score normalization function
# def z_score_normalize(data):
#     mean_val = np.mean(data)
#     std_dev = np.std(data)
#     normalized_data = (data - mean_val) / std_dev
#     return normalized_data

# # Define function to apply median filter
# def median_filter(data):
#     # Apply median filter to the entire array
#     filtered_data = np.median(data)
#     print(filtered_data)
#     return filtered_data

# # Define function to apply random projection
# def random_projection(data, n_components):
#     # For 1D arrays, random projection doesn't make sense, but you can just return the original array
#     return data

# # Define function to convert normalized array to binary array
# def to_binary_array(data, threshold):
#     return (data >= threshold).astype(int)

# # Choose normalization method and threshold
# normalize_method = z_score_normalize  # Choose either min_max_scale or z_score_normalize
# threshold = 0.3

# # List to store binary arrays
# binary_arrays = []

# # Normalize, filter, and project each array
# for array in texture_embeddings._array:
#     # Normalize data
#     normalized_array = normalize_method(array)
    
#     # Apply median filter
#     filtered_array = median_filter(normalized_array)
    
#     # # Apply random projection
#     # projected_array = random_projection(filtered_array, n_components=10)  # Adjust n_components as needed
    
#     # Convert to binary array
#     binary_array = to_binary_array(filtered_array, threshold)
    
#     # Append to list
#     binary_arrays.append(binary_array)

# # Print binary arrays
# for i, binary_array in enumerate(binary_arrays):
#     print(f"Binary array for array {i}:")
#     print(binary_array)


Hard-coded core points

In [None]:
# import numpy as np

# # Define z-score normalization function
# def z_score_normalize(data):
#     mean_val = np.mean(data)
#     std_dev = np.std(data)
#     normalized_data = (data - mean_val) / std_dev
#     return normalized_data

# # Define function to convert normalized array to binary array
# def to_binary_array(data, threshold):
#     return (data >= threshold).astype(int)

# # Choose normalization method and threshold
# normalize_method = z_score_normalize  # Choose either min_max_scale or z_score_normalize
# threshold = 0.5

# # List to store binary arrays
# binary_arrays = []

# # Normalize and convert each array to binary
# for array in texture_embeddings._array:
#     # Sort the array (not sure if this step is necessary based on your code)
#     sorted(array)

#     # Normalize the array
#     normalized_array = normalize_method(array)
    
#     # Convert the normalized array to binary using the threshold
#     binary_array = to_binary_array(normalized_array, threshold)
    
#     # Append the binary array to the list
#     binary_arrays.append(binary_array)

# # Generate the permutation pattern from core points (assuming core_points is available)
# core_points = [0, 1, 0, 1]  # Example core points as a binary array

# # Function to permute binary array in intervals using a pattern
# def permute_binary_array_in_intervals(binary_array, pattern):
#     permuted_array = []
#     interval_length = len(binary_array) // len(pattern)
#     for i in range(0, len(binary_array), interval_length):
#         segment = binary_array[i:i+interval_length]
#         permuted_segment = [segment[j] for j in pattern]
#         permuted_array.extend(permuted_segment)
#     return np.array(permuted_array)

# # Permute all binary arrays using the generated pattern
# permuted_arrays = []
# for binary_array in binary_arrays:
#     permuted_binary_array = permute_binary_array_in_intervals(binary_array, core_points)
#     permuted_arrays.append(permuted_binary_array)


# # Print the permuted binary arrays
# for i, permuted_binary_array in enumerate(permuted_arrays):
#     print(f"Permuted binary array for array {i}:")
#     print(permuted_binary_array)


This is the permutation of the binarized embeddings, using the binary core-point string as the seed (pattern).

In [None]:
import numpy as np
import os

# Define z-score normalization function
def z_score_normalize(data):
    """Return ``data`` shifted to zero mean and scaled to unit standard deviation.

    Mean and standard deviation are taken over all elements of ``data``.
    When the standard deviation is zero (all elements equal) an all-zero
    array is returned instead of dividing by zero.
    """
    mean_val = np.mean(data)
    std_dev = np.std(data)
    centered = data - mean_val
    if std_dev == 0:
        # Constant input: every element equals the mean, so the z-score is 0.
        return np.zeros_like(centered)
    return centered / std_dev

# Threshold an array into 0/1 integers
def to_binary_array(data, threshold):
    """Return an int array holding 1 where ``data >= threshold`` and 0 elsewhere."""
    at_or_above = data >= threshold
    return at_or_above.astype(int)

# Choose normalization method and threshold
normalize_method = z_score_normalize
threshold = 0.3

# List to store binary arrays (one per row of embeddings._array)
binary_arrays = []

# Normalize and convert each array to binary
for array in embeddings._array:
    # The element order is kept as-is: the previous bare ``sorted(array)`` call
    # discarded its result, so it only cost time and never reordered anything.

    # Normalize the array
    normalized_array = normalize_method(array)

    # Convert the normalized array to binary using the threshold
    binary_array = to_binary_array(normalized_array, threshold)

    # Append the binary array to the list
    binary_arrays.append(binary_array)
len(binary_arrays)

In [None]:

# Folder containing one binary pattern file per fingerprint
pose_points_folder = "/home/mt0/22CS60R42/fixed-length-fingerprint-extractors/notebooks/example-dataset-fvc2006db3a_pose-binary"


# Function to permute binary array in intervals using the pattern
def permute_binary_array_in_intervals(binary_array, pattern):
    """Rebuild ``binary_array`` segment by segment using ``pattern`` as indices.

    The array is cut into consecutive segments of
    ``len(binary_array) // len(pattern)`` elements. For every segment, each
    value ``j`` of ``pattern`` selects ``segment[j]``; the selections are
    concatenated and returned as a NumPy array.

    Raises:
        ValueError: If ``pattern`` is empty or longer than ``binary_array``
            (the segment length would be zero).
        IndexError: If a pattern value is not a valid index into a segment.
    """
    if len(pattern) == 0:
        raise ValueError("pattern must not be empty")
    interval_length = len(binary_array) // len(pattern)
    if interval_length == 0:
        raise ValueError("pattern must not be longer than the binary array")

    permuted = []
    for start in range(0, len(binary_array), interval_length):
        segment = binary_array[start:start + interval_length]
        permuted.extend(segment[j] for j in pattern)
    return np.array(permuted)


# Permute every binary array with the pattern read from its own file.
# The folder variable used to be referenced under an undefined name
# (core_points_folder), and every array ended up using the last file only.
# NOTE(review): assumes the sorted file order matches the order of binary_arrays — confirm.
permuted_array = []

for binary_array, core_points_file in zip(binary_arrays, sorted(os.listdir(pose_points_folder))):
    # Read the pattern string from the current file
    with open(os.path.join(pose_points_folder, core_points_file), "r") as f:
        core_points_str = f.read().strip()

    # Convert the pattern string to a list of integers
    core_points = [int(bit) for bit in core_points_str]

    # Permute the binary array using the pattern that was read
    permuted_binary_array = permute_binary_array_in_intervals(binary_array, core_points)
    permuted_array.append(permuted_binary_array)

# Print the permuted binary arrays
for i, permuted_binary_array in enumerate(permuted_array):
    print(f"Permuted binary array for array {i}:")
    print(permuted_binary_array)


This is for tensors to normalise and with threshold get binary array

In [None]:
# Folder holding one pattern file per entry (machine-specific absolute path)
pose_points_folder = "/home/mt0/22CS60R42/fixed-length-fingerprint-extractors/notebooks/example-dataset-fvc2006db3a_pose-binary"
permuted_array = []
pose_point_patterns = []
for core_points_file in sorted(os.listdir(pose_points_folder)):
    # Read the whole file and keep its stripped text as the pattern string
    core_points_path = os.path.join(pose_points_folder, core_points_file)
    with open(core_points_path, "r") as f:
        core_points_str = f.read().strip()
    pose_point_patterns.append(core_points_str)

len(pose_point_patterns)

For permutation one

In [None]:
def transform(binary, pattern):
    """Reorder the bits of ``binary`` according to a cyclically repeated ``pattern``.

    Bit ``i`` is matched with ``pattern[i % len(pattern)]``. Bits whose
    pattern character is ``'1'`` are emitted first, in their original order;
    all remaining bits follow, also in their original order.

    Args:
        binary: Iterable of bits (string, list or array); each element is
            converted with ``str()``.
        pattern: String of pattern characters; anything other than ``'1'``
            marks the bit as skipped.

    Returns:
        The reordered bits joined into one string.

    Raises:
        ValueError: If ``pattern`` is empty while ``binary`` is not.
    """
    bits = [str(bit) for bit in binary]
    mask = list(pattern)
    if bits and not mask:
        raise ValueError("pattern must not be empty")

    kept = []      # bits selected by a '1' in the pattern
    skipped = []   # all other bits, appended after the kept ones
    for index, bit in enumerate(bits):
        target = kept if mask[index % len(mask)] == '1' else skipped
        target.append(bit)

    return ''.join(kept + skipped)

# Apply transform() pairwise over two parallel sequences
def transform_all(binaries, patterns):
    """Return ``transform(binary, pattern)`` for each pair from ``zip(binaries, patterns)``.

    Pairing stops at the shorter of the two inputs.
    """
    results = []
    for binary, pattern in zip(binaries, patterns):
        results.append(transform(binary, pattern))
    return results

# Inputs: the binarized embeddings and the pattern strings read from the pattern folder
binary = binary_arrays
pattern = pose_point_patterns

# Apply the transformation to each binary-pattern pair (paired by position)
result = transform_all(binary, pattern)

# Print the results, one transformed string per line
# print(len(result))
for x in result:
    print("OUTPUT",x)


# def hamming_distance_numpy(x, y):
#     # Ensure both strings are of the same length
#     assert len(x) == len(y), "Binary strings must be of the same length"
#     return np.sum(np.array(x) != np.array(y))

def hamming_dist(s1, s2):
    """Count the positions at which ``s1`` and ``s2`` differ.

    Returns -1 when the two sequences have different lengths.
    """
    same_length = len(s1) == len(s2)
    if not same_length:
        return -1
    differing = [left != right for left, right in zip(s1, s2)]
    return sum(differing)

# Pairwise distances between the first transformed strings (at most 10).
# The size is clamped to the number of available entries so that fewer than
# 10 results no longer raise an IndexError.
num_rows = min(10, len(result))
num_cols = num_rows
hamming_distance_matrix = np.zeros((num_rows, num_cols), dtype=int)

for i in range(num_rows):
    for j in range(num_cols):
        hamming_distance_matrix[i, j] = hamming_dist(result[i], result[j])


# Print the matrix row by row
for row in hamming_distance_matrix:
    print(row)

In [None]:
def hamming_dist(s1, s2):
    """Count the positions at which ``s1`` and ``s2`` differ.

    Returns -1 when the two sequences have different lengths.
    """
    same_length = len(s1) == len(s2)
    if not same_length:
        return -1
    differing = [left != right for left, right in zip(s1, s2)]
    return sum(differing)

# driver code: print the distance for every ordered pair of transformed strings.
# The message previously used ``f"{i}" and f"{j}"``, which evaluates to the
# second string only, so the first index was never printed.
for i, first in enumerate(result):
    for j, second in enumerate(result):
        print(f"hamming distance between {i} and {j}: {hamming_dist(first, second)}")
    


In [None]:
# Function to calculate Hamming distance between two binary arrays
def hamming_distance(array1, array2):
    """Return the number of positions at which the two inputs differ.

    Accepts NumPy arrays, lists, or strings of bits. Strings are split into
    characters first; comparing two strings directly yields a single boolean,
    which made the previous result 0 or 1 for string inputs.

    Raises:
        ValueError: If both inputs are non-scalar and their shapes differ.
    """
    first = np.asarray(list(array1) if isinstance(array1, str) else array1)
    second = np.asarray(list(array2) if isinstance(array2, str) else array2)
    if first.ndim and second.ndim and first.shape != second.shape:
        raise ValueError(
            f"inputs must have the same shape, got {first.shape} and {second.shape}"
        )
    return np.sum(first != second)

# Number of consecutive entries of permuted_array assumed to belong to one person
IMPRESSIONS_PER_PERSON = 10
# Persons with an index below this bound form the reference group
NUM_REFERENCE_PERSONS = 6

# Reference-group arrays and the arrays of all remaining persons
binary_arrays_same_person = []
binary_arrays_diff_person = []

# Populate binary arrays lists
for i, permuted_binary_array in enumerate(permuted_array):
    # Append to appropriate list based on person index
    person_index = i // IMPRESSIONS_PER_PERSON
    if person_index < NUM_REFERENCE_PERSONS:
        binary_arrays_same_person.append(permuted_binary_array)
    else:
        binary_arrays_diff_person.append(permuted_binary_array)

# Calculate Hamming distance for same person pairs.
# The reference list is a prefix of permuted_array, so list index i belongs to
# person i // IMPRESSIONS_PER_PERSON. Only pairs inside one person's block are
# compared; previously all pairs across the reference persons were printed
# under this heading.
print("Hamming distances for same person pairs:")
for i in range(len(binary_arrays_same_person)):
    person_end = (i // IMPRESSIONS_PER_PERSON + 1) * IMPRESSIONS_PER_PERSON
    for j in range(i + 1, min(person_end, len(binary_arrays_same_person))):
        distance = hamming_distance(binary_arrays_same_person[i], binary_arrays_same_person[j])
        print(f"Impression {i} vs Impression {j}: {distance}")

# Calculate Hamming distance for different person pairs
# (every reference array against every array of the remaining persons)
print("\nHamming distances for different person pairs:")
for i in range(len(binary_arrays_same_person)):
    for j in range(len(binary_arrays_diff_person)):
        distance = hamming_distance(binary_arrays_same_person[i], binary_arrays_diff_person[j])
        print(f"Impression {i} (Same Person) vs Impression {j} (Different Person): {distance}")


Hamming distance code


In [None]:
import numpy as np

# Function to calculate Hamming distance between two binary arrays
def hamming_distance(array1, array2):
    """Return the number of positions at which the two inputs differ.

    Accepts NumPy arrays, lists, or strings of bits. Strings are split into
    characters first; comparing two strings directly yields a single boolean,
    which made the previous result 0 or 1 for string inputs.

    Raises:
        ValueError: If both inputs are non-scalar and their shapes differ.
    """
    first = np.asarray(list(array1) if isinstance(array1, str) else array1)
    second = np.asarray(list(array2) if isinstance(array2, str) else array2)
    if first.ndim and second.ndim and first.shape != second.shape:
        raise ValueError(
            f"inputs must have the same shape, got {first.shape} and {second.shape}"
        )
    return np.sum(first != second)

# Number of consecutive entries of ``result`` assumed to belong to one person.
# NOTE(review): 12 is taken from the modulus in the previous code, whose comment
# mentioned 10 impressions — confirm the value for this dataset.
IMPRESSIONS_PER_PERSON = 12

# The previous grouping read a loop index ``i`` that the loop never set and
# tested ``i % 12 < 12``, which is always true, so no entry was ever treated
# as a different person. Pairs are now classified directly by comparing the
# person index (position // IMPRESSIONS_PER_PERSON) of both entries.

# Calculate Hamming distance for same person pairs
print("Hamming distances for same person pairs:")
for i in range(len(result)):
    for j in range(i + 1, len(result)):
        if i // IMPRESSIONS_PER_PERSON == j // IMPRESSIONS_PER_PERSON:
            distance = hamming_distance(result[i], result[j])
            print(f"Impression {i} vs Impression {j}: {distance}")

# Calculate Hamming distance for different person pairs
print("\nHamming distances for different person pairs:")
for i in range(len(result)):
    for j in range(i + 1, len(result)):
        if i // IMPRESSIONS_PER_PERSON != j // IMPRESSIONS_PER_PERSON:
            distance = hamming_distance(result[i], result[j])
            print(f"Impression {i} vs Impression {j}: {distance}")


In [None]:
# def hamming_distance_numpy(x, y):
#     # Ensure both strings are of the same length
#     assert len(x) == len(y), "Binary strings must be of the same length"
#     return np.sum(np.array(x) != np.array(y))

def hamming_dist(s1, s2):
    """Count the positions at which ``s1`` and ``s2`` differ.

    Returns -1 when the two sequences have different lengths.
    """
    same_length = len(s1) == len(s2)
    if not same_length:
        return -1
    differing = [left != right for left, right in zip(s1, s2)]
    return sum(differing)

# Pairwise distances between the first binarized embeddings (at most 10).
# The size is clamped to the number of available entries so that fewer than
# 10 arrays no longer raise an IndexError.
num_rows = min(10, len(binary_arrays))
num_cols = num_rows
hamming_distance_matrix = np.zeros((num_rows, num_cols), dtype=int)

for i in range(num_rows):
    for j in range(num_cols):
        hamming_distance_matrix[i, j] = hamming_dist(binary_arrays[i], binary_arrays[j])


# Print the matrix row by row
for row in hamming_distance_matrix:
    print(row)



## Benchmarking

To evaluate the embeddings, we want to run a benchmark on them. For this, we must first specify the type of benchmark, and which comparisons should be run.

In [None]:
from flx.scripts.generate_benchmarks import create_verification_benchmark

# Impression indices 0..NUM_IMPRESSIONS_PER_SUBJECT-1 are passed to the benchmark for every subject
NUM_IMPRESSIONS_PER_SUBJECT = 10
# Verification benchmark over subject ids 0..image_dataset.num_subjects-1
benchmark = create_verification_benchmark(
    #subjects=list(range(100, image_dataset.num_subjects + 100)),
    subjects=list(range(image_dataset.num_subjects)),
    impressions_per_subject=list(range(NUM_IMPRESSIONS_PER_SUBJECT))
)

Now we can run the benchmark. To do this, we must first specify the matcher (in our case cosine similarity of the embeddings)

In [None]:
from flx.benchmarks.matchers import CosineSimilarityMatcher
from flx.data.embedding_loader import EmbeddingLoader

# We concatenate texture and minutia embedding vectors
embeddings = EmbeddingLoader.combine(texture_embeddings, minutia_embeddings)
# Reuse the combined embeddings for the matcher instead of combining them a second time
matcher = CosineSimilarityMatcher(embeddings)

# Run every comparison of the benchmark with the cosine-similarity matcher
results = benchmark.run(matcher)

print(f"Equal-Error-Rate: {results.get_equal_error_rate()}")

To visualize the results, we can plot a DET curve. (Do not be surprised if it is empty; the model is probably not trained enough. Take a look at the EER instead.)

In [None]:
from flx.visualization.plot_DET_curve import plot_verification_results

# Path passed to the plotting helper as the figure location
figure_path = "DET_curve"

# Lists are used to allow for multiple models to be plotted in the same figure
# (results and model_labels are given in the same order)
plot_verification_results(figure_path, results=[results], model_labels=["DeepPrint_TexMinu"], plot_title="example-dataset - verification")