# Setup

In [1]:
import os
import re
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
# Point XLA at the CUDA data directory through the XLA_FLAGS environment variable.
# NOTE(review): this assignment runs after `import tensorflow`; presumably XLA reads
# the variable lazily, but confirm it does not need to be set before the import.
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/opt/conda'

2024-04-11 20:19:08.850550: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-11 20:19:08.894648: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-11 20:19:08.894677: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-11 20:19:08.896111: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-11 20:19:08.903492: I tensorflow/core/platform/cpu_feature_guar

# Testing 

#### Defining Functions 

In [2]:
def build_image_paths(folders):
    """Collect the paths of every ``.png`` file located directly inside the given folders.

    Args:
        folders: Iterable of directory paths. Each one is listed non-recursively.

    Returns:
        A lexicographically sorted list of ``os.path.join(folder, filename)``
        strings for every entry whose name ends with ``.png`` (case-sensitive).
    """
    collected = []
    for directory in folders:
        for entry in os.listdir(directory):
            if entry.endswith('.png'):
                collected.append(os.path.join(directory, entry))
    # Sorting makes the order independent of the filesystem listing order.
    collected.sort()
    return collected

def preprocess_image(image_path):
    """Read a PNG file and return it as a single-channel uint8 tensor resized to 481x600.

    Args:
        image_path: Path (string or scalar string tensor) of the PNG file to load.

    Returns:
        A uint8 tensor; the 481x600 spatial size and single channel match the
        ``input_shape=(481, 600, 1)`` declared in ``create_model``.
    """
    raw_bytes = tf.io.read_file(image_path)
    # channels=1 forces a grayscale decode.
    grayscale = tf.image.decode_png(raw_bytes, channels=1)
    # Resize to the spatial size the model expects.
    resized = tf.image.resize(grayscale, [481, 600])
    # Cast the resized result back to uint8.
    return tf.cast(resized, tf.uint8)

def load_and_preprocess_data(image_path):
    """Build the ``(image, label)`` pair for a single image file.

    Args:
        image_path: Path of the image; the label is parsed from this same path.

    Returns:
        Tuple of the output of ``preprocess_image`` and the output of
        ``tf_parse_label_from_path`` for ``image_path``.
    """
    return preprocess_image(image_path), tf_parse_label_from_path(image_path)

def tf_parse_label_from_path(path):
    """Extract the numeric label embedded in a file path as a uint8 tensor.

    The label is the run of digits that directly precedes ``_keystrokes`` in the
    last path component matched by the pattern (e.g. ``.../<digits>_keystrokes...``).

    Args:
        path: Scalar string tensor (or string) holding the file path.

    Returns:
        The parsed number, converted to int32 and then cast to uint8.
    """
    # Replace the whole path with just the captured digit group.
    digits = tf.strings.regex_replace(path, r'.*\/(\d+)_keystrokes.*', r'\1')
    as_int32 = tf.strings.to_number(digits, out_type=tf.int32)
    # NOTE(review): uint8 only represents 0-255; confirm no label exceeds that range.
    return tf.cast(as_int32, tf.uint8)

def shuffle_batch(features, labels):
    """Shuffle a batch of features and labels with one shared random permutation.

    Args:
        features: Tensor whose first dimension indexes the examples of the batch.
        labels: Tensor indexed along its first dimension in the same order.

    Returns:
        Tuple ``(shuffled_features, shuffled_labels)`` reordered identically, so
        each feature stays paired with its label.
    """
    num_examples = tf.shape(features)[0]
    # A single permutation is drawn and applied to both tensors.
    permutation = tf.random.shuffle(tf.range(num_examples))
    return tf.gather(features, permutation), tf.gather(labels, permutation)

def augment(image, label, margin):
    """Randomly flip an image horizontally and/or vertically.

    Each flip direction is decided independently: a uniform sample in [0, 1) is
    drawn and the flip is applied when the sample exceeds ``margin``. The flip
    probability per direction is therefore ``1 - margin`` (a margin of .95 gives
    a 5% chance).

    The flips use the deterministic ``tf.image.flip_left_right`` /
    ``tf.image.flip_up_down``. The ``random_flip_*`` variants flip only half of
    the time on their own, which would halve the documented probability when
    combined with the ``margin`` gate.

    Args:
        image: Image tensor to augment.
        label: Label associated with the image; returned unchanged.
        margin: Threshold in [0, 1]; ``1 - margin`` is the flip probability.

    Returns:
        Tuple ``(image, label)`` with the possibly flipped image.
    """
    # Horizontal flip with probability (1 - margin).
    after_lr = tf.cond(
        tf.random.uniform([], 0, 1) > margin,
        lambda: tf.image.flip_left_right(image),
        lambda: image,
    )
    # Vertical flip with probability (1 - margin), independent of the first draw.
    after_ud = tf.cond(
        tf.random.uniform([], 0, 1) > margin,
        lambda: tf.image.flip_up_down(after_lr),
        lambda: after_lr,
    )
    return after_ud, label

def create_model():
    """Build the untrained FPNet embedding network.

    The network takes ``(481, 600, 1)`` inputs and stacks eight ReLU convolutions
    (four with ``(1, 3)`` kernels, then four with ``(3, 3)`` kernels), each except
    the last followed by a ``(1, 2)`` max-pool. The result is flattened, projected
    to 128 dimensions by a linear dense layer and L2-normalised.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    def conv(filters, kernel_size, **extra):
        # Every convolution in the network uses a ReLU activation.
        return layers.Conv2D(filters=filters, kernel_size=kernel_size, activation='relu', **extra)

    def pool(**extra):
        # Pooling only halves the second spatial axis; the first is left untouched.
        return layers.MaxPooling2D(pool_size=(1, 2), strides=(1, 2), **extra)

    # Layer order must stay exactly as is: saved weights are restored into this stack.
    stack = [
        conv(24, (1, 3), input_shape=(481, 600, 1)),
        pool(),  # the first pool keeps the default padding
        conv(32, (1, 3)),
        pool(padding='same'),
        conv(64, (1, 3)),
        pool(padding='same'),
        conv(64, (1, 3)),
        pool(padding='same'),
        conv(96, (3, 3)),
        pool(padding='same'),
        conv(96, (3, 3)),
        pool(padding='same'),
        conv(128, (3, 3)),
        pool(padding='same'),
        conv(128, (3, 3)),
        layers.Flatten(),
        layers.Dense(128, activation=None),  # linear projection to the embedding space
        layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)),  # unit-length embeddings
    ]
    return tf.keras.Sequential(stack)

def calculate_rank_n_accuracy(embeddings1, embeddings2, labels1, labels2, n):
    """Compute rank-n identification accuracy of query embeddings against a gallery.

    For every query in ``embeddings1`` the ``n`` nearest gallery embeddings in
    ``embeddings2`` (Euclidean distance) are looked up; the query counts as
    correct when its label appears among the labels of those neighbours.

    Args:
        embeddings1: Query embeddings, array-like of shape (num_queries, dim).
        embeddings2: Gallery embeddings, array-like of shape (num_gallery, dim).
        labels1: Labels of the queries, array-like of length num_queries.
        labels2: Labels of the gallery, array-like of length num_gallery.
        n: Number of nearest gallery entries inspected per query.

    Returns:
        Fraction of queries with a correct match in their top ``n`` neighbours,
        as a float. Returns 0.0 when there are no queries.
    """
    # Coerce to arrays so plain Python lists work too (fancy indexing needs ndarray).
    embeddings1 = np.asarray(embeddings1)
    embeddings2 = np.asarray(embeddings2)
    labels1 = np.asarray(labels1)
    labels2 = np.asarray(labels2)

    num_queries = len(embeddings1)
    if num_queries == 0:
        # Nothing to evaluate; avoid dividing by zero.
        return 0.0

    correct_matches = 0
    for i in range(num_queries):
        # Euclidean distances from this query to every gallery embedding.
        distances = np.linalg.norm(embeddings2 - embeddings1[i], axis=1)
        # Indices of the n closest gallery embeddings.
        closest_indices = np.argsort(distances)[:n]
        if labels1[i] in labels2[closest_indices]:
            correct_matches += 1
    return correct_matches / num_queries



#### Recreate Model

In [3]:
# Build a fresh, untrained copy of the network architecture.
recreated_model = create_model()
# Attach the Adam optimizer and the semi-hard triplet loss to the model.
recreated_model.compile(
    loss=tfa.losses.TripletSemiHardLoss(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),
)
# Restore the saved parameters into the freshly built layers.
recreated_model.load_weights('./saved_model/fpnet_v5.h5')

2024-04-11 20:19:18.706530: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 380 MB memory:  -> device: 0, name: NVIDIA A40, pci bus id: 0000:65:00.0, compute capability: 8.6


#### Creating Image Dataset

In [4]:
# Folder(s) holding the test images.
folders = ["./images4_png_7-9/7-9"]
# Sorted list of every PNG path found in those folders (built once).
image_paths = build_image_paths(folders)
if not image_paths:
    # Fail early with a clear message instead of an opaque error inside Dataset.map.
    raise FileNotFoundError(f"No .png images found in {folders}")
# Dataset of path strings; images and labels are both derived from it, so the
# two mapped datasets below stay aligned element by element.
image_paths_ds = tf.data.Dataset.from_tensor_slices(image_paths)
# Decode/resize each image and parse each label from its path.
images_ds = image_paths_ds.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
labels_ds = image_paths_ds.map(tf_parse_label_from_path, num_parallel_calls=tf.data.AUTOTUNE)

#### Splitting Dataset By User

In [5]:
# Placeholder for demonstration purposes
def preprocess_image_2(image):
    """Identity placeholder for an additional preprocessing step.

    Args:
        image: The image to process.

    Returns:
        The same ``image`` object, unchanged.
    """
    return image

# Pair every image with its label so both can be iterated together.
dataset = tf.data.Dataset.zip((images_ds, labels_ds))

# Containers for the two halves of the split.
images1, labels1 = [], []
images2, labels2 = [], []

# Deal the (image, label) pairs alternately into the two halves: a pair goes to
# the first half whenever it is not larger than the second, otherwise to the second.
# Note: this materialises the whole dataset in memory as numpy arrays.
for image, label in dataset:
    goes_first = len(images1) <= len(images2)
    target_images, target_labels = (images1, labels1) if goes_first else (images2, labels2)
    target_images.append(image.numpy())
    target_labels.append(label.numpy())

#### Embedding Test Datasets

In [None]:
# Turn the per-example label lists into numpy arrays.
labels1, labels2 = np.array(labels1), np.array(labels2)
# Stack the per-example images into one batch tensor per half (new leading axis).
images1_tensor = tf.stack(images1)
images2_tensor = tf.stack(images2)
# Run the restored model over each half to obtain its embeddings.
embeddings1 = recreated_model.predict(images1_tensor)
embeddings2 = recreated_model.predict(images2_tensor)

#### Calculate Rank N Accuracy

In [None]:
# Rank-1: a query is correct when its single nearest gallery embedding shares its label.
accuracy = calculate_rank_n_accuracy(embeddings1, embeddings2, labels1, labels2, n=1)
print(f"Rank-1 Accuracy: {accuracy}")

#### Save Embeddings 

In [None]:
# Write each embedding matrix to its own CSV with a column1..columnN header row.
for csv_name, matrix in (('embeddings1_fpnet.csv', embeddings1), ('embeddings2_fpnet.csv', embeddings2)):
    column_names = ['column{}'.format(i) for i in range(1, matrix.shape[1] + 1)]
    # comments='' stops numpy from prefixing the header line with '# '.
    np.savetxt(csv_name, matrix, delimiter=',', header=','.join(column_names), comments='')