# Grid search code

In [1]:
# Directories whose PNG files make up the training set.
train_folders = [
    "images4_png_4-6/4-6",
    "images4_png_1",
    "images4_png_2",
    "images4_png_3",
]
# Directory whose PNG files are held out for evaluation.
test_folders = ["./images4_png_7-9/7-9"]

In [2]:
import random
import re
import json
import os
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
from time import time
from tqdm import tqdm
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorboard.plugins.hparams import api as hp
# Set the XLA `--xla_gpu_cuda_data_dir` flag to /opt/conda for this process.
# NOTE(review): presumably the CUDA toolkit lives under the conda prefix on the
# training machine — adjust this path for other environments.
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/opt/conda'

def get_paths(folders):
    """Return the sorted paths of every ``.png`` file directly inside ``folders``.

    Args:
        folders: iterable of directory paths. Each directory is listed
            non-recursively with ``os.listdir``.

    Returns:
        A list of ``os.path.join(folder, filename)`` strings, sorted
        lexicographically on the full path.
    """
    collected = []
    for directory in folders:
        for entry in os.listdir(directory):
            if entry.endswith('.png'):
                collected.append(os.path.join(directory, entry))
    # Sort on the full path so the order is deterministic across runs.
    collected.sort()
    return collected

def preprocess_image(image_path):
    """Read a PNG file and return it as a single-channel 481x600 uint8 tensor.

    Args:
        image_path: path (string tensor) of the PNG file to load.

    Returns:
        The decoded image, resized to height 481 and width 600 and cast to
        ``tf.uint8``.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_png(raw_bytes, channels=1)
    # The size matches the input_shape the model is built with.
    resized = tf.image.resize(decoded, [481, 600])
    # Cast the resized result back to uint8 before handing it to the pipeline.
    return tf.cast(resized, tf.uint8)

def augment(image, label, margin):
    """Randomly flip ``image`` horizontally and/or vertically.

    Each flip is applied independently with probability ``1 - margin``
    (i.e. when a uniform sample from [0, 1) exceeds ``margin``).

    The flips inside the gate are the deterministic ``tf.image.flip_*`` ops.
    Using ``tf.image.random_flip_*`` here would apply a second coin toss and
    halve the effective flip probability to ``(1 - margin) / 2``.

    Args:
        image: image tensor to augment.
        label: label passed through unchanged.
        margin: threshold in [0, 1]; ``1`` disables flipping, ``0`` flips
            (almost) always.

    Returns:
        ``(image, label)`` with the possibly flipped image.
    """
    def _maybe_flip(img, flip_fn):
        # One independent draw per flip direction.
        should_flip = tf.random.uniform([], 0, 1) > margin
        return tf.cond(should_flip, lambda: flip_fn(img), lambda: img)

    image = _maybe_flip(image, tf.image.flip_left_right)
    image = _maybe_flip(image, tf.image.flip_up_down)
    # You can add more conditional augmentations here.
    return image, label

def parse_label_from_path(path):
    """Extract the integer label encoded in a file path.

    The label is the run of digits immediately preceding ``_keystrokes`` in
    the path. Paths without that pattern get label ``0``.
    NOTE(review): unmatched paths silently share label 0 — confirm that every
    file name really contains ``<digits>_keystrokes``.

    Args:
        path: eager string tensor (as supplied by ``tf.py_function``).

    Returns:
        A scalar ``tf.int32`` tensor. int32 is used instead of uint8 because a
        uint8 cast wraps any value above 255, making distinct labels collide.
    """
    match = re.search(r'(\d+)_keystrokes', path.numpy().decode('utf-8'))
    label = int(match.group(1)) if match else 0
    return tf.cast(label, tf.int32)

def load_and_preprocess_data(image_path):
    """Map an image path to an ``(image, label)`` pair for the input pipeline."""
    image = preprocess_image(image_path)
    label = tf_parse_label_from_path(image_path)
    return image, label

def tf_parse_label_from_path(path):
    """Graph-compatible wrapper that runs ``parse_label_from_path`` eagerly.

    ``Tout`` must match the dtype returned by ``parse_label_from_path``.
    """
    return tf.py_function(parse_label_from_path, [path], Tout=tf.int32)

def calculate_rank_n_accuracy(embeddings1, embeddings2, labels1, labels2, n):
    """Compute rank-``n`` retrieval accuracy between two embedding sets.

    For every query embedding in ``embeddings1`` the ``n`` nearest embeddings
    in ``embeddings2`` (Euclidean distance) are looked up; the query counts as
    correct when its label appears among the labels of those neighbours.

    Args:
        embeddings1: array-like of shape (num_queries, dim).
        embeddings2: array-like of shape (num_gallery, dim).
        labels1: array-like of length num_queries.
        labels2: array-like of length num_gallery.
        n: number of nearest neighbours to inspect.

    Returns:
        Fraction of queries with a correct match, as a float in [0, 1].
        Returns ``0.0`` when there are no queries (instead of dividing by zero).
    """
    queries = np.asarray(embeddings1)
    gallery = np.asarray(embeddings2)
    query_labels = np.asarray(labels1)
    gallery_labels = np.asarray(labels2)

    num_queries = len(queries)
    if num_queries == 0:
        return 0.0

    correct_matches = 0
    for i, query in enumerate(queries):
        # Euclidean distance from this query to every gallery embedding.
        distances = np.linalg.norm(gallery - query, axis=1)

        # Indices of the n closest gallery embeddings.
        closest_indices = np.argsort(distances)[:n]

        # A hit if the query's label is among those n neighbours' labels.
        if query_labels[i] in gallery_labels[closest_indices]:
            correct_matches += 1

    return correct_matches / num_queries

def create_model():
    """Build the (uncompiled) convolutional embedding network.

    The network takes 481x600 single-channel images, applies eight ReLU
    convolutions with a max-pool (pool and stride (1, 2)) after every
    convolution except the last, then flattens, projects to 128 dimensions
    with a linear dense layer and L2-normalises along axis 1.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    # (filters, kernel_size) of each convolution, in network order.
    conv_specs = [
        (24, (1, 3)), (32, (1, 3)), (64, (1, 3)), (64, (1, 3)),
        (96, (3, 3)), (96, (3, 3)), (128, (3, 3)), (128, (3, 3)),
    ]
    final_conv = len(conv_specs) - 1

    stack = []
    for position, (n_filters, kernel) in enumerate(conv_specs):
        conv_kwargs = dict(filters=n_filters, kernel_size=kernel, activation='relu')
        if position == 0:
            # Only the first layer declares the input shape.
            conv_kwargs['input_shape'] = (481, 600, 1)
        stack.append(layers.Conv2D(**conv_kwargs))

        if position == final_conv:
            continue  # the last convolution feeds Flatten directly, no pooling

        pool_kwargs = dict(pool_size=(1, 2), strides=(1, 2))
        if position > 0:
            # The first pool keeps the default padding; all later ones use 'same'.
            pool_kwargs['padding'] = 'same'
        stack.append(layers.MaxPooling2D(**pool_kwargs))

    stack.append(layers.Flatten())
    stack.append(layers.Dense(128, activation=None))  # No activation on final dense layer
    stack.append(layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)))  # L2 normalize embeddings
    return tf.keras.Sequential(stack)



def create_dataset(folders):
    """Build an unbatched ``tf.data`` pipeline of ``(image, label)`` pairs.

    Args:
        folders: directories to scan for PNG files (see ``get_paths``).

    Returns:
        A dataset that yields, in sorted-path order, the result of
        ``load_and_preprocess_data`` for every image path found.
    """
    sorted_paths = get_paths(folders)
    path_dataset = tf.data.Dataset.from_tensor_slices(sorted_paths)
    # Load and label the images in parallel; AUTOTUNE picks the parallelism.
    return path_dataset.map(load_and_preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)

def augment_dataset(dataset, augment_set, margin):
    """Optionally add random horizontal/vertical flips to a dataset.

    When enabled, each flip direction is applied independently with
    probability ``1 - margin``. The gated flips are the deterministic
    ``tf.image.flip_*`` ops: using ``tf.image.random_flip_*`` inside the gate
    would toss a second coin and halve the effective flip probability.

    Args:
        dataset: dataset of ``(image, label)`` elements (unbatched images).
        augment_set: truthy to apply augmentation, falsy to return
            ``dataset`` unchanged.
        margin: threshold in [0, 1]; ``1`` disables flipping.

    Returns:
        The (possibly augmented) dataset. No batching is done here.
    """
    def _random_flips(image, label):
        def _maybe_flip(img, flip_fn):
            # One independent uniform draw per flip direction.
            should_flip = tf.random.uniform([], 0, 1) > margin
            return tf.cond(should_flip, lambda: flip_fn(img), lambda: img)

        image = _maybe_flip(image, tf.image.flip_left_right)
        image = _maybe_flip(image, tf.image.flip_up_down)
        # You can add more conditional augmentations here.
        return image, label

    # Augmentation is meant for the training split only; callers opt in.
    if augment_set:
        dataset = dataset.map(_random_flips, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset

def batch_dataset(dataset, batch_size):
    """Group ``dataset`` into batches of ``batch_size`` and enable prefetching.

    Args:
        dataset: dataset to batch.
        batch_size: number of consecutive elements per batch.

    Returns:
        The batched dataset with an AUTOTUNE-sized prefetch buffer.
    """
    batched = dataset.batch(batch_size)
    return batched.prefetch(buffer_size=tf.data.AUTOTUNE)

2024-04-09 19:36:02.151493: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-09 19:36:02.195655: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-09 19:36:02.195687: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-09 19:36:02.197234: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-09 19:36:02.205034: I tensorflow/core/platform/cpu_feature_guar

In [3]:

def evaluate(dataset, optimizer, learning_rate,
             weights_path='./saved_model/fpnet_test.h5', rank=1):
    """Compute rank-``rank`` matching accuracy of the saved model on ``dataset``.

    The dataset is treated as a sequence of consecutive image pairs: elements
    0, 2, 4, ... form the query set and elements 1, 3, 5, ... the gallery set.
    NOTE(review): this assumes the (path-sorted) dataset places the two images
    of the same identity next to each other — confirm against the file naming.

    All batches are concatenated before the even/odd split, so the pairing
    does not depend on the batch size (splitting each batch separately would
    shift the pairing after any batch of odd length).

    Args:
        dataset: batched dataset yielding ``(image_batch, label_batch)``.
        optimizer: optimizer class (e.g. ``tf.keras.optimizers.Adam``); it is
            instantiated with ``learning_rate`` and used to compile the model.
        learning_rate: learning rate passed to ``optimizer``.
        weights_path: HDF5 weights file to load into the model.
        rank: number of nearest neighbours inspected per query.

    Returns:
        The accuracy returned by ``calculate_rank_n_accuracy``.

    Raises:
        ValueError: if ``dataset`` yields no batches.
    """
    image_batches = []
    label_batches = []
    for image_batch, label_batch in dataset:
        image_batches.append(image_batch)
        label_batches.append(label_batch)

    if not image_batches:
        raise ValueError("evaluate() received an empty dataset")

    # Concatenate first, then split into the two halves of each pair.
    images = tf.concat(image_batches, axis=0)
    labels = tf.concat(label_batches, axis=0)
    images1, images2 = images[::2], images[1::2]
    labels1, labels2 = labels[::2].numpy(), labels[1::2].numpy()

    # Rebuild the training architecture from the shared factory so the two
    # definitions cannot drift apart, then restore the saved weights.
    recreated_model = create_model()
    recreated_model.compile(
        optimizer=optimizer(learning_rate=learning_rate),
        loss=tfa.losses.TripletSemiHardLoss()
    )
    recreated_model.load_weights(weights_path)

    # Embed both halves and score the nearest-neighbour matching.
    embeddings1 = recreated_model.predict(images1)
    embeddings2 = recreated_model.predict(images2)
    return calculate_rank_n_accuracy(embeddings1, embeddings2, labels1, labels2, rank)

In [4]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint callback: writes weights only (not the full model) to the HDF5
# file, and only when the monitored training loss reaches a new minimum.
model_checkpoint_callback = ModelCheckpoint(
    filepath='saved_model/fpnet_test.h5',
    monitor='loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
# Training pipeline: load images, apply random flips (margin .95), batch by 256.
# NOTE(review): the dataset is never shuffled, so every epoch sees the same
# batches in path-sorted order — confirm this is intended.
train = create_dataset(train_folders)
train = augment_dataset(train, True, .95)
train = batch_dataset(train, 256)

model = create_model()
learning_rate = .001
opt = tf.keras.optimizers.Adam
# Build the optimizer from `learning_rate` so that editing the value above
# actually changes the training run (it was previously a duplicated literal).
model.compile(optimizer=opt(learning_rate=learning_rate), loss=tfa.losses.TripletSemiHardLoss())
# The checkpoint callback keeps the weights of the lowest-loss epoch on disk.
history = model.fit(train, epochs=100, verbose=True, callbacks=[model_checkpoint_callback])  # Adjust epochs as needed

In [None]:
# Evaluate the saved weights on the held-out folders. The batch size of 16606
# is presumably chosen so the whole test set arrives as a single batch.
test = batch_dataset(create_dataset(test_folders), 16606)
val_accuracy = evaluate(test, tf.keras.optimizers.Adam, .001)

2024-04-09 19:36:35.409222: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 43457 MB memory:  -> device: 0, name: NVIDIA A40, pci bus id: 0000:65:00.0, compute capability: 8.6


In [None]:
# Display the accuracy returned by evaluate() in the evaluation cell.
val_accuracy

In [None]:
#0%
#For 10 epochs
#For 10000 in train, 1000 in test
#Augment margin 1
#Acc 0.322

In [None]:
#5%
#For 10 epochs
#For 10000 in train, 1000 in test
#Augment margin .95
#Acc 0.336

In [None]:
#10%
#For 10 epochs
#For 10000 in train, 1000 in test
#Augment margin .90
#Acc 0.136


In [None]:
#15%
#For 10 epochs
#For 10000 in train, 1000 in test
#Augment margin .85
#Acc 0.256

In [None]:
#20%
#For 10 epochs
#For 10000 in train, 1000 in test
#Augment margin .80
#Acc 0.222

In [None]:
#25%
#For 10 epochs
#For 10000 in train, 1000 in test
#Augment margin .75
#Acc 0.222