In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_addons as tfa
from pathlib import Path
import numpy as np
import os

In [None]:
# Display the installed TensorFlow version string as this cell's output.
tf.__version__

In [None]:
# Enable on-demand GPU memory allocation ("memory growth") on every visible GPU.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Query the logical devices only AFTER every physical GPU has been
        # configured: listing logical devices initialises the runtime, which
        # would make set_memory_growth fail for any GPU configured afterwards.
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

In [None]:
# model = tf.keras.models.load_model("ckpt/matrix/2206080905/serving/")
# Location of the saved Keras model. Set the KEYSTROKE_MODEL_DIR environment
# variable to point at a different checkpoint; otherwise the original
# machine-specific path is used unchanged.
MODEL_DIR = os.environ.get(
    "KEYSTROKE_MODEL_DIR",
    "/home/hoang/workspace/github/keystroke-dynamic-model/logs/1665664282/checkpoints/",
)
model = tf.keras.models.load_model(MODEL_DIR)

In [None]:
def build_encoder(encoder):
    """Return a Keras model that applies ``encoder`` and L2-normalises its output.

    Args:
        encoder: a Keras model; its ``inputs`` are reused as the inputs of the
            returned model.

    Returns:
        A ``tf.keras.Model`` mapping ``encoder.inputs`` to the encoder output
        normalised with ``tf.math.l2_normalize`` along axis 1.
    """
    def _unit_norm(tensor):
        # Scale every row to unit L2 norm.
        return tf.math.l2_normalize(tensor, axis=1)

    model_inputs = encoder.inputs
    normalised = tf.keras.layers.Lambda(_unit_norm)(encoder(model_inputs))
    return tf.keras.Model(inputs=model_inputs, outputs=normalised)

In [None]:
# Wrap the loaded model with build_encoder so its outputs are L2-normalised.
embed_model = build_encoder(model)
# embed_model = model
# Print the layer-by-layer summary of the wrapped model.
embed_model.summary()

In [None]:
def parse_fn(example_proto):
    """Decode one serialized ``tf.train.Example`` into a ``(data, label)`` pair.

    The record holds a scalar string feature ``"data"`` (raw float32 bytes) and
    a scalar int64 feature ``"label"``.

    Args:
        example_proto: a scalar string tensor containing the serialized example.

    Returns:
        A tuple ``(data, label)`` where ``data`` is a float32 tensor reshaped to
        ``(70, 5)`` and ``label`` is the parsed int64 scalar.
    """
    feature_spec = {
        "data": tf.io.FixedLenFeature((), tf.string),
        "label": tf.io.FixedLenFeature((), tf.int64),
    }
    example = tf.io.parse_single_example(example_proto, feature_spec)
    # The payload is stored as raw bytes; reinterpret them as float32 values.
    flat_values = tf.io.decode_raw(example["data"], tf.float32)
    return tf.reshape(flat_values, shape=(70, 5)), example["label"]


def load_tfrecord(filepath, batch_size=128, shuffle=True, shuffle_buffer_size=12345):
    """Build a batched, prefetched ``tf.data`` pipeline over TFRecord files.

    Args:
        filepath: a TFRecord filename or a list of filenames, passed to
            ``tf.data.TFRecordDataset``.
        batch_size: number of examples per batch.
        shuffle: when true, shuffle the individual examples before batching.
        shuffle_buffer_size: size (in examples) of the shuffle buffer; only
            used when ``shuffle`` is true.

    Returns:
        A ``tf.data.Dataset`` yielding ``(data, label)`` batches as produced by
        ``parse_fn``.
    """
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    dataset = tf.data.TFRecordDataset(filepath, num_parallel_reads=4)
    dataset = dataset.map(parse_fn, num_parallel_calls=4)
    if shuffle:
        # Shuffle BEFORE batching so that individual examples are mixed.
        # Shuffling after `batch` would only reorder whole batches and would
        # keep `shuffle_buffer_size` complete batches in memory.
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(AUTOTUNE)

In [None]:
# Directory that holds the dev-split TFRecord files.
DEV_DATA_DIR = Path("../data/max_1000/dev/")

# Path.glob yields files in arbitrary order; sort them so the dataset (and
# therefore whichever batches are read from it later) is the same on every run.
test_files = sorted(str(path) for path in DEV_DATA_DIR.glob("**/*.tfrecord"))
if not test_files:
    # Fail here with a clear message instead of later on an empty dataset.
    raise FileNotFoundError(f"No .tfrecord files found under {DEV_DATA_DIR}")

test_dataset = load_tfrecord(filepath=test_files, batch_size=64, shuffle=False)

In [None]:
# Number of batches pulled from `test_dataset` (the original loop stopped
# after batch index 10, i.e. after 11 batches).
MAX_TEST_BATCHES = 11

# Materialise the first MAX_TEST_BATCHES batches as NumPy arrays.
X_test = []
y_test = []
for x, y in test_dataset.take(MAX_TEST_BATCHES):
    X_test.append(x.numpy())
    y_test.append(y.numpy())

if not X_test:
    # np.concatenate on an empty list raises an unhelpful error; be explicit.
    raise ValueError("test_dataset yielded no batches; nothing to evaluate")

X_test = np.concatenate(X_test)
y_test = np.concatenate(y_test)

print(X_test.shape, y_test.shape)

In [None]:
# X_test = np.load("processed_data/X_train.npy")
# y_test = np.load("processed_data/y_train.npy")

# print(X_test.shape, y_test.shape)

In [None]:
# import random
# from sklearn.manifold import TSNE
# import matplotlib.pyplot as plt

# vectors = embed_model(X_test).numpy()
# labels = y_test

# print(vectors.shape, labels.shape)

# if vectors.shape[-1] > 2:
#     print("Reducing dimension of data ... ")
# #     embeds = TSNE(n_components=2).fit_transform(vectors)

# plt.figure(figsize=(8, 6), facecolor="azure")
# labels = np.squeeze(labels)
# for label in np.unique(labels):
#     tmp = embeds[labels==label]
#     plt.scatter(tmp[:, 0], tmp[:, 1], label=label)

# plt.legend()
# plt.show()

In [None]:
import random

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Labels are the test labels as-is; embeddings come from running the whole
# test array through the normalising encoder in a single call.
labels = y_test
embedding_tensor = embed_model(X_test)
vectors = embedding_tensor.numpy()

In [None]:
# Display the embedding array computed in the previous cell as this cell's output.
vectors

In [None]:
from tqdm import tqdm

# Grid of panels; each drawn panel is a 2-D projection of the embeddings of a
# random subset of classes.
nrows = 4
ncols = 4
n_panels = 1            # how many panels to draw (at most nrows * ncols)
classes_per_panel = 10  # how many distinct classes to show per panel

fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(20, 16))

# Sample from the DISTINCT class ids. Sampling from `labels` directly picks
# label occurrences, so the same class could be drawn more than once and its
# points would then be duplicated in the projection.
unique_labels = np.unique(labels).tolist()

for i in tqdm(range(n_panels)):

    sample_classes = random.sample(
        unique_labels, k=min(classes_per_panel, len(unique_labels))
    )

    # Keep only the embeddings (and labels) of the sampled classes.
    selected = np.isin(labels, sample_classes)
    x = vectors[selected]
    y = labels[selected]

    if x.shape[-1] > 2:
        # Project down to 2-D for plotting.
        embs = TSNE(n_components=2).fit_transform(x)
    else:
        # Embeddings need no reduction; plot them directly
        # (previously `embs` was left undefined in this case).
        embs = x

    ax = axes[i // ncols][i % ncols]

    for l in np.unique(y):
        tmp = embs[y == l]
        # No `c=` data is given, so norm/vmin/vmax would be ignored; omit them.
        ax.scatter(tmp[:, 0], tmp[:, 1], label=l)

    # Attach the legend to the panel that was just drawn; plt.legend() would
    # target the most recently created subplot instead.
    ax.legend()

# Hide the panels that were not drawn so the figure has no empty frames.
for unused_ax in axes.ravel()[n_panels:]:
    unused_ax.set_visible(False)

plt.show()

In [None]:
# TEST AUGMENT ON TRAIN SET


# TEST AUGMENT ON TEST SET


In [None]:
import random
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist, pdist

# Per-class verification error rates at a fixed distance threshold.
# A pair is accepted as "same class" when its distance is below `threshold`.
#   FP: fraction of different-class pairs that fall below the threshold
#       (wrongly accepted).
#   FN: fraction of same-class pairs that exceed the threshold
#       (wrongly rejected).
# FN / FP accumulate the per-class rates; FN_total / FP_total count the classes
# that contributed, so FN / FN_total and FP / FP_total are the running means.

vectors = embed_model(X_test).numpy()
labels = y_test

metric = 'cosine'
threshold = 0.3

FN_total = 0
FN = 0

FP_total = 0
FP = 0

for label in np.unique(labels):
    is_same_class = labels == label

    positive = vectors[is_same_class]
    negative = vectors[~is_same_class]

    # pdist returns each unordered pair of distinct samples exactly once, so
    # self-distances are excluded by construction (no value-based filter that
    # could also drop genuinely identical samples).
    d_positive = pdist(positive, metric=metric)
    d_negative = cdist(positive, negative, metric=metric).ravel()

    if len(d_positive) == 0:
        # Fewer than two samples of this class: no same-class pair exists.
        print("FAIL ON GET POSITIVE, IGNORE")
        continue

    if len(d_negative) == 0:
        # No sample of any other class: the negative rate would divide by zero.
        print("FAIL ON GET NEGATIVE, IGNORE")
        continue

#     plt.boxplot([d_positive, d_negative], whis=5)
#     plt.legend(['d_positive', 'd_negative'])
#     plt.show()

    # Different-class pairs under the threshold are accepted by mistake.
    fp = np.sum(d_negative < threshold) / len(d_negative)
    # Same-class pairs over the threshold are rejected by mistake.
    fn = np.sum(d_positive > threshold) / len(d_positive)

    FP_total += 1
    FP += fp

    FN_total += 1
    FN += fn

    if label % 100 == 0:
        print(label, "FN: {}, FP: {}".format(FN/FN_total, FP/FP_total))

# Always report the final averages, whatever the last label id was.
if FN_total:
    print("FINAL", "FN: {}, FP: {}".format(FN/FN_total, FP/FP_total))

In [None]:
import json
from pathlib import Path

# Walk every JSON file under the log directory and load it.
for filepath in Path("/home/hoang/Downloads/log_case/").glob("**/*.json"):
    # Read JSON as UTF-8 instead of relying on the platform's default encoding.
    with open(filepath, encoding="utf-8") as f:
        raw_data = json.load(f)
    # Check the value that was just loaded; the previous code tested the
    # undefined name `data`, which raised NameError on the first file.
    if isinstance(raw_data, list):
        pass

In [None]:
# Print the fractions 0/16, 1/16, ..., 15/16, one per line.
denominator = 16
for i in range(denominator):
    fraction = i / denominator
    print(fraction)