In [24]:
import json
import numpy as np


In [25]:
labels_dataset = "./data/labels/1 label dataset.json"
values_dataset = "./data/values/1 values dataset.json"

In [26]:
landmark_list = json.load(open(values_dataset))
timestamps = [float(x) for x in list(landmark_list.keys())]

#loading labels from the key_log.txt file with format: timestamp,key
labels = json.load(open(labels_dataset))

In [27]:
len(labels), len(timestamps), len(landmark_list)

(229, 4796, 4796)

In [28]:
values = []
for val in landmark_list.values():
    values.append(list(val.values()))

values = np.array(values)
values.shape

(4796, 21, 3)

In [29]:
data = []
window_size = 14
overlap = 3
for i in range(0, len(values) - window_size, overlap):
    data.append(values[i:i+window_size])

data = np.array(data)
data.shape

(1594, 14, 21, 3)

# Training autoencoder for self-supervised learning (easy way)
### This helps learning representation of dynamic gestures

In [30]:
import tensorflow as tf
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import backend as K

input_shape = Input(shape=(window_size, 21, 3))
x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_shape)
x = MaxPooling2D((2, 3), padding='same')(x)
encoded = Conv2D(32, (3, 3), activation='relu', padding='same', name = "encoder")(x)
encoder_model = Model(input_shape, encoded)

decoder_input = Input(shape = (encoder_model.output_shape[1:]))
#x = Conv2D(32, (3, 3), activation='relu', padding='same')(decoder_input)
x = UpSampling2D((2, 3))(decoder_input)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((1, 1))(x)
decoded = Conv2D(3, (3, 3), activation='relu', padding='same')(x)
decoder_model = Model(decoder_input, decoded)

encoded_representation = encoder_model(input_shape)
decoded_output = decoder_model(encoded_representation)
autoencoder = Model(inputs=input_shape, outputs=decoded_output)

autoencoder.summary()

Model: "model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 14, 21, 3)]       0         
                                                                 
 model_4 (Functional)        (None, 7, 7, 32)          5088      
                                                                 
 model_5 (Functional)        (None, 14, 21, 3)         5059      
                                                                 
Total params: 10147 (39.64 KB)
Trainable params: 10147 (39.64 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [31]:
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(data, data, epochs=30, batch_size=16, shuffle=True, validation_split=0.1)

Epoch 1/30


2023-06-20 17:49:09.508213: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
  6/180 [>.............................] - ETA: 1s - loss: 0.0046

2023-06-20 17:49:12.187730: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x1685a6110>

# Finetuning the encoder part

In [43]:
#fine tuning the encoder model
from keras.models import Sequential
from keras.layers import Flatten
for layer in encoder_model.layers:
    layer.trainable = False

additional_layers = Sequential([
    Input(shape = (encoder_model.output_shape[1:])),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(128, activation='relu'),
    Dense(32, activation='relu'),
    Dense(4, activation='sigmoid')
]
)

In [44]:
model = Sequential([encoder_model, additional_layers])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model_4 (Functional)        (None, 7, 7, 32)          5088      
                                                                 
 sequential_4 (Sequential)   (None, 4)                 873252    
                                                                 
Total params: 878340 (3.35 MB)
Trainable params: 873252 (3.33 MB)
Non-trainable params: 5088 (19.88 KB)
_________________________________________________________________


In [45]:
labels = json.load(open(labels_dataset, "r"))

#Now, from landmark_list, we want to extract a window of 0.5 seconds before and after each key press
#We will use the labels dictionary to label each window
#We will use the timestamps list to find the closest timestamp to the key press timestamp
#We will use the freq variable to calculate the number of frames we need to extract for each window
#We will use the loaded_landmark_list to extract the frames
window_size = 0.5  #0.5 seconds before and after the key press
freq = 14 #14 frames per second
window_frames = int(freq * window_size)
X, Y = [], []
for key in labels.keys():
    key = float(key)
    closest_timestamp = min(timestamps, key=lambda x:abs(x-key))
    index = timestamps.index(closest_timestamp)
    if index - window_frames < 0:
        continue
    if index + window_frames >= len(timestamps):
        continue
    
    window = []
    for i in range(index-window_frames, index+window_frames):
        k = str(timestamps[i])
        window.append(landmark_list[k])

    X.append(window)
    Y.append(labels[str(key)])

#retrive a structure with shape (n_windows, window_size, 21, 3)
data2 = []
for window in X:
    temp = []
    for sample in window:
        temp.append(np.array(list(sample.values())))
    
    data2.append(np.array(temp))

data = np.array(data2)
del data2


In [46]:
from sklearn.preprocessing import OneHotEncoder
#Y = [1 if y == 'Key.left' else 0 for y in Y]
Y = np.array(Y)
encoder = OneHotEncoder()
Y_encoded = encoder.fit_transform(Y.reshape(-1, 1)).toarray()

In [61]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, Y_encoded, test_size=0.2, random_state=0)

In [62]:
model.fit(X_train, y_train, epochs=30, batch_size=16, shuffle=True, validation_split=0.1)

Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x31e658910>

In [63]:
y_pred = model.predict(X_test)



In [64]:
y_test.shape, y_pred.shape

((46, 4), (46, 4))

In [65]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))

array([[ 3,  0,  0,  1],
       [ 0, 17,  0,  0],
       [ 0,  1, 21,  0],
       [ 0,  0,  0,  3]])

In [66]:
from sklearn.metrics import precision_score, recall_score, f1_score
precision = precision_score(y_test.argmax(axis = 1), y_pred.argmax(axis=1), average='weighted')
recall = recall_score(y_test.argmax(axis=1), y_pred.argmax(axis=1), average='weighted')
f1 = f1_score(y_test.argmax(axis=1), y_pred.argmax(axis=1), average='weighted')
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)

Precision:  0.9631642512077295
Recall:  0.9565217391304348
F1:  0.9565795175501949


# Trying with baseline model

In [67]:
import keras
from keras.layers import Conv3D, MaxPooling2D, Flatten, Reshape, LSTM, Dense, Conv2D, Concatenate
num_classes = set(Y).__len__()
inp_shape = data.shape[1:]

# Input shape: (n_samples, 14, 21, 3)
input = Input(shape=inp_shape)
x = Conv2D(8, (2, 2), activation='relu', padding='same')(input)
x = MaxPooling2D((2, 3), padding='same')(x)
x = Conv2D(16, (2, 2), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 3), padding='same')(x)

y = Reshape((inp_shape[0]*inp_shape[1], inp_shape[2]))(input)
y = LSTM(16, return_sequences=True)(y)
y = Flatten()(y)
y = Dense(192, activation='relu')(y)
y = Reshape((4, 3, 16))(y)
y = Concatenate()([x, y])
y = Flatten()(y)
y = Dense(64, activation='relu')(y)
y = Dense(16, activation='relu')(y)
y = Dense(4, activation='softmax')(y)
model = Model(input, y)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=30, batch_size=16, validation_split=0.1, shuffle=True)

Epoch 1/30


2023-06-20 17:53:40.890338: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-06-20 17:53:41.070428: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


 1/11 [=>............................] - ETA: 15s - loss: 1.1679 - accuracy: 0.3750

2023-06-20 17:53:41.308431: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2023-06-20 17:53:42.650025: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-06-20 17:53:42.730081: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x31e637a50>

In [68]:
y_pred = model.predict(X_test)



2023-06-20 17:53:58.264034: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2023-06-20 17:53:58.326738: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [69]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))

array([[ 2,  0,  0,  2],
       [ 0, 17,  0,  0],
       [ 0,  2, 20,  0],
       [ 0,  0,  0,  3]])

In [70]:
precision = precision_score(y_test.argmax(axis=1), y_pred.argmax(axis=1), average='weighted')
recall = recall_score(y_test.argmax(axis=1), y_pred.argmax(axis=1), average='weighted')
f1 = f1_score(y_test.argmax(axis=1), y_pred.argmax(axis=1), average='weighted')
print("Precision: ", precision)
print("Recall: ", recall)
print("F1: ", f1)

Precision:  0.9350114416475972
Recall:  0.9130434782608695
F1:  0.9114044168391995
