# Training code for our LSTM- and RNN-based methods. Please note that we cannot upload the huge dataset with the submission, so reading the files is not possible here. We can show a demo on our system if needed. Also, this training takes a lot of time on PCs and needs GPUs.

In [1]:
import tensorflow as tf
# Print the GPU devices TensorFlow can see (the list is empty when none is visible).
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [1]:
import numpy as np
import os
import pickle

# Load the stroke archives found in strokes_data2/. Every .npz file is treated
# as one class; its class id is its position in `files` (os.listdir order).
ds_store_path = "strokes_data2/.DS_Store"
if os.path.exists(ds_store_path):
    # Delete the file that was actually found inside the data directory, so it
    # does not show up in the listing below.
    os.remove(ds_store_path)
files=os.listdir("strokes_data2")

count=0  # class id of the file currently being loaded
x_load=[]
y_load=[]
x_vali_load=[]
y_vali_load=[]
for file in files:
    if file.find('.npz')==-1:
        # A non-archive entry would shift the class ids, so loading stops here.
        print('issue in files, will corrupt the encodings')
        break
    file = "strokes_data2/" + file
    # NOTE: allow_pickle=True unpickles the stored object arrays; only load
    # archives from a trusted source.
    # The context manager closes the archive's file handle once the slices
    # have been read.
    with np.load(file,encoding='latin1', allow_pickle=True) as archive:
        x_temp=archive['train'][0:20000]
        x_vali=archive['valid'][0:20000]
    x_load.append(x_temp)
    x_vali_load.append(x_vali)
    # One (n, 1) float32 label column per split, filled with the class id.
    y=np.full((x_temp.shape[0], 1), count, dtype='float32')
    y_vali=np.full((x_vali.shape[0], 1), count, dtype='float32')
    y_load.append(y)
    y_vali_load.append(y_vali)
    count += 1


features= np.array(x_load)
labels=np.array(y_load)

features_vali= np.array(x_vali_load)
labels_vali=np.array(y_vali_load)

In [2]:
# Walk over every training sketch once: remember the longest sequence length
# and overwrite columns 0 and 1 with their running sums, in place.
# (presumably these columns are x/y offsets that become absolute coordinates
# -- confirm against the dataset format)
max_strokes=0
for class_sketches in features:
    for sketch in class_sketches:
        if len(sketch) > max_strokes:
            max_strokes = len(sketch)
        # Column-wise running sum of the first two columns.
        sketch[:, :2] = sketch[:, :2].cumsum(axis=0)

In [None]:
# Basic LSTM

In [11]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from keras.utils import Sequence, to_categorical
from keras.preprocessing.sequence import pad_sequences


class StrokeDataset(Sequence):
    """Keras batch generator that pads stroke sequences one batch at a time.

    Only the requested batch is padded and one-hot encoded, so a padded copy
    of the whole dataset never has to be held in memory.

    Args:
        features: Indexable collection of per-sample sequences; each one is
            passed to ``pad_sequences`` unchanged.
        labels: Indexable collection of integer class ids aligned with
            ``features``.
        batch_size: Number of samples per batch (the last batch may be
            smaller).
        max_strokes: ``maxlen`` given to ``pad_sequences``; every sequence is
            padded (or truncated) to this length.
        num_classes: Width of the one-hot label vectors.
        shuffle: If true, the sample order is re-drawn at construction and
            every time ``on_epoch_end`` is called.
        **kwargs: Forwarded to the Keras ``Sequence`` base class.
    """

    def __init__(self, features, labels, batch_size, max_strokes, num_classes, shuffle=True, **kwargs):
        # Initialise the Keras base class; skipping this makes Keras warn that
        # super().__init__() was not called.
        super().__init__(**kwargs)
        self.features = features
        self.labels = labels
        self.batch_size = batch_size
        self.max_strokes = max_strokes
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.labels))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last may be partial)."""
        return int(np.ceil(len(self.labels) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)``.

        ``X`` is the float32 array produced by ``pad_sequences`` (zero padding
        appended after each sequence); ``y`` holds the one-hot labels.
        """
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        features_batch = [self.features[k] for k in batch_indexes]
        labels_batch = [self.labels[k] for k in batch_indexes]

        # pad_sequences already returns a NumPy array, so no extra copy is made.
        X = pad_sequences(features_batch, maxlen=self.max_strokes, padding='post', dtype='float32')
        y = to_categorical(labels_batch, num_classes=self.num_classes)

        return X, y

    def on_epoch_end(self):
        """Draw a new random sample order when shuffling is enabled."""
        if self.shuffle:
            self.indexes = np.random.permutation(self.indexes)


from sklearn.model_selection import train_test_split

n_files=len(files)
n_samples_per_file=features[0].shape[0] # train data samples
n_total_samples=n_files * n_samples_per_file

# Flatten the per-file arrays into two parallel lists: one sequence per sample
# and the index of the file (= class id) it came from.
all_features=[]  # this name must match the one used by the appends and the split below
all_labels=[]
for file_idx, strokes in enumerate(features):
    all_features.extend(strokes)
    all_labels.extend([file_idx] * len(strokes))

# print("Total samples:", len(all_features))

# 70/30 split with a fixed seed so the split is reproducible.
train_features,val_features,train_labels,val_labels=train_test_split(
    all_features,all_labels,test_size=0.3,random_state=42, shuffle=True)

batch_size=64 # kept small so that a batch fits in RAM during training
num_classes=n_files
num_features=3  #(x,y,pen up bin)
# max_strokes is the longest training sequence length found during preprocessing.

# Generators pad and encode one batch at a time instead of loading the whole
# padded dataset into RAM.
train_generator=StrokeDataset(train_features,train_labels, batch_size, max_strokes, num_classes)
val_generator=StrokeDataset(val_features,val_labels, batch_size, max_strokes, num_classes, shuffle=False)
# using GPu 
with tf.device('/GPU:0'):
    def create_lstm_classifier(max_strokes, num_features, num_classes):
        """Build the (uncompiled) stacked-LSTM classifier.

        Args:
            max_strokes: Number of timesteps of every padded input sequence.
            num_features: Number of values per timestep.
            num_classes: Number of output classes.

        Returns:
            A Keras ``Model`` mapping ``(max_strokes, num_features)`` inputs
            to ``num_classes`` softmax probabilities.
        """
        sequence_input = layers.Input(shape=(max_strokes, num_features))
        # Timesteps whose values are all 0.0 (the zero padding appended by the
        # data generator) are masked so the LSTMs skip them.
        hidden = layers.Masking(mask_value=0.0)(sequence_input)
        # Two stacked LSTMs; the first returns the full sequence for the second.
        hidden = layers.LSTM(128, return_sequences=True, use_cudnn=False)(hidden)
        hidden = layers.LSTM(64, use_cudnn=False)(hidden)
        # Dense head with dropout for regularisation.
        hidden = layers.Dense(128, activation='relu')(hidden)
        hidden = layers.Dropout(0.3)(hidden)
        class_probs = layers.Dense(num_classes, activation='softmax')(hidden)
        return models.Model(sequence_input, class_probs)

    lstm_model = create_lstm_classifier(
        max_strokes=max_strokes,
        num_features=num_features,
        num_classes=num_classes,
    )
    # One-hot labels from the generators -> categorical cross-entropy.
    lstm_model.compile(
        loss="categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        metrics=["accuracy"],
    )
    lstm_model.fit(train_generator, validation_data=val_generator, epochs=7)
    lstm_model.save('lstm_model_final.keras')


Epoch 1/7
[1m10938/10938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m594s[0m 54ms/step - accuracy: 0.4312 - loss: 2.0928 - val_accuracy: 0.7010 - val_loss: 1.0389
Epoch 2/7
[1m10938/10938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m619s[0m 54ms/step - accuracy: 0.6935 - loss: 1.0847 - val_accuracy: 0.7591 - val_loss: 0.8312
Epoch 3/7
[1m10938/10938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m578s[0m 53ms/step - accuracy: 0.7425 - loss: 0.9081 - val_accuracy: 0.7838 - val_loss: 0.7473
Epoch 4/7
[1m10938/10938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m571s[0m 52ms/step - accuracy: 0.7698 - loss: 0.8117 - val_accuracy: 0.7998 - val_loss: 0.6886
Epoch 5/7
[1m10938/10938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m574s[0m 52ms/step - accuracy: 0.7856 - loss: 0.7551 - val_accuracy: 0.8114 - val_loss: 0.6552
Epoch 6/7
[1m10938/10938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m570s[0m 52ms/step - accuracy: 0.7967 - loss: 0.7150 - val_accuracy: 0.8145 - val_loss:

In [17]:
# Apply the same in-place running sum to columns 0 and 1 of every
# validation sketch as was applied to the training sketches.
for class_sketches in features_vali:
    for sketch in class_sketches:
        sketch[:, :2] = sketch[:, :2].cumsum(axis=0)


In [16]:
import shutil
from google.colab import files as files_mod
# Download the saved LSTM model from Colab (where the GPU was available) so
# that it is on the local system for a demo if needed.
files_mod.download('lstm_model_final.keras')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [26]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
from tensorflow.keras.utils import to_categorical
# Evaluate the LSTM on the held-out split loaded into features_vali.
num_classes=len(files)

# Flatten the per-file arrays into parallel lists of sequences and class ids
# (the class id is the position of the file in features_vali).
all_features_test=[stroke for strokes in features_vali for stroke in strokes]
all_labels_test=[
    file_idx
    for file_idx, strokes in enumerate(features_vali)
    for _ in strokes
]

# Reuse the batch generator so the padded test data is never held in RAM at
# once; no shuffling is needed for evaluation.
test_generator = StrokeDataset(
    all_features_test,
    all_labels_test,
    batch_size=64,
    max_strokes=max_strokes,
    num_classes=num_classes,
    shuffle=False,
)

test_loss, test_accuracy=lstm_model.evaluate(test_generator)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy*100:.2f}%")


[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 15ms/step - accuracy: 0.8193 - loss: 0.6226
Test Loss: 0.6170
Test Accuracy: 82.16%


In [None]:
# Write a TensorFlow SavedModel directory for inference/serving.
# Keras 3 `Model.save` only accepts `.keras`/`.h5` paths and rejects
# `save_format='tf'`; `Model.export` is the supported way to get a SavedModel.
# The exported artifact is inference-only; use the `.keras` file to reload the
# model for further training.
lstm_model.export('lstm_model_final_savedmodel')

In [12]:
# ConvLSTM: old version; we are still keeping it for reference, as we learned from its output.
# PLEASE NOTE: THE TRAIN GENERATOR IS CHANGED FOR CONV BELOW, SO DO NOT USE THE OLD MODEL AFTER RUNNING THE CELLS BELOW, AS THE VARIABLE NAMES ARE THE SAME.

In [27]:
import numpy as np
import tensorflow as tf
from keras.utils import Sequence, to_categorical
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, models
from sklearn.utils import shuffle

class StrokeDataset(Sequence):
    """Keras batch generator for the ConvLSTM model.

    Pads and one-hot encodes one batch at a time so a padded copy of the whole
    dataset never has to be held in memory, and inserts a singleton axis after
    the time axis of every batch.

    Args:
        features: Indexable collection of per-sample sequences; each one is
            passed to ``pad_sequences`` unchanged.
        labels: Indexable collection of integer class ids aligned with
            ``features``.
        batch_size: Number of samples per batch (the last batch may be
            smaller).
        max_strokes: ``maxlen`` given to ``pad_sequences``; every sequence is
            padded (or truncated) to this length.
        num_classes: Width of the one-hot label vectors.
        shuffle: If true, the sample order is re-drawn at construction and
            every time ``on_epoch_end`` is called.
        **kwargs: Forwarded to the Keras ``Sequence`` base class.
    """

    def __init__(self, features, labels, batch_size, max_strokes, num_classes, shuffle=True, **kwargs):
        # Initialise the Keras base class; skipping this makes Keras warn that
        # super().__init__() was not called.
        super().__init__(**kwargs)
        self.features = features
        self.labels = labels
        self.batch_size = batch_size
        self.max_strokes = max_strokes
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.labels))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last may be partial)."""
        return int(np.ceil(len(self.labels) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)``.

        ``X`` is the float32 array produced by ``pad_sequences`` (zero padding
        appended after each sequence) with a new axis of size 1 inserted at
        position 2; ``y`` holds the one-hot labels.
        """
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        features_batch = [self.features[k] for k in batch_indexes]
        labels_batch = [self.labels[k] for k in batch_indexes]

        # pad_sequences already returns a NumPy array, so no extra copy is made.
        X = pad_sequences(features_batch, maxlen=self.max_strokes, padding='post', dtype='float32')
        # Add the singleton axis expected by the ConvLSTM input layout.
        X = np.expand_dims(X, axis=2)

        y = to_categorical(labels_batch, num_classes=self.num_classes)

        return X, y

    def on_epoch_end(self):
        """Draw a new random sample order when shuffling is enabled."""
        if self.shuffle:
            self.indexes = np.random.permutation(self.indexes)

n_files=len(files)
n_samples_per_file=features[0].shape[0]
n_total_samples=n_files*n_samples_per_file

# Flatten the per-file arrays into two parallel lists: one sequence per sample
# and the index of the file (= class id) it came from.
all_features=[]
all_labels=[]
for class_id, class_sketches in enumerate(features):
    all_features.extend(class_sketches)
    all_labels.extend([class_id] * len(class_sketches))

from sklearn.model_selection import train_test_split
# 70/30 split with a fixed seed so the split is reproducible.
train_features, val_features, train_labels, val_labels = train_test_split(
    all_features,
    all_labels,
    test_size=0.3,
    random_state=42,
    shuffle=True,
)

# Same training parameters as the LSTM run.
batch_size=64
num_classes=n_files
num_features=3

# Batch generators; validation data is served in a fixed order.
train_generator=StrokeDataset(
    train_features, train_labels, batch_size, max_strokes, num_classes)
val_generator=StrokeDataset(
    val_features, val_labels, batch_size, max_strokes, num_classes, shuffle=False)

with tf.device('/GPU:0'):
    def create_convlstm_classifier(max_strokes, num_features, num_classes):
        """Build the (uncompiled) two-stage ConvLSTM1D classifier.

        Args:
            max_strokes: Number of timesteps of every padded input sequence.
            num_features: Number of values per timestep.
            num_classes: Number of output classes.

        Returns:
            A Keras ``Model`` mapping ``(max_strokes, 1, num_features)``
            inputs to ``num_classes`` softmax probabilities.
        """
        net_input = layers.Input(shape=(max_strokes, 1, num_features))
        hidden = layers.Masking(mask_value=0.0)(net_input)

        # Two ConvLSTM1D stages (64 then 32 filters), each followed by batch
        # normalisation and dropout; only the first returns the full sequence.
        for n_filters, keep_sequence in ((64, True), (32, False)):
            hidden = layers.ConvLSTM1D(
                filters=n_filters,
                kernel_size=3,
                padding='same',
                return_sequences=keep_sequence,
                activation='tanh',
                recurrent_activation='sigmoid',
            )(hidden)
            hidden = layers.BatchNormalization()(hidden)
            hidden = layers.Dropout(0.3)(hidden)

        # Dense classification head.
        hidden = layers.Flatten()(hidden)
        hidden = layers.Dense(128, activation='relu')(hidden)
        hidden = layers.Dropout(0.3)(hidden)
        class_probs = layers.Dense(num_classes, activation='softmax')(hidden)
        return models.Model(net_input, class_probs)

    convlstm_model = create_convlstm_classifier(
        max_strokes=max_strokes,
        num_features=num_features,
        num_classes=num_classes,
    )
    # One-hot labels from the generators -> categorical cross-entropy.
    convlstm_model.compile(
        loss="categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        metrics=["accuracy"],
    )
    convlstm_model.fit(train_generator, validation_data=val_generator, epochs=7)

    convlstm_model.save('convlstm_model_final.keras')

Epoch 1/7
[1m10938/10938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2013s[0m 183ms/step - accuracy: 0.2777 - loss: 2.6902 - val_accuracy: 0.5661 - val_loss: 1.4941
Epoch 2/7
[1m10938/10938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2004s[0m 178ms/step - accuracy: 0.4955 - loss: 1.7861 - val_accuracy: 0.6177 - val_loss: 1.3128
Epoch 3/7
[1m 2098/10938[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m23:04[0m 157ms/step - accuracy: 0.5229 - loss: 1.6964

KeyboardInterrupt: 

In [28]:
import pandas as pd
# Save the file names in their loaded order; a file's position in `files` is
# the class id used for its samples, so this CSV maps class ids back to names.
pd.Series(files).to_csv('files_class_name_order.csv',index=False)
from google.colab import files as files_mod
# Download the CSV from Colab to the local machine.
files_mod.download('files_class_name_order.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [30]:
# Training was interrupted (KeyboardInterrupt) before the save call at the end
# of the training cell could run, so save the partially trained model here.
convlstm_model.save('convlstm_model_final.keras')

In [None]:
# already done in lstm testing
# for x in range(features_vali.shape[0]):
#     for y in range(features_vali[x].shape[0]):
#         features_vali[x][y][:,0]=features_vali[x][y][:,0].cumsum()
#         features_vali[x][y][:,1]=features_vali[x][y][:,1].cumsum()


In [31]:
import shutil
from google.colab import files as files_mod
# Download the saved ConvLSTM model from Colab (where the GPU was available)
# so that it is on the local system for a demo if needed.
files_mod.download('convlstm_model_final.keras')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [32]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
from tensorflow.keras.utils import to_categorical

# Evaluate the ConvLSTM on the held-out split loaded into features_vali.
num_classes=len(files)

# Flatten the per-file arrays into parallel lists of sequences and class ids
# (the class id is the position of the file in features_vali).
all_features_test=[stroke for strokes in features_vali for stroke in strokes]
all_labels_test=[
    file_idx
    for file_idx, strokes in enumerate(features_vali)
    for _ in strokes
]

# Serve the test data through the batch generator as well: padding everything
# at once can exceed the available RAM and kill the session.
test_generator=StrokeDataset(
    all_features_test,
    all_labels_test,
    batch_size=64,
    max_strokes=max_strokes,
    num_classes=num_classes,
    shuffle=False,
)

test_loss, test_accuracy = convlstm_model.evaluate(test_generator)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy*100:.2f}%")


[1m   1/1954[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:29[0m 107ms/step - accuracy: 0.8438 - loss: 0.9301

  self._warn_if_super_not_called()


[1m1954/1954[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 54ms/step - accuracy: 0.6240 - loss: 1.2773
Test Loss: 1.2832
Test Accuracy: 62.76%


In [35]:
# Display the label column stored for the first loaded file.
labels[0]

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)