In [1]:
# Mount Google Drive so that the dataset and checkpoint paths under /content/drive resolve.
# force_remount=False leaves an already existing mount untouched.
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
'''
Author       : Aditya Jain
Date Started : This notebook was created on 2nd December, 2020
About        : Implementing CNN+RNN+CTC
'''
!pip install editdistance
!pip install comet-ml
# import comet_ml at the top of your file
from comet_ml import Experiment

# Create the Comet experiment.
# The API key is a secret and must not be stored in the notebook: it is read from
# the COMET_API_KEY environment variable, with an interactive prompt as a fallback
# (useful on Colab, where the variable is usually not set).
# NOTE(review): the key that used to be hard-coded in this cell has been exposed and
# should be revoked/rotated in the Comet account settings.
import os
from getpass import getpass

comet_api_key = os.environ.get("COMET_API_KEY") or getpass("Comet API key: ")

experiment = Experiment(
    api_key=comet_api_key,
    project_name="ctc-lfd",
    workspace="adityajain07",
    log_code=True,      # boolean flag (was the string "True")
)
experiment.set_code()
experiment.add_tag('4_Train and Test on Imitation Videos_confusion')



COMET INFO: Experiment is live on comet.ml https://www.comet.ml/adityajain07/ctc-lfd/3e262eb6aa1c4f1c9a3bb8568a7d174a



In [24]:
%load_ext autoreload
%autoreload 2

from tensorflow import keras
from tensorflow.keras.layers import Dropout, Dense, Input, Reshape, TimeDistributed, Lambda, LSTM, Bidirectional, Conv2D, MaxPooling2D, Flatten
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model 
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from keras.preprocessing.sequence import pad_sequences

import numpy as np
import pickle
from sklearn.model_selection import train_test_split

import datetime
from keras.callbacks import ModelCheckpoint
import editdistance


# import inference
import cv2

# Root folder on the mounted Drive. The pickled datasets, the saved_model/ directory and
# the local `inference` module are all resolved relative to this path.
HOST_DIR = "/content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
import sys
sys.path.append(HOST_DIR)
import inference

#### Importing MIME Data

In [26]:
WRITE_DIR   = HOST_DIR + "saved_model/"
DTSTR       = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")   # timestamp string used in checkpoint file names

# NOTE(review): pickle.load can execute arbitrary code stored in the file; only load
# pickles that come from a trusted source.
# The context manager guarantees the file handle is closed once the data is read.
with open(HOST_DIR + "MIME_full_v2.pickle", "rb") as pickle_file:
    data_read = pickle.load(pickle_file)

image_data = data_read['data_image']
labels     = data_read['data_label']
prim_map   = data_read['primitive_map']
label_map  = data_read['label_map']

labels  = pad_sequences(labels, padding='post', value = 0)  # zero-pad at the end so all label sequences have equal length

print(image_data.shape)
print(labels.shape)
print(prim_map)
print(label_map)

# x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=43)
# note: passing a value to random_state produces the exact split every time
# By default the full MIME set is used for training; the "Case" cells below provide the test data.
x_train   = image_data
y_train   = labels


(4003, 30, 800)
(4003, 7)
{1: 'Reach', 2: 'Tilt', 3: 'Retract', 4: 'Grasp', 5: 'Release', 6: 'Displace', 7: 'Stir', 8: 'Wipe', 9: 'Press', 10: 'Rotate'}
{'Push': [1, 1, 3], 'Pour': [1, 4, 1, 2, 1, 5, 3], 'Pick': [1, 4, 1, 1, 5, 3], 'Stack': [1, 4, 1, 5, 3], 'Place': [1, 4, 1, 5, 3], 'Mix': [1, 4, 1, 7, 1, 5, 3], 'Clean': [1, 4, 8, 5, 3], 'Rotate': [1, 4, 10, 5, 3], 'Poke': [1, 9, 3]}


In [None]:
# Case 1: Train on MIME, Test on Self Data
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The context manager closes the file handle after loading.
with open(HOST_DIR + "MIME_self_v2.pickle", "rb") as pickle_file:
    test_data = pickle.load(pickle_file)

x_test    = test_data['data_image']
y_test    = pad_sequences(test_data['data_label'], padding='post', value = 0)  # making sure all labels are of equal length

In [None]:
# [RUN ONLY IF NEEDED] Case 2: Mixing the two datasets
# Pools the current train and test arrays and re-splits the pool 80/20.
# Reads x_test / y_test, so a cell that defines them must have been run first.
# NOTE: this cell overwrites x_train / y_train / x_test / y_test, which it also reads,
# so it is not idempotent: running it again re-pools the already mixed split.

final_data  = np.concatenate([x_train, x_test])
final_label = np.concatenate([y_train, y_test])

# a fixed random_state produces the same split on every run
x_train, x_test, y_train, y_test = train_test_split(final_data, final_label, test_size=0.2, random_state=43)

In [None]:
# [RUN ONLY IF NEEDED] Case 3: Testing on Imitation Test Videos
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The context manager closes the file handle after loading.
with open(HOST_DIR + "Imitation_3rdP_Test.pickle", "rb") as pickle_file:
    test_data = pickle.load(pickle_file)

x_test    = test_data['data_image']
y_test    = pad_sequences(test_data['data_label'], padding='post', value = 0)  # making sure all labels are of equal length

In [27]:
# [RUN ONLY IF NEEDED] Case 4: Training and Testing on Imitation Test Videos
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# The context manager closes the file handle after loading.
with open(HOST_DIR + "Imitation_3rdP_Test.pickle", "rb") as pickle_file:
    test_data = pickle.load(pickle_file)

data  = test_data['data_image']
label = pad_sequences(test_data['data_label'], padding='post', value = 0)  # making sure all labels are of equal length

# 80/20 split; the fixed random_state makes the split reproducible
x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.2, random_state=43)

In [28]:
# Sizes derived from the loaded data; they are used when building and training the model.
no_classes    = 1 + len(prim_map)          # the primitives plus one extra label for the padding value
max_label_len = labels.shape[-1]
training_pts  = int(x_train.shape[0])
test_pts      = int(x_test.shape[0])

# Report the split that is currently active and the derived sizes.
print("Training Data: ", x_train.shape, y_train.shape)
print("Testing Data: ", x_test.shape, y_test.shape)

print("Total classes of primitives: ", no_classes)
print("Max label length: ", max_label_len)

print("Total training points: ", training_pts)
print("Total test points: ", test_pts)

Training Data:  (96, 30, 800) (96, 7)
Testing Data:  (24, 30, 800) (24, 7)
Total classes of primitives:  11
Max label length:  7
Total training points:  96
Total test points:  24


#### Model Architecture

In [29]:
#### CNN feature extractor followed by two bidirectional LSTMs and a per-timestep softmax

image_shape = x_train.shape[1:]        # shape of a single sample (without the batch axis)
no_channels = 1                        # no of channels in the image, 3 in case of RGB (the Reshape below adds exactly one channel axis)
print(image_shape)

# architecture is defined below

inputs     = Input(shape=image_shape)
reshape1   = Reshape((image_shape[0], image_shape[1], 1))(inputs)
conv_1     = Conv2D(32, (3,3), activation = 'relu', padding='same')(reshape1)
max_pool1  = MaxPooling2D(pool_size=(2, 2))(conv_1)
conv_2     = Conv2D(64, (3,3), activation = 'relu', padding='same')(max_pool1)
max_pool2  = MaxPooling2D(pool_size=(2, 2))(conv_2)
conv_3     = Conv2D(64, (3,3), activation = 'relu', padding='same')(max_pool2)
max_pool3  = MaxPooling2D(pool_size=(2, 2))(conv_3)
conv_4     = Conv2D(64, (3,3), activation = 'relu', padding='same')(max_pool3)
max_pool4  = MaxPooling2D(pool_size=(2, 2))(conv_4)

# The squeeze below removes axis 1, which is only possible when the four 2x2 poolings
# have reduced that axis to size 1. Fail early with a readable message otherwise.
if max_pool4.shape[1] != 1:
    raise ValueError(
        "Expected the pooled feature map to have size 1 along axis 1 before squeezing, "
        "got shape {}".format(max_pool4.shape)
    )
squeezed   = Lambda(lambda x: K.squeeze(x, 1))(max_pool4)   # (batch, timesteps, features) sequence for the LSTMs

blstm1     = Bidirectional(LSTM(64, return_sequences=True))(squeezed)
blstm2     = Bidirectional(LSTM(128, return_sequences=True))(blstm1)
# one output unit more than no_classes -- presumably the CTC blank symbol (TODO confirm)
outputs    = Dense(no_classes+1, activation="softmax")(blstm2)


model_arch = Model(inputs, outputs)           # prediction model without the CTC loss inputs; also used to visualise the architecture
model_arch.summary()

(30, 800)
<class 'int'>
Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 30, 800)]         0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 30, 800, 1)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 30, 800, 32)       320       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 15, 400, 32)       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 15, 400, 64)       18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 7, 200, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None,

#### Loss Function

In [30]:
# Additional inputs that are needed only to compute the CTC loss during training.
# The label input is bound to `labels_input` (not `labels`) so that it does not
# shadow the padded NumPy label array, which other cells read via `labels.shape`.
labels_input = Input(name='the_labels', shape=[max_label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')

def ctc_lambda_func(args):
    '''
    Computes the CTC batch cost inside a Lambda layer.

    Arguments:
        args : list [y_pred, labels, input_length, label_length] in this order

    Returns:
        the result of K.ctc_batch_cost for the batch
    '''
    y_pred, y_true, pred_length, true_length = args

    return K.ctc_batch_cost(y_true, y_pred, pred_length, true_length)


loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels_input, input_length, label_length])
model = Model(inputs=[inputs, labels_input, input_length, label_length], outputs=loss_out)

# The 'ctc' layer already outputs the loss value, so the Keras loss only passes it through.
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = 'adam')

In [31]:
# Constant length vectors for the CTC loss: every sample provides the same number of
# timesteps to the LSTM layer and every label row has max_label_len entries.
lstm_timesteps     = squeezed.shape[1]               # the number of timesteps that go as input to LSTM layer

train_input_length = np.asarray([lstm_timesteps] * training_pts)
train_label_length = np.asarray([max_label_len] * training_pts)

test_input_length  = np.asarray([lstm_timesteps] * test_pts)
test_label_length  = np.asarray([max_label_len] * test_pts)

#### Defining Callbacks for the training process

In [32]:
class MetricCallback(keras.callbacks.Callback):
    '''
    This callback calculates sequence and label error rates on the training and
    validation data at the end of every epoch and logs them to the experiment.

    Arguments:
        pred_model : model that maps the inputs to per-timestep class probabilities;
                     the weights of the model being trained are copied into it every epoch
        x_train    : training inputs
        y_train    : label sequences for x_train
        x_test     : validation inputs
        y_test     : label sequences for x_test
        experiment : object with a log_metric(name, value) method (e.g. a Comet experiment)
    '''
    def __init__(self, pred_model, x_train, y_train, x_test, y_test, experiment):
        super(MetricCallback, self).__init__()
        self.train_acc = 0
        self.val_acc   = 0
        self.x_train   = x_train
        self.y_train   = y_train
        self.x_test    = x_test
        self.y_test    = y_test
        self.weights   = None
        self.pred_model= pred_model
        # keep the experiment that was passed in instead of relying on a notebook global
        self.experiment = experiment

        # final metrics of interest
        self.train_ser = 0     # sequence error rate for training data
        self.val_ser   = 0     # sequence error rate for validation data
        self.train_ler = 0     # label error rate on training data
        self.val_ler   = 0     # label error rate on validation data


    def on_epoch_end(self, epoch, logs=None):
        '''Copies the current weights into pred_model, recomputes and logs the metrics.'''
        print("End of epoch number: ", epoch)

        # transfer the weights of the model being trained to the prediction model via a file
        self.model.save_weights('callback_model.hdf5')
        self.pred_model.load_weights('callback_model.hdf5')

        self.train_accuracy()
        self.val_accuracy()

        self.experiment.log_metric("train_ser", self.train_ser)
        self.experiment.log_metric("val_ser", self.val_ser)
        self.experiment.log_metric("train_ler", self.train_ler)
        self.experiment.log_metric("val_ler", self.val_ler)


    def train_accuracy(self):
        '''calculates accuracy, SER and LER on the train data given to the constructor'''
        self.train_acc, self.train_ser, self.train_ler = self._error_rates(self.x_train, self.y_train)


    def val_accuracy(self):
        '''calculates accuracy, SER and LER on the validation data given to the constructor'''
        self.val_acc, self.val_ser, self.val_ler = self._error_rates(self.x_test, self.y_test)


    def _error_rates(self, x, y):
        '''
        Predicts on x, greedily CTC-decodes the predictions and compares them with y.

        Returns:
            (accuracy, ser, ler), all in percent:
            accuracy : share of samples whose decoded sequence equals the label row exactly
            ser      : 100 - accuracy
            ler      : summed edit distance divided by the summed label length
        '''
        pred = self.pred_model.predict(x)
        # every sample is decoded over the full length of its output sequence
        seq_lengths = np.ones(pred.shape[0]) * pred.shape[1]
        decoded = K.get_value(K.ctc_decode(pred, input_length=seq_lengths, greedy=True)[0][0])

        points        = x.shape[0]
        exact_matches = 0
        total_edits   = 0
        total_lab_len = 0

        for decoded_row, true_label in zip(decoded, y):
            # removing the -1's that the CTC decoder uses to pad its output rows
            pred_label = decoded_row[decoded_row != -1]

            if np.array_equal(pred_label, true_label):
                exact_matches += 1

            total_edits   += editdistance.eval(pred_label, true_label)
            total_lab_len += len(true_label)

        accuracy = exact_matches/points*100
        return accuracy, 100 - accuracy, total_edits/total_lab_len*100
        
# Writes a checkpoint (file name contains the DTSTR timestamp) whenever val_loss improves.
model_save_callback = ModelCheckpoint(WRITE_DIR + "best_model-" + DTSTR + ".hdf5", monitor='val_loss', verbose=1,
    save_best_only=True, mode='auto')

earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)  # stop training if val_loss does not improve for 20 consecutive epochs



#### Training

In [33]:
EPOCHS      = 200

# Inputs of the training model: images, labels and the two CTC length vectors.
# The targets passed to fit() are dummy zero arrays.
train_inputs    = [x_train, y_train, train_input_length, train_label_length]
val_inputs      = [x_test, y_test, test_input_length, test_label_length]
metric_callback = MetricCallback(model_arch, x_train, y_train, x_test, y_test, experiment)

model.fit(
    x=train_inputs,
    y=np.zeros(training_pts),
    validation_data=(val_inputs, [np.zeros(test_pts)]),
    epochs=EPOCHS,
    batch_size=32,
    callbacks=[metric_callback, model_save_callback, earlystop_callback],
    verbose=0,
)

End of epoch number:  0

Epoch 00001: val_loss improved from inf to 57.74776, saving model to /content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/saved_model/best_model-2021-02-09-11-56.hdf5
End of epoch number:  1

Epoch 00002: val_loss improved from 57.74776 to 21.11547, saving model to /content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/saved_model/best_model-2021-02-09-11-56.hdf5
End of epoch number:  2

Epoch 00003: val_loss improved from 21.11547 to 20.57434, saving model to /content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/saved_model/best_model-2021-02-09-11-56.hdf5
End of epoch number:  3

Epoch 00004: val_loss improved from 20.57434 to 15.08357, saving model to /content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/saved_model/best_model-2021-02-09-11-56.hdf5
End of epoch number:  4

Epoch 00005: val_loss did not improve from 15.08357
End of epoch number:  5

Epoch 00006: va

<tensorflow.python.keras.callbacks.History at 0x7fd7459b96a0>

#### Save and Load Weights

In [34]:
# Write the current weights of the training model to disk and load them into model_arch,
# the model without the CTC loss inputs that is used for prediction.
model.save_weights('first_run.hdf5')
model_arch.load_weights('first_run.hdf5')

#### Accuracy on a (preferably held-out) batch of data
In this section, pass a self-collected or any other dataset along with its labels. The function returns the sequence-level accuracy in percent, i.e. 100 − SER.

In [35]:
def convert_to_onehot(label):
  '''
  One-hot encodes the items of `label`.

  Every item is keyed by its str() representation; keys are numbered 0, 1, 2, ...
  in the order in which they first appear.

  Returns the tuple (to_categorical(ids), dict mapping str(item) to its id).
  '''
  ids_by_key = {}
  # setdefault returns the existing id, or stores and returns the next free one
  encoded = [ids_by_key.setdefault(str(entry), len(ids_by_key)) for entry in label]

  return to_categorical(encoded), ids_by_key

In [39]:
def accuracy_on_batch(model, data, labels, experiment):
  '''
  Computes the exact-match sequence accuracy of `model` on a batch and logs a
  confusion matrix over whole label sequences to `experiment`.

  Arguments:
    model      : model whose predict() output can be decoded with K.ctc_decode
    data       : batch of inputs; data.shape[0] is the number of samples
    labels     : one label sequence per sample
    experiment : object with a log_confusion_matrix method (e.g. a Comet experiment)

  Returns:
    (accuracy, class_ids) where accuracy is the percentage of samples whose decoded
    sequence equals its label sequence, and class_ids maps str(sequence) to the class
    index used in the confusion matrix. Sequences from `labels` are numbered first,
    in order of first appearance, followed by predicted sequences not seen in `labels`.
  '''
  pred        = model.predict(data)
  decode_pred = K.get_value(K.ctc_decode(pred, input_length=np.ones(pred.shape[0])*pred.shape[1],
                         greedy=True)[0][0])
  points      = data.shape[0]

  # removing all extra -1's induced by CTC
  pred_label_list = [
    np.asarray([item for item in decode_pred[i] if item != -1])
    for i in range(points)
  ]

  count    = sum(1 for i in range(points) if np.array_equal(pred_label_list[i], labels[i]))
  accuracy = count/points*100

  # Actual and predicted sequences must share ONE index mapping; with two separate
  # mappings the same class index would denote different sequences on the two axes
  # and the one-hot matrices could even differ in width.
  class_ids = {}
  for sequence in labels:
    class_ids.setdefault(str(sequence), len(class_ids))
  for sequence in pred_label_list:
    class_ids.setdefault(str(sequence), len(class_ids))

  n_classes      = len(class_ids)
  actual_one_hot = to_categorical([class_ids[str(seq)] for seq in labels], num_classes=n_classes)
  pred_one_hot   = to_categorical([class_ids[str(seq)] for seq in pred_label_list], num_classes=n_classes)

  # dict keys are in insertion order, i.e. in class-index order
  experiment.log_confusion_matrix(actual_one_hot, pred_one_hot, labels=list(class_ids))

  return accuracy, class_ids

In [40]:
# Path of a "best_model" checkpoint with a hard-coded timestamp -- presumably from an earlier run.
# NOTE: the line after it overrides this path, so the weights that are actually loaded
# come from 'first_run.hdf5'.
weights_path = WRITE_DIR + 'best_model-2021-02-08-08-04.hdf5'
weights_path = 'first_run.hdf5'
model_arch.load_weights(weights_path)

# Accuracy on the test split; accuracy_on_batch also logs a confusion matrix to the experiment.
batch_accuracy, dict_labels = accuracy_on_batch(model_arch, x_test, y_test, experiment)

print("The accuracy on the batch in % is: ", batch_accuracy)
print(dict_labels)


The accuracy on the batch in % is:  0.0
{'[1 4 8 5 3 0 0]': 0, '[1 4 1 5 3 0 0]': 1, '[1 4 1 2 1 5 3]': 2}


In [38]:
experiment.end()

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/adityajain07/ctc-lfd/3e262eb6aa1c4f1c9a3bb8568a7d174a
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     batch_loss [144]          : (1.2460647821426392, 95.04901885986328)
COMET INFO:     epoch_duration [144]      : (0.3190991699998449, 7.81170693099989)
COMET INFO:     loss [144]                : (1.5106345415115356, 88.60364532470703)
COMET INFO:     train_ler [144]           : (13.095238095238097, 100.0)
COMET INFO:     train_ser [144]           : (65.625, 100.0)
COMET INFO:     val_ler [144]             : (16.071428571428573, 100.0)
COMET INFO:     val_loss [144]            : (1.6288180351257324, 57.74775695800781)
COMET INFO:     val_ser [144]             : (70.83333333333333, 100.0)
COMET INFO:     validate_batch_loss [144] : (1.6288

#### Inference on a single test data

In [None]:
# These params cannot be changed
n_frames       = 10
down_f         = 8
desired_shape  = (30, 800)

video_path     = "/content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC-MIME/22dec/stacking4.avi"

# Prepare the video and run the prediction model on it.
infer_object   = inference.Inference(video_path, n_frames, down_f, desired_shape, model_arch)
image          = infer_object.prep_data()
predicted_out, final_out = infer_object.predict()

print("Raw output: ", predicted_out)
print("Final processed output: ", final_out)
print('\n')

# Print the name of every predicted primitive; id 0 is skipped.
for primitive_id in final_out:
    if primitive_id != 0:
        print(prim_map[primitive_id])

Raw output:  [ 1  4  1  5  3  0  0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1]
Final processed output:  [1 4 1 5 3 0 0]


Reach
Grasp
Reach
Release
Retract


### Miscellaneous (Do Not Run)

In [None]:
## Saving the model weights
# Requires `model` to exist in the kernel; the recorded output of this cell shows the
# NameError raised when it was run without it.

model.save_weights('first_run.hdf5')

NameError: name 'model' is not defined

In [None]:
# model.save_weights('first_run.hdf5')
# model_arch.load_weights('first_run.hdf5')

# Sanity check: predict on the first few TRAINING samples (not on validation data)
test_points = 2

# Inference data
infer_data    = x_train[:test_points]
infer_label   = y_train[:test_points]

prediction  = model_arch.predict(infer_data)

# use CTC decoder (greedy), decoding every sample over its full output length;
# the decoded rows are padded with -1 (see the printed output)
out = K.get_value(K.ctc_decode(prediction, input_length=np.ones(prediction.shape[0])*prediction.shape[1],
                         greedy=True)[0][0])


print(out)

[[ 1  4  1  1  5  3  0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
  -1 -1]
 [ 1  4  1  5  3  0  0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
  -1 -1]]


In [None]:
# Print the true and the decoded label for each sample used in the sanity check.
for sample_idx in range(test_points):
    print("True label: ", infer_label[sample_idx])

    # Drop the -1 padding of the decoded row. A separate loop variable is used so that
    # the sample index is not overwritten (the inner loop used to reuse `i`, which made
    # the type() lookup below index infer_label with the last decoded value).
    pred_lab = [token for token in out[sample_idx] if token != -1]

    print("Predicted label: ", np.asarray(pred_lab))
    print(type(infer_label[sample_idx]))
    print('\n')
    

True label:  [1 4 1 1 5 3 0]
Predicted label:  [1 4 1 1 5 3 0]
<class 'numpy.ndarray'>


True label:  [1 4 1 5 3 0 0]
Predicted label:  [1 4 1 5 3 0 0]
<class 'numpy.ndarray'>


