In [2]:
# Mount Google Drive at /content/drive; the paths used later in this notebook
# (HOST_DIR and everything derived from it) live under this mount point.
# force_remount=False is passed explicitly — presumably so an already-mounted
# drive is left as-is on re-run (confirm against the Colab docs).
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

Mounted at /content/drive


In [3]:
'''
Author       : Aditya Jain
Date Started : This notebook was created on 2nd December, 2020
About        : Implementing CNN+RNN+CTC
'''
!pip install editdistance
!pip install comet-ml
# import comet_ml at the top of your file
from comet_ml import Experiment

# Create an experiment with your api key:
experiment = Experiment(
    api_key="epeaAhyRcHSkn92H4kusmbX8k",
    project_name="ctc-lfd",
    workspace="adityajain07",
    log_code="True"
)
experiment.set_code()
experiment.add_tag('ser and ler logging')

Collecting comet-ml
[?25l  Downloading https://files.pythonhosted.org/packages/24/81/141e187956ec0af7051d3aa6b571aed7301e3531352b90448fabe81820e6/comet_ml-3.2.8-py2.py3-none-any.whl (238kB)
[K     |████████████████████████████████| 245kB 15.5MB/s 
[?25hCollecting everett[ini]>=1.0.1; python_version >= "3.0"
  Downloading https://files.pythonhosted.org/packages/fe/dc/38593280ec30fe1cb2611ec65554b76b68d13582bf490113e3332cdd85ea/everett-1.0.3-py2.py3-none-any.whl
Collecting wurlitzer>=1.0.2
  Downloading https://files.pythonhosted.org/packages/0c/1e/52f4effa64a447c4ec0fb71222799e2ac32c55b4b6c1725fccdf6123146e/wurlitzer-2.0.1-py2.py3-none-any.whl
Collecting netifaces>=0.10.7
  Downloading https://files.pythonhosted.org/packages/0c/9b/c4c7eb09189548d45939a3d3a6b3d53979c67d124459b27a094c365c347f/netifaces-0.10.9-cp36-cp36m-manylinux1_x86_64.whl
Collecting websocket-client>=0.55.0
[?25l  Downloading https://files.pythonhosted.org/packages/4c/5f/f61b420143ed1c8dc69f9eaec5ff1ac36109d52c80de

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/adityajain07/ctc-lfd/0f58dec0cb6d4fb9b13b3f7d957b8023



In [4]:
%load_ext autoreload
%autoreload 2

from tensorflow import keras
from tensorflow.keras.layers import Dropout, Dense, Input, Reshape, TimeDistributed, Lambda, LSTM, Bidirectional, Conv2D, MaxPooling2D, Flatten
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model 
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from keras.preprocessing.sequence import pad_sequences

import numpy as np
import pickle
from sklearn.model_selection import train_test_split

import datetime
from keras.callbacks import ModelCheckpoint
import editdistance


# import inference
import cv2

# Project root on the mounted Drive. The data pickle, the saved_model/ output
# folder and the local `inference` module are all resolved relative to it.
HOST_DIR = "/content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/"

In [5]:
import sys
# Put HOST_DIR on the module search path so that `import inference` can find
# the project-local module (presumably HOST_DIR/inference.py — confirm).
sys.path.append(HOST_DIR)
import inference

#### Importing MIME Data

In [6]:
WRITE_DIR   = HOST_DIR + "saved_model/"                              # where model checkpoints are written
DTSTR       = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")     # timestamp embedded in checkpoint names

# Load the dataset through a context manager so the file handle is always closed.
# NOTE: pickle can execute arbitrary code while loading; only load files from a trusted source.
with open(HOST_DIR + "MIME_full.pickle", "rb") as pickle_file:
    data_read = pickle.load(pickle_file)

image_data = data_read['data_image']
labels     = data_read['data_label']
prim_map   = data_read['primitive_map']
label_map  = data_read['label_map']

# Label sequences have different lengths; right-pad them with 0 so they stack into one 2-D array
labels  = pad_sequences(labels, padding='post', value = 0)

print(image_data.shape)
print(labels.shape)
print(prim_map)
print(label_map)

# note: passing a value to random_state produces the exact same split every time
x_train, x_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=43)

print("Training Data: ", x_train.shape, y_train.shape)
print("Testing Data: ", x_test.shape, y_test.shape)

no_classes    = len(prim_map)+1      # one extra label because of the padding value 0
max_label_len = labels.shape[-1]     # length of every label sequence after padding

training_pts  = int(x_train.shape[0])
test_pts      = int(x_test.shape[0])

print("Total classes of primitives: ", no_classes)
print("Max label length: ", max_label_len)

(1773, 30, 800)
(1773, 7)
{1: 'Reach', 2: 'Tilt', 3: 'Retract', 4: 'Grasp', 5: 'Release'}
{'Push': [1, 1, 3], 'Pour': [1, 4, 1, 2, 1, 5, 3], 'Pick': [1, 4, 1, 1, 5, 3], 'Stack': [1, 4, 1, 5, 3]}
Training Data:  (1418, 30, 800) (1418, 7)
Testing Data:  (355, 30, 800) (355, 7)
Total classes of primitives:  6
Max label length:  7


In [7]:
# Report how many samples ended up in each split
for caption, count in (("Total training points: ", training_pts),
                       ("Total test points: ", test_pts)):
    print(caption, count)

Total training points:  1418
Total test points:  355


#### Model Architecture

In [8]:
#### Builds the CNN + BiLSTM network that maps one input image to per-timestep class probabilities

image_shape = x_train.shape[1:]        # per-sample shape, i.e. everything except the batch axis
no_channels = 1                        # no of channels in the image, 3 in case of RGB
                                       # NOTE(review): not referenced below; Reshape hard-codes the 1
print(image_shape)

# no_classes        = 80
# max_label_len = 4
print(type(image_shape[0]))            # debug output: type of the first dimension

# architecture is defined below

inputs     = Input(shape=image_shape)
# add a trailing channel axis of size 1 so the 2-D input can go through Conv2D
reshape1   = Reshape((image_shape[0], image_shape[1], 1))(inputs)
# four conv + 2x2 max-pool stages; the summary printed below shows each pool
# halving both spatial axes (30x800 -> 15x400 -> 7x200 -> ...)
conv_1     = Conv2D(32, (3,3), activation = 'relu', padding='same')(reshape1)
max_pool1  = MaxPooling2D(pool_size=(2, 2))(conv_1)
conv_2     = Conv2D(64, (3,3), activation = 'relu', padding='same')(max_pool1)
max_pool2  = MaxPooling2D(pool_size=(2, 2))(conv_2)
conv_3     = Conv2D(64, (3,3), activation = 'relu', padding='same')(max_pool2)
max_pool3  = MaxPooling2D(pool_size=(2, 2))(conv_3)
conv_4     = Conv2D(64, (3,3), activation = 'relu', padding='same')(max_pool3)
max_pool4  = MaxPooling2D(pool_size=(2, 2))(conv_4)
# drop axis 1 so the remaining (steps, features) tensor can feed the LSTMs;
# this assumes axis 1 has shrunk to size 1 after the four pools
# (30 -> 15 -> 7 -> 3 -> 1 for the 30-row input — TODO confirm for other input sizes)
squeezed   = Lambda(lambda x: K.squeeze(x, 1))(max_pool4)
# reshape    = Reshape(target_shape=(int(image_shape[0]/8), int(image_shape[1]/8*64)))(max_pool3)
# dense1     = Dense(64)(reshape)                                                  # this dense helps reduce no of params
blstm1     = Bidirectional(LSTM(64, return_sequences=True))(squeezed)
blstm2     = Bidirectional(LSTM(128, return_sequences=True))(blstm1)
# per-timestep softmax over no_classes+1 outputs (the +1 is presumably the CTC blank — confirm)
outputs    = Dense(no_classes+1, activation="softmax")(blstm2)


# prediction-only model (image -> softmax sequence); also used later for metrics and inference
model_arch = Model(inputs, outputs)           # for viz the model architecture
model_arch.summary()

(30, 800)
<class 'int'>
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 30, 800)]         0         
_________________________________________________________________
reshape (Reshape)            (None, 30, 800, 1)        0         
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 800, 32)       320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 400, 32)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 15, 400, 64)       18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 200, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7

#### Loss Function

In [9]:
# Extra inputs that are only consumed by the CTC loss layer
labels       = Input(shape=[max_label_len], dtype='float32', name='the_labels')
input_length = Input(shape=[1], dtype='int64', name='input_length')
label_length = Input(shape=[1], dtype='int64', name='label_length')


def ctc_lambda_func(args):
    '''
    Computes the batch CTC cost inside a Lambda layer.

    Arguments:
        args : sequence of four tensors, in this order:
               network softmax output, true labels, input lengths, label lengths
    Returns:
        the tensor produced by K.ctc_batch_cost for the batch
    '''
    softmax_out, true_seq, n_steps, n_labels = args
    return K.ctc_batch_cost(
        y_true=true_seq,
        y_pred=softmax_out,
        input_length=n_steps,
        label_length=n_labels,
    )


ctc_inputs = [outputs, labels, input_length, label_length]
loss_out   = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(ctc_inputs)
model      = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

# The 'ctc' layer already outputs the loss value, so the compiled loss just
# passes the model output through and ignores y_true.
model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})

In [10]:
# Length vectors required by the CTC loss. They are constant across samples:
# the number of timesteps entering the LSTM layer is axis 1 of `squeezed`,
# and every label sequence has length max_label_len.
lstm_timesteps = squeezed.shape[1]

train_input_length = np.asarray([lstm_timesteps] * training_pts)
train_label_length = np.asarray([max_label_len] * training_pts)

test_input_length = np.asarray([lstm_timesteps] * test_pts)
test_label_length = np.asarray([max_label_len] * test_pts)

#### Defining Callbacks for the training process

In [11]:
class MetricCallback(keras.callbacks.Callback):
    '''
    This callback calculates sequence- and label-level error rates on the
    training and validation data at the end of every epoch and logs them
    to the experiment tracker.

    Arguments:
        pred_model       : model that maps inputs to per-timestep softmax outputs;
                           it is refreshed with the training model's weights every epoch
        x_train, y_train : training inputs and their label sequences
        x_test, y_test   : validation inputs and their label sequences
        experiment       : object exposing log_metric(name, value)
                           (e.g. a comet_ml Experiment)

    The data and the experiment handed to the constructor are the ones used
    for evaluation and logging; nothing is read from notebook globals.
    '''
    def __init__(self, pred_model, x_train, y_train, x_test, y_test, experiment):
        super(MetricCallback, self).__init__()
        self.train_acc  = 0
        self.val_acc    = 0
        self.x_train    = x_train
        self.y_train    = y_train
        self.x_test     = x_test
        self.y_test     = y_test
        self.weights    = None
        self.pred_model = pred_model
        self.experiment = experiment

        # final metrics of interest (all in percent)
        self.train_ser = 0     # sequence error rate for training data
        self.val_ser   = 0     # sequence error rate for validation data
        self.train_ler = 0     # label error rate on training data
        self.val_ler   = 0     # label error rate on validation data


    def on_epoch_end(self, epoch, logs=None):
        '''Syncs weights into the prediction model, recomputes the metrics and logs them.'''
        print("End of epoch number: ", epoch)

        # copy the current training weights into the prediction model via a weights file
        self.model.save_weights('callback_model.hdf5')
        self.pred_model.load_weights('callback_model.hdf5')

        self.train_accuracy()
        self.val_accuracy()

        for metric_name in ("train_ser", "val_ser", "train_ler", "val_ler"):
            self.experiment.log_metric(metric_name, getattr(self, metric_name))


    def _error_rates(self, x, y):
        '''
        Runs the prediction model on x, greedy-CTC-decodes the output and compares with y.

        Returns:
            (accuracy, sequence error rate, label error rate), all in percent.
            All three are 0.0 when x contains no samples.
        '''
        n_points = x.shape[0]
        if n_points == 0:
            return 0.0, 0.0, 0.0

        pred = self.pred_model.predict(x)
        # every sample uses the full number of output timesteps
        seq_len = np.ones(pred.shape[0]) * pred.shape[1]
        decoded = np.asarray(K.get_value(K.ctc_decode(pred, input_length=seq_len, greedy=True)[0][0]))

        exact_matches = 0
        total_edits   = 0
        total_lab_len = 0

        for decoded_row, true_label in zip(decoded, y):
            # ctc_decode fills unused positions with -1; drop them
            pred_label = decoded_row[decoded_row != -1]

            if np.array_equal(pred_label, true_label):
                exact_matches += 1

            total_edits   += editdistance.eval(pred_label, true_label)
            total_lab_len += len(true_label)

        accuracy = exact_matches / n_points * 100
        ler      = total_edits / total_lab_len * 100 if total_lab_len else 0.0
        return accuracy, 100 - accuracy, ler


    def train_accuracy(self):
        '''calculates accuracy, SER and LER on the train data given to the constructor'''
        self.train_acc, self.train_ser, self.train_ler = self._error_rates(self.x_train, self.y_train)


    def val_accuracy(self):
        '''calculates accuracy, SER and LER on the validation data given to the constructor'''
        self.val_acc, self.val_ser, self.val_ler = self._error_rates(self.x_test, self.y_test)
        
# Checkpoint callback: writes the model to a timestamped file in WRITE_DIR
# whenever the monitored validation loss improves.
best_model_path = WRITE_DIR + "best_model-" + DTSTR + ".hdf5"
model_save_callback = ModelCheckpoint(
    best_model_path,
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)

#### Training

In [12]:
EPOCHS      = 200

# The CTC model takes the image, its labels and both length vectors as inputs;
# the targets are dummy zeros because the loss is computed inside the network.
metric_logger = MetricCallback(model_arch, x_train, y_train, x_test, y_test, experiment)
train_inputs  = [x_train, y_train, train_input_length, train_label_length]
val_inputs    = [x_test, y_test, test_input_length, test_label_length]

model.fit(
    x=train_inputs,
    y=np.zeros(training_pts),
    validation_data=(val_inputs, [np.zeros(test_pts)]),
    epochs=EPOCHS,
    batch_size=32,
    callbacks=[metric_logger, model_save_callback],
    verbose=0,
)

COMET INFO: Ignoring automatic log_parameter('verbose') because 'keras:verbose' is in COMET_LOGGING_PARAMETERS_IGNORE


End of epoch number:  0

Epoch 00001: val_loss improved from inf to 8.06141, saving model to /content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/saved_model/best_model-2020-12-18-12-17.hdf5
End of epoch number:  1

Epoch 00002: val_loss improved from 8.06141 to 6.32584, saving model to /content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/saved_model/best_model-2020-12-18-12-17.hdf5
End of epoch number:  2

Epoch 00003: val_loss improved from 6.32584 to 6.14034, saving model to /content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/saved_model/best_model-2020-12-18-12-17.hdf5
End of epoch number:  3

Epoch 00004: val_loss improved from 6.14034 to 5.25318, saving model to /content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC_Code/saved_model/best_model-2020-12-18-12-17.hdf5
End of epoch number:  4

Epoch 00005: val_loss improved from 5.25318 to 3.75830, saving model to /content/drive/My Drive/TCS

<tensorflow.python.keras.callbacks.History at 0x7f9b8edfd978>

In [13]:
# Close the Comet experiment; the summary printed below is emitted by this call.
experiment.end()

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/adityajain07/ctc-lfd/0f58dec0cb6d4fb9b13b3f7d957b8023
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     batch_loss [1000]         : (0.0002930849732365459, 67.2491455078125)
COMET INFO:     epoch_duration [200]      : (2.604252921999887, 16.325590454999883)
COMET INFO:     loss [200]                : (0.0003099938912782818, 14.390475273132324)
COMET INFO:     train_ler [200]           : (0.0, 85.71428571428571)
COMET INFO:     train_ser [200]           : (0.0, 100.0)
COMET INFO:     val_ler [200]             : (0.32193158953722334, 85.71428571428571)
COMET INFO:     val_loss [200]            : (0.10630326718091965, 8.061408996582031)
COMET INFO:     val_ser [200]             : (1.1267605633802873, 100.0)
COMET INFO:     validate_batch_loss

#### Save and Load Weights

In [None]:
# Write the training model's weights to a file in the current working directory,
# then load that same file into the prediction-only model.
# NOTE(review): model_arch and model appear to be built from the same layer
# objects, so this round trip may be redundant — confirm before removing.
model.save_weights('first_run.hdf5')
model_arch.load_weights('first_run.hdf5')

#### Inference on a single test sample

In [None]:
video_path     = "/content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/CTC-MIME/stacking1.avi"

# These params cannot be changed
desired_shape  = (30, 800)
n_frames       = 10
down_f         = 8

# Run the project-local inference helper on one video with the prediction model
infer_object   = inference.Inference(video_path, n_frames, down_f, desired_shape, model_arch)
image          = infer_object.prep_data()
predicted_out, final_out = infer_object.predict()

print("Raw output: ", predicted_out)
print("Final processed output: ", final_out)
print('\n')

# Translate every non-zero label id into its primitive name (0 is the padding label)
for prim_id in final_out:
    if prim_id != 0:
        print(prim_map[prim_id])

Raw output:  [ 1  4  1  1  5  3  0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1]
Final processed output:  [1 4 1 1 5 3 0]


Reach
Grasp
Reach
Reach
Release
Retract


### Miscellaneous (Do Not Run)

In [None]:
## Saving the model
# Writes only the weights of `model` to first_run.hdf5 in the current working directory.
# The NameError recorded below this cell shows it was last run in a kernel where
# `model` had not been defined; run the model-building cells first.

model.save_weights('first_run.hdf5')

NameError: name 'model' is not defined

In [None]:
# Sanity check: run the prediction model on the first few training samples
# and decode the softmax output with greedy CTC decoding.
test_points = 2

# Inference data
infer_data  = x_train[:test_points]
infer_label = y_train[:test_points]

prediction = model_arch.predict(infer_data)

# every sample uses all output timesteps as its sequence length
n_decoded, n_steps_out = prediction.shape[0], prediction.shape[1]
decode_lengths = np.ones(n_decoded) * n_steps_out

# use CTC decoder; keep the first (best) decoded path
decoded_paths = K.ctc_decode(prediction, input_length=decode_lengths, greedy=True)[0]
out = K.get_value(decoded_paths[0])

print(out)

[[ 1  4  1  1  5  3  0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
  -1 -1]
 [ 1  4  1  5  3  0  0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
  -1 -1]]


In [None]:
# Print the true label next to the decoded prediction for each sample.
for i in range(test_points):
    print("True label: ", infer_label[i])

    # Drop the -1 fill values added by the CTC decoder. The comprehension uses its
    # own variable so the sample index `i` is not overwritten (the old inner
    # `for i in x` loop clobbered it before infer_label[i] was read below).
    pred_lab = [item for item in out[i] if item != -1]

    print("Predicted label: ", np.asarray(pred_lab))
    print(type(infer_label[i]))
    print('\n')
    

True label:  [1 4 1 1 5 3 0]
Predicted label:  [1 4 1 1 5 3 0]
<class 'numpy.ndarray'>


True label:  [1 4 1 5 3 0 0]
Predicted label:  [1 4 1 5 3 0 0]
<class 'numpy.ndarray'>


