# Primitive Segmentation using ConvLSTM

**Author:** Aditya Jain <br>
**Date started:** 27th July, 2020<br>
**Description:** Predict primitves actions in a human demonstration using ConvLSTM model.

### Setup


In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import math
import pylab as plt
import os
import glob
import cv2
from google.colab.patches import cv2_imshow
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint
import datetime

In [3]:
N_CLASSES  = 2          # no. of primitves in the TADL
PX         = 128        # no. of rows in training/test images
PY         = 128        # no. of columns in training/test images
CHANNELS   = 3          # no. of channels in the image
N_FRAMES   = 20         # no. of frames in each training/test video
BATCH_SIZE = 32         # size of the batches
DATA_DIR   = "/content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/TADL-II/"
TEST_SPLIT = 0.2        # no. of test samples to draw from data

# directory for saving the trained model
WRITE_DIR  =  "/content/drive/My Drive/TCS FullTime Work/LfD/Liquid_Pouring/Training/"     

## Building the Dataset

Reads the datafiles and builds the dataset

In [4]:
def preprocessing(img, width, height):
  '''
  returns a preprocessed image suitable for training
  '''
  img = cv2.resize(img, (width, height))       # image resize
  # cv2_imshow(img)                            # optional command to visualize the read image
  img = img.astype("float32") / 255            # rescale the image from 0-1

  return img



def build_dataset(dataset_dir, n_frames):
  '''   
  takes input the parent data directory and no of frames to sample from every video;
  returns the data and label in correct format and dimensions
  '''
  # no. of primitives in the library
  prim_actions    = [dI for dI in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir,dI))]
  prim_no         = 0                               # label for every primitive

  # stores the final data to be returned
  image_data  = []
  label_data  = []

  for action in prim_actions:
    # prim_dict[action] = i
    prim_path       = os.path.join(DATA_DIR, action)   # gives path for each primitive  
    prim_no         += 1

    for video_path in glob.iglob(prim_path + '/*.mp4'):  # iterate over every primitive video
      # print(video_path)

      # for storing data for the current video
      temp_image   = []
      temp_label   = []
      
      sec         = 0      
      vidcap      = cv2.VideoCapture(video_path)     
      fps         = vidcap.get(cv2.CAP_PROP_FPS)           #  FPS of the video      
      frame_count = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)   #  total frame count
      total_sec   = frame_count/fps                        #  total video length in seconds
      # print("Total Time: ", total_sec, frame_count)

      TIME_SEC    = total_sec/n_frames                     # the video will be sampled after every TIME_SEC          
      i = 0
      while (sec < total_sec):
        vidcap.set(cv2.CAP_PROP_POS_MSEC,sec*1000)         # setting which frame to get
    
        sec          += TIME_SEC
        success,image = vidcap.read()
    
        if success:
          process_image = preprocessing(image, PX, PY)
          temp_image.append(process_image)
          i += 1

      # print("Total frames in video taken: ", i) 

      image_data.append(temp_image)
      temp_label = prim_no       
      label_data.append(temp_label)   

  return  np.asarray(image_data),  np.asarray(tf.one_hot(label_data, prim_no))
      

data, label = build_dataset(DATA_DIR, N_FRAMES)

train_data, test_data, train_label, test_label = train_test_split(
    data, label, test_size=0.20, shuffle=True, random_state=0)

In [5]:
print(data.shape, label.shape)

(11, 20, 128, 128, 3) (11, 4)


## Build a model

We create a model which take as input movies of shape
`(n_frames, width, height, channels)` and returns a movie
of identical shape.


In [6]:
# Working model with 1 output format

model = keras.Sequential(
    [
        keras.Input(shape = (N_FRAMES, PX, PY, CHANNELS)),  
        layers.ConvLSTM2D(
            filters=64, kernel_size=(3, 3), padding="same", return_sequences=False
        ),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        # layers.Dropout(0.5),     # Enable if the model is overfitting
        # layers.Dense(100, activation='relu'),
        layers.Dense(N_CLASSES, activation='softmax'),
        
    ]
)

# print model summary
model.summary()

# model compilation
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_lst_m2d (ConvLSTM2D)    (None, 128, 128, 64)      154624    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 262144)            0         
_________________________________________________________________
dense (Dense)                (None, 2)                 524290    
Total params: 678,914
Trainable params: 678,914
Non-trainable params: 0
_________________________________________________________________


## Train the model (and Test)


In [7]:
EPOCHS     = 100
BATCH_SIZE = 2
DTSTR      = datetime.datetime.now()
DTSTR      = DTSTR.strftime("%Y-%m-%d-%H-%M")

checkpoint = ModelCheckpoint(WRITE_DIR + "best_model-" + DTSTR + ".h5", monitor='loss', verbose=1,
    save_best_only=True, mode='auto', period=1)

model.fit(
    train_data,
    train_label,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=2,
    validation_data = (test_data, test_label),
    callbacks=[checkpoint]
)

# The final trained model
model.save(WRITE_DIR + "final_model-" + DTSTR + ".h5")

Epoch 1/100


ValueError: ignored

In [64]:
print(train_data.shape)

(8, 20, 128, 128, 3)


## Miscellaneous - Not to be run

In [6]:
model = keras.Sequential(
    [
        keras.Input(shape = (None, PX, PY, CHANNELS)),  
        layers.ConvLSTM2D(
            filters=64, kernel_size=(3, 3), padding="same", return_sequences=True
        ),
        layers.ConvLSTM2D(
            filters=40, kernel_size=(3, 3), padding="same", return_sequences=False
        ),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        # layers.Dense(100, activation='relu'),
        layers.Dense(N_CLASSES, activation='softmax'),
        
    ]
)

# print model summary
model.summary()

# model compilation
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv_lst_m2d (ConvLSTM2D)    (None, None, 128, 128, 64 154624    
_________________________________________________________________
conv_lst_m2d_1 (ConvLSTM2D)  (None, 128, 128, 40)      149920    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 40)        0         
_________________________________________________________________
flatten (Flatten)            (None, 163840)            0         
_________________________________________________________________
dropout (Dropout)            (None, 163840)            0         
_________________________________________________________________
dense (Dense)                (None, 2)                 327682    
Total params: 632,226
Trainable params: 632,226
Non-trainable params: 0
__________________________________________________

### Model Evaluation on Test Set

In [None]:
score = model.evaluate(test_data, test_label, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])


Test loss: 99.90675354003906
Test accuracy: 0.0


In [None]:
model = keras.Sequential()
model.add(layers.ConvLSTM2D(filters = 64, kernel_size = (3, 3), return_sequences = False, data_format = "channels_last", input_shape = (N_FRAMES, PX, PY, 3)))
model.add(layers.Dropout(0.2))
model.add(layers.Flatten())
model.add(layers.Dense(256, activation="relu"))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(2, activation = "softmax"))

model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

In [None]:
model = keras.Sequential(
    [
        keras.Input(shape = (None, PX, PY, CHANNELS)),  
        layers.ConvLSTM2D(
            filters=64, kernel_size=(3, 3), padding="same", return_sequences=False
        ),
        layers.BatchNormalization(),
        layers.ConvLSTM2D(
            filters=64, kernel_size=(3, 3), padding="same", return_sequences=True
        ),
        layers.BatchNormalization(),
        layers.ConvLSTM2D(
            filters=64, kernel_size=(3, 3), padding="same", return_sequences=True
        ),
        layers.BatchNormalization(),
        layers.Flatten(),
        layers.Dropout(0.5),
        
        # layers.Dense(100, activation='relu'),
        layers.Dense(N_CLASSES, activation='softmax'),
        
    ]
)

model.summary()

# model compilation
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

In [None]:
test_data = np.asarray(train_data)
test_label = np.asarray(train_label)

print(train_data.shape)
print(train_label.shape)
print(train_label)

(2, 15, 128, 128, 3)
(2, 2)
[[1. 0.]
 [0. 1.]]
