#This is an example of performing Video Activity Recognition using LSTM
Modified from "Hands-on Computer Vision with TensorFlow 2" by B. Planche and E. Andres

In [1]:
# STEP 1:  Install packages in the current environment
import sys
!{sys.executable} -m pip install opencv-python
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install tqdm
!{sys.executable} -m pip install scikit-learn



In [2]:
#!{sys.executable} -m pip install tensorflow==1.14
!{sys.executable} -m pip install tensorflow-gpu==2.1.0 --user



# Step 2: import modules

In [3]:
import tensorflow as tf
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tqdm
from sklearn.preprocessing import LabelBinarizer


In [4]:
#tf.enable_eager_execution()
#tf.executing_eagerly()

#Step 3: setup variables

In [5]:
#location of where YOU have installed the data set UCF-101 located
# at 
#BASE_PATH = '../data/UCF-101'
#change the base path to location YOU installed UCF-101 dataset
#BASE_PATH = 'C:/Grewe/Classes/CS663/Mat/LSTM/data/UCF-101'
BASE_PATH = 'D:\\CSUEB\\lynne\\DataSet_NPY'
VIDEOS_PATH = os.path.join(BASE_PATH, '**','*.mp4')
TENSORBOARD_LOG_DIRECTORY = "D:\\CSUEB\\lynne\\tensorboard_logs"
#this specifies the sequence length will process by LSTM - only use 30 as some of the videos 1.X seconds
SEQUENCE_LENGTH = 40
BATCH_SIZE = 16
NUM_EPOCHS = 1000
RESULT_DIFF_THRESHOLD=0.1
print(VIDEOS_PATH)


D:\CSUEB\lynne\DataSet_NPY\**\*.mp4


### STEP 4: 
Create a MyLabelBinarizer for 2 Classes

In [6]:
#run this

class MyLabelBinarizer(LabelBinarizer):
    def transform(self, y):
        Y = super().transform(y)
        if self.y_type_ == 'binary':
            return np.hstack((Y, 1-Y))
        else:
            return Y
    def inverse_transform(self, Y, threshold=None):
        if self.y_type_ == 'binary':
            return super().inverse_transform(Y[:, 0], threshold)
        else:
            return super().inverse_transform(Y, threshold)

In [7]:
#run this
LABELS = ['Falling','Walking']
encoder = MyLabelBinarizer()
encoder.fit(LABELS)
print(encoder.classes_)
print(encoder.transform(['Falling', 'Walking']))

t= encoder.transform(['Falling', 'Walking'])
print(t)
print(encoder.inverse_transform(t))
print("length of labrels " + str(len(LABELS)))

['Falling' 'Walking']
[[0 1]
 [1 0]]
[[0 1]
 [1 0]]
['Falling' 'Walking']
length of labrels 2


### STEP 5 (a): 
    Create the LSTM model:    1) Masking layer  2) LSTM layer with 512 cells, dropout 0.5, recurrent_dropout of 0.5  
 3) a fully connected relu activation layer with 256 outputs,  4) a droupout layer 0.5  5) a final decision fully connected layer of putput length of labels  (which is the number of classes) with softmax activation.
 
 >>potentially going to 256 rather than 512, Dense layer 128

In [8]:
#setup a keras Sequential model with 1) Masking layer  2) LSTM layer with 512 cells, dropout 0.5, recurrent_dropout of 0.5  
# 3) a fully connected relu activation layer with 256 outputs,  4) a droupout layer 5) a final decision fully connected layer of length of labels
# (which is the number of classes) with softmax activation.
model = tf.keras.Sequential([
    tf.keras.layers.Masking(mask_value=0.),
    tf.keras.layers.LSTM(512, dropout=0.5, recurrent_dropout=0.5),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(2, activation='softmax')
    #tf.keras.layers.Dense(len(LABELS), activation='softmax')
])

### STEP 5 (b): 
Setup for the model the Loss function, the Optimizer function, and any metrics want to compute in training
>> read about metrics and to add 

In [9]:
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy', 'top_k_categorical_accuracy'])

### STEP 6: 
Setup  the training and test list which are lists of the training filenames.   Note you will need to change the location of these files to point to your location.  Define a function make_generator that returns a generator which will randomly shuffle a file list (either training or testing that will be passed later) and then changes the file extension of the avi files listed in the list to .npy which is our features for that avi video which were calcluated in step 6

In [10]:
#train_file = '/Users/subhi/Downloads/CV-proj3/Data/trainlist.txt'
#test_file = '/Users/subhi/Downloads/CV-proj3/Data/testlist.txt'

#contain about 10% of data roughly in validation set
valid_file = 'D:/CSUEB/lynne/DataSet_NPY/validation.txt'

#contain about 80% of data roughly in training set
train_file = 'D:/CSUEB/lynne/DataSet_NPY/training.txt'

#contain about 10% of data roughly in testing set
test_file = 'D:/CSUEB/lynne/DataSet_NPY/testing.txt'

with open(valid_file) as f:
    valid_list = [row.strip() for row in list(f)]

with open(train_file) as f:
     train_list = [row.strip() for row in list(f)]
        
with open(test_file) as f:
     test_list = [row.strip() for row in list(f)]
    #train_list = [row.split(' ')[0] for row in train_list]
#print(train_list)


def make_generator(file_list):
    def generator():
        np.random.shuffle(file_list)
        for path in file_list:
            label = os.path.basename(os.path.dirname(path))
            features = np.load(path)
            
            padded_sequence = np.zeros((SEQUENCE_LENGTH, 1280))
            padded_sequence[0:len(features)] = np.array(features)
            
            transformed_label = encoder.transform([label])
            
            yield padded_sequence, transformed_label[0]
    return generator

In [11]:
os.path.dirname("D:\CSUEB\lynne\DataSet_NPY\Falling\Img 2927-9.npy")

'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling'

In [12]:
print(train_list)

['D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4113-52.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2947-21.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4117-56.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4222-128.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4188-103.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4183-100.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4187-102.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4098-39.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4204-115.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2941-17.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4135-68.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4173-94.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4118-57.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4137-70.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2963-32.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4136-69.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2948-

In [13]:
print(valid_list)

['D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2927-9.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4125-62.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4166-89.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4184-101.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4120-59.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4121-60.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4180-98.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4225-131.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4191-106.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4193-107.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4226-132.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2952-24.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2949-23.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Walking\\walking (537).npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Walking\\walking (569).npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Walking\\walking (926).npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Walking\\walk

In [14]:
print(test_list)

['D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2915-1.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2933-12.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2934-13.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4100-41.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2923-5.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2940-16.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4105-45.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4167-90.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4107-47.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4198-112.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4161-85.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 4149-80.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Falling\\Img 2925-7.npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Walking\\walking (72).npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Walking\\walking (841).npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Walking\\walking (160).npy', 'D:\\CSUEB\\lynne\\DataSet_NPY\\Walking\\walking (74

### STEP 10: 
Setup the train_dataset and valid_dataset (validation/testing).   Here we setting up training batch sets of 16.  

In [15]:
#for tesnorflow 2.*
train_dataset = tf.data.Dataset.from_generator(make_generator(train_list),
                output_types=(tf.float32, tf.int16),
                output_shapes=((SEQUENCE_LENGTH, 1280), (len(LABELS))))
                 

train_dataset = train_dataset.batch(16,drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)


valid_dataset = tf.data.Dataset.from_generator(make_generator(valid_list),
                 output_types=(tf.float32, tf.int16),
                 output_shapes=((SEQUENCE_LENGTH, 1280), (len(LABELS))))
valid_dataset = valid_dataset.batch(16,drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)

# Test dataset
test_dataset = tf.data.Dataset.from_generator(make_generator(test_list),
                 output_types=(tf.float32, tf.int16),
                 output_shapes=((SEQUENCE_LENGTH, 1280), (len(LABELS))))
test_dataset = test_dataset.batch(16,drop_remainder=True).prefetch(tf.data.experimental.AUTOTUNE)


In [16]:
print(train_dataset)

<PrefetchDataset shapes: ((16, 40, 1280), (16, 2)), types: (tf.float32, tf.int16)>


In [17]:
print(valid_dataset)

<PrefetchDataset shapes: ((16, 40, 1280), (16, 2)), types: (tf.float32, tf.int16)>


In [18]:
print(test_dataset)

<PrefetchDataset shapes: ((16, 40, 1280), (16, 2)), types: (tf.float32, tf.int16)>


In [19]:
BASE_DATA_PATH = 'D:/CSUEB/lynne'
mylog_dir = os.path.join(BASE_DATA_PATH, "train_log")
print("Mylog directory = " + mylog_dir)

Mylog directory = D:/CSUEB/lynne\train_log


In [20]:
# create the directory for the Tensorboard logging files
if not os.path.exists(TENSORBOARD_LOG_DIRECTORY):
    os.mkdir(TENSORBOARD_LOG_DIRECTORY)
    
# Create dir for checkpoint call back
# Ref: https://lambdalabs.com/blog/tensorflow-2-0-tutorial-03-saving-checkpoints/
MODEL_CHECKPOINT_PATH = os.path.join(BASE_DATA_PATH, "model_checkpoint")
if not os.path.exists(MODEL_CHECKPOINT_PATH):
    os.mkdir(MODEL_CHECKPOINT_PATH)    
MODEL_CHECKPOINT_FILE_NAME = os.path.join(MODEL_CHECKPOINT_PATH, "weights.{epoch:04d}-{val_loss:.2f}.hdf5")

In [21]:
#tf 2.0
    
#defining tensorboard callback
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = TENSORBOARD_LOG_DIRECTORY, update_freq='epoch')
#defining checkpoint callback
modelcheckpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    MODEL_CHECKPOINT_FILE_NAME, monitor='val_accuracy', verbose=0, save_best_only=False,
    save_weights_only=False, mode='auto', save_freq='epoch')


#model.fit(train_dataset, epochs=1, callbacks=[tensorboard_callback], validation_data=valid_dataset)
model.fit(train_dataset, epochs=NUM_EPOCHS, callbacks=[tensorboard_callback, modelcheckpoint_callback], validation_data=valid_dataset)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000


Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
 2/61 [..............................] - ETA: 15s - loss: 0.0000e+00 - accuracy: 1.0000 - top_k_categorical_accuracy: 1.0000

KeyError: 'val_loss'

In [22]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking (Masking)            multiple                  0         
_________________________________________________________________
lstm (LSTM)                  multiple                  3672064   
_________________________________________________________________
dense (Dense)                multiple                  131328    
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  514       
Total params: 3,803,906
Trainable params: 3,803,906
Non-trainable params: 0
_________________________________________________________________


### STEP 12:  save the tensorflow model to an h5 file

### STEP 14: run evaluation on the test data feature extraction

In [24]:
# evaluate the test data using model


# Evaluate the model on the test data using `evaluate`
print('\n# Evaluate on test data')

# NOTE: should have separate test data but, only have validation data
#results = model.evaluate_generator(val_data_gen, verbose=1)
# make and save predictions for all of the testing files in testing_list
for i in range(len(testing_list)):
    features = loadData(testing_list[i], SEQUENCE_LENGTH, FEATURE_LENGTH)
    prediction = predict(features, model)
    classification = makeDecision(prediction, LABELS)
    prediction_file.write("Prediction: " + classification + "\n" 
                          "Prediction Data Source: " + testing_list[i] + "\n\n\n")
    #write out the prediction vector

prediction_file.close()




# Evaluate on test data
      7/Unknown - 0s 37ms/step - loss: 9.1402e-04 - accuracy: 1.0000 - top_k_categorical_accuracy: 1.0000test loss, test acc: [0.0009140229091763834, 1.0, 1.0]


### STEP 15: Run predictions on the test data feature extracted

In [34]:
# make predictions

# Generate predictions (probabilities -- the output of the last layer)
# on new data using `predict`
print('\n# Generate predictions ')
predictions = model.evaluate(test_dataset, verbose=1)


# Generate predictions 
      7/Unknown - 3s 445ms/step - loss: 1.5352e-04 - accuracy: 1.0000 - top_k_categorical_accuracy: 1.0000

In [33]:
print(predictions)

[4.410081542762155e-05, 1.0, 1.0]
