In [None]:
import os
import cv2
import random
import numpy as np
import datetime as dt
import tensorflow as tf
from collections import deque
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

from moviepy.editor import *

In [None]:
# Seed NumPy, Python's `random` module and TensorFlow with one shared constant.
# The same constant is reused later as `random_state` for the train/test split.
seed_constant = 27
np.random.seed(seed_constant)
random.seed(seed_constant)
tf.random.set_seed(seed_constant)

In [None]:
# Root folder of the dataset; the preview cell below lists its entries as class names.
dataset = 'Downloads/Dataset/'
dataset

# Sanity check: can frames be read from the dataset videos?

In [None]:
# Preview the first frame of one random video from up to three random classes,
# to confirm that the videos under `dataset` can be opened and decoded.
NUM_PREVIEW_CLASSES = 3

# Only sub-directories are classes; stray files (e.g. .DS_Store) are ignored.
# Sorting makes the seeded random choice independent of the OS listing order.
all_class_names_list = sorted(
    entry for entry in os.listdir(dataset)
    if os.path.isdir(os.path.join(dataset, entry))
)

# Never ask for more classes than exist (random.sample would raise ValueError).
selected_class_names_list = random.sample(
    all_class_names_list,
    min(NUM_PREVIEW_CLASSES, len(all_class_names_list)),
)

# One row with one column per previewed class.
plt.figure(figsize=(20, 7))

for counter, selected_class_names in enumerate(selected_class_names_list, 1):

    # Folder holding all videos of the selected class.
    class_directory = os.path.join(dataset, selected_class_names)
    video_files_names_list = sorted(os.listdir(class_directory))
    if not video_files_names_list:
        print(f'No video files found in {class_directory}')
        continue

    # Pick one video of that class at random and read its first frame.
    selected_video_file_names = random.choice(video_files_names_list)
    selected_video_path = os.path.join(class_directory, selected_video_file_names)
    video_reader = cv2.VideoCapture(selected_video_path)
    success, bgr_frame = video_reader.read()
    video_reader.release()

    # An unreadable file yields (False, None); skip it instead of crashing in cvtColor.
    if not success:
        print(f'Could not read a frame from {selected_video_path}')
        continue

    # OpenCV decodes to BGR, matplotlib expects RGB.
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)

    # Write the class name onto the frame.
    cv2.putText(rgb_frame, selected_class_names, (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    # Show the labelled frame in its own column.
    plt.subplot(1, NUM_PREVIEW_CLASSES, counter)
    plt.imshow(rgb_frame)
    plt.axis('off')

# Prepare Dataset

In [None]:
# Dataset directory; create_dataset() expects one sub-folder per entry of CLASSES_LIST.
DATASET_DIR = "./Downloads/Dataset/"

# Size every frame is resized to; also the spatial input size of the model.
Image_height,Image_width = 224, 224

# Number of frames sampled from each video and fed to the model as one sequence.
SEQUENCE_LENGTH = 30



# Class names. Each entry is used verbatim as a sub-folder name of DATASET_DIR and
# its position in the list is the integer label of the class.
# NOTE(review): "Voilence" looks like a misspelling of "Violence"; if so, rename the
# folder on disk and this entry together.
CLASSES_LIST = ["Falling","Loitering","Voilence"]

# Function to Extract the frames from video

In [None]:
def frames_extraction(video_path):
    """Sample up to SEQUENCE_LENGTH evenly spaced frames from a video.

    Each sampled frame is resized to Image_width x Image_height and divided by
    255 (which maps 8-bit pixel values into the range [0, 1]).

    Args:
        video_path: Path of the video file to read.

    Returns:
        list: The processed frames in temporal order. The list is shorter than
        SEQUENCE_LENGTH (possibly empty) when the video cannot be opened or a
        frame cannot be read; callers are expected to check its length.
    """
    frames_list = []

    video_reader = cv2.VideoCapture(video_path)
    try:
        # Total number of frames reported by the container.
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Distance between two sampled frames; at least 1 so that short videos
        # are read frame by frame.
        skip_frame_window = max(video_frames_count // SEQUENCE_LENGTH, 1)

        for frame_counter in range(SEQUENCE_LENGTH):
            # Jump to the next sampling position and decode that frame.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frame_window)
            success, frame = video_reader.read()

            # Stop at the first unreadable frame (end of video or decode error).
            if not success:
                break

            # cv2.resize expects dsize as (width, height), not (height, width).
            resized_frame = cv2.resize(frame, (Image_width, Image_height))

            # Scale pixel values before storing the frame.
            frames_list.append(resized_frame / 255)
    finally:
        # Release the capture even if decoding or resizing raised.
        video_reader.release()

    return frames_list

# Create A Dataset

In [None]:
def create_dataset():
    """Build the feature/label arrays from the videos under DATASET_DIR.

    For every class in CLASSES_LIST, each file in the class folder is passed to
    frames_extraction(). Only videos that yield exactly SEQUENCE_LENGTH frames
    are kept; all others are skipped.

    Returns:
        tuple: (features, labels, video_files_paths) where
            features is a NumPy array with one frame sequence per kept video,
            labels is a NumPy array with the index of each video's class in
            CLASSES_LIST, and
            video_files_paths is the list of paths of the kept videos, in the
            same order as the two arrays.
    """
    features = []
    labels = []
    video_files_paths = []

    for class_index, class_name in enumerate(CLASSES_LIST):
        print(f'Extracting data of class: {class_name}')

        class_directory = os.path.join(DATASET_DIR, class_name)

        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore the seeded train/test split) reproducible.
        files_list = sorted(os.listdir(class_directory))

        kept_count = 0
        for file_name in files_list:
            video_file_path = os.path.join(class_directory, file_name)

            frames = frames_extraction(video_file_path)

            # Unreadable or too-short videos produce fewer frames and are dropped.
            if len(frames) == SEQUENCE_LENGTH:
                features.append(frames)
                labels.append(class_index)
                video_files_paths.append(video_file_path)
                kept_count += 1

        # Summarise instead of printing every file name.
        print(f'Kept {kept_count} of {len(files_list)} files')

    # Convert the collected sequences and labels to arrays.
    features = np.array(features)
    labels = np.array(labels)

    return features, labels, video_files_paths
            

In [None]:
# Extract the frame sequences of every class listed in CLASSES_LIST.
features, labels, video_files_paths = create_dataset()

In [None]:
# One-hot encode the integer class labels.
# num_classes is given explicitly so the width always equals the model's output
# size, even if the last class contributed no usable videos.
one_hot_encoded_labels = to_categorical(labels, num_classes=len(CLASSES_LIST))
one_hot_encoded_labels

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    one_hot_encoded_labels,
    test_size=0.20,
    shuffle=True,
    random_state=seed_constant,
)

In [None]:
# Inspect the dimensions of the training features.
X_train.shape

# Model LRCN Function

In [None]:
# Base model with few layers
def LRCN_Model():
    """Build the LRCN classifier: per-frame CNN followed by an LSTM.

    Four convolution blocks are applied to every frame of the sequence through
    TimeDistributed wrappers, the per-frame feature maps are flattened, an LSTM
    processes the resulting sequence and a softmax layer outputs one probability
    per entry of CLASSES_LIST. The model summary is printed.

    Returns:
        The uncompiled Sequential model.
    """
    # (filters, pooling size/stride, add dropout afterwards) for each conv block.
    conv_blocks = (
        (16, 4, True),
        (32, 4, True),
        (64, 2, True),
        (64, 2, False),
    )

    model = Sequential()

    for block_number, (filters, pool, with_dropout) in enumerate(conv_blocks):
        convolution = Conv2D(filters, (3, 3), padding='same', activation='relu')

        if block_number == 0:
            # The first layer declares the input: a sequence of RGB frames.
            model.add(TimeDistributed(
                convolution,
                input_shape=(SEQUENCE_LENGTH, Image_height, Image_width, 3),
            ))
        else:
            model.add(TimeDistributed(convolution))

        # Down-sample the feature maps of every frame.
        model.add(TimeDistributed(MaxPooling2D(pool, pool)))

        # Dropout against overfitting (not used after the last block).
        if with_dropout:
            model.add(TimeDistributed(Dropout(0.20)))

    # Flatten the feature maps of each frame into one vector per time step.
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(32))

    # Output layer: one softmax unit per class.
    model.add(Dense(len(CLASSES_LIST), activation='softmax'))

    model.summary()
    return model

    
    

In [None]:
# Build the model; LRCN_Model() also prints its summary.
lrcn_model = LRCN_Model()


# Model Train

In [None]:
# Stop training once the validation loss has not improved for 10 epochs and
# restore the weights of the best epoch.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
)

# Configure optimizer, loss and the reported metric.
lrcn_model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train; 20% of the training samples are held out for validation.
lrcn_model_training = lrcn_model.fit(
    X_train,
    y_train,
    batch_size=4,
    epochs=50,
    shuffle=True,
    validation_split=0.20,
    callbacks=[early_stopping_callback],
)

# Evaluate the Trained Model

In [None]:
# Evaluate the trained model on the held-out test split.
# The result is unpacked as (loss, accuracy) by the save cell below.

model_evaluate_history = lrcn_model.evaluate(X_test, y_test)
model_evaluate_history

# Save Model

In [None]:
# Split the evaluation result so both values can be embedded in the file name.
model_evaluation_loss, model_evaluation_accuracy = model_evaluate_history

# Timestamp of this save, formatted as year_month_day_hour_minute_second.
date_time_format = "%Y_%m_%d_%H_%M_%S"
current_date_time_dt = dt.datetime.now()
current_date_time_string = current_date_time_dt.strftime(date_time_format)

# Target path: timestamp, loss and accuracy identify the saved model.
model_file_name = f'./Models_Save/lrcn_model_Date_Time_{current_date_time_string}_Loss_{model_evaluation_loss}_Accuracy_{model_evaluation_accuracy}_normal1__'

# Persist the trained model.
lrcn_model.save(model_file_name)

# Prediction

In [None]:
# Load the saved model through tf.keras, the same Keras implementation used to
# build and save it (the standalone `keras` package may be a different version).
# NOTE(review): this path differs from the './Models_Save/...' path written by the
# save cell above; confirm the model was copied/exported to this folder.
restored_model = tf.keras.models.load_model('tensorflow_S/Save_Model/')

# Single prediction for an entire video

In [None]:

# lrcn_model
def prediction_video(input_video_path, SEQUENCE_LENGTH):
    """Classify a whole video with one prediction and print the result.

    SEQUENCE_LENGTH evenly spaced frames are sampled from the video, resized to
    Image_width x Image_height, divided by 255 and passed as a single sequence
    to `restored_model`. The predicted class name and its probability are printed.

    Args:
        input_video_path: Path of the video file to classify.
        SEQUENCE_LENGTH: Number of frames to sample and feed to the model.

    Raises:
        ValueError: If fewer than SEQUENCE_LENGTH frames could be read, since the
            model cannot be fed an incomplete sequence.
    """
    frame_list = []

    video_reader = cv2.VideoCapture(input_video_path)
    try:
        # Distance between two sampled frames; at least 1 for short videos.
        video_frame_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        skip_frame_window = max(video_frame_count // SEQUENCE_LENGTH, 1)

        for frame_counter in range(SEQUENCE_LENGTH):
            # Jump to the next sampling position and decode that frame.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frame_window)
            success, frame = video_reader.read()

            # Stop at the first unreadable frame (end of video or decode error).
            if not success:
                break

            # cv2.resize expects dsize as (width, height), not (height, width).
            resized_frame = cv2.resize(frame, (Image_width, Image_height))
            frame_list.append(resized_frame / 255)
    finally:
        # The capture is no longer needed once the frames are in memory.
        video_reader.release()

    if len(frame_list) != SEQUENCE_LENGTH:
        raise ValueError(
            f'Expected {SEQUENCE_LENGTH} frames but could only read '
            f'{len(frame_list)} from {input_video_path!r}'
        )

    # Add the batch dimension, predict and keep the probabilities of that one sample.
    predict_label_prob = restored_model.predict(np.expand_dims(frame_list, axis=0))[0]

    # The most probable class is the prediction.
    predict_label = np.argmax(predict_label_prob)
    predicted_class_name = CLASSES_LIST[predict_label]

    # Display the result.
    print(f'Action Prediction:{predicted_class_name}\nConfidence :{predict_label_prob[predict_label]}')
        

In [None]:
# Classify one test clip, then display the clip (without audio) below the printed result.
input_video_path = "Downloads/testdata/videoplayback (online-video-cutter.com) (1).mp4"
# input_video_path = "Downloads/pexels-mary-taylor-6002473.mp4"
prediction_video(input_video_path,SEQUENCE_LENGTH)
VideoFileClip(input_video_path,audio=False,target_resolution=(300,None)).ipython_display(maxduration=120)

# Frame-by-frame predictions over an entire video (sliding window)

In [None]:
from collections import deque


def predict_on_video(video_file_path,SEQUENCE_LENGTH):
    """Classify a video with a sliding window over its frames.

    Every frame is resized to Image_width x Image_height, divided by 255 and
    pushed into a queue holding the latest SEQUENCE_LENGTH frames. Once the queue
    is full, `restored_model` is run on it after every new frame.

    Args:
        video_file_path: Path of the video file to classify.
        SEQUENCE_LENGTH: Number of consecutive frames per prediction window.

    Returns:
        list: One predicted class name per full window, in temporal order. Empty
        when the video has fewer than SEQUENCE_LENGTH readable frames.
    """
    # Holds the most recent SEQUENCE_LENGTH frames; older ones drop out automatically.
    frames_queue = deque(maxlen=SEQUENCE_LENGTH)
    list_prediction = []

    video_reader = cv2.VideoCapture(video_file_path)
    try:
        while video_reader.isOpened():
            ok, frame = video_reader.read()

            # End of video or decode error.
            if not ok:
                break

            # cv2.resize expects dsize as (width, height), not (height, width).
            resized_frame = cv2.resize(frame, (Image_width, Image_height))
            frames_queue.append(resized_frame / 255)

            # Predict only once a full window is available.
            if len(frames_queue) == SEQUENCE_LENGTH:
                window = np.expand_dims(np.asarray(frames_queue), axis=0)

                # verbose=0: no progress bar for each of the many per-frame calls.
                predicted_labels_probabilities = restored_model.predict(window, verbose=0)[0]

                # The most probable class is the prediction for this window.
                predict_label = np.argmax(predicted_labels_probabilities)
                list_prediction.append(CLASSES_LIST[predict_label])
    finally:
        # Release the capture even if prediction raised.
        video_reader.release()

    return list_prediction

In [None]:
# Run the sliding-window prediction on one clip and show the per-window class names.
input_video_path = 'Downloads/fi034_UjEJAleb.mp4'
result = predict_on_video(input_video_path, SEQUENCE_LENGTH)
result

# Convert the Deep Learning Model into TFLite Model

In [None]:
import tensorflow as tf
# Folder of the saved model to convert, and the .tflite file to write.
saved_model_dir = "tensorflow_S/Save_Model/"
output_model_dir = 'tensorflow_S/model_to22.tflite'
def normalmodel_to_tflitemodel(saved_model_dir,output_model_dir):
    """Convert a saved model to a TFLite flatbuffer file.

    Args:
        saved_model_dir: Folder containing the saved model to convert.
        output_model_dir: Path of the .tflite FILE to write (despite the name,
            this is a file path, not a folder).
    """
    # Make sure the destination folder exists before the costly conversion, so a
    # missing folder cannot discard the converted model at write time.
    output_parent = os.path.dirname(output_model_dir)
    if output_parent:
        os.makedirs(output_parent, exist_ok=True)

    # Load the saved model through its default serving signature.
    converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir, signature_keys=['serving_default'])

    # Default optimisations aim at a smaller model.
    # NOTE(review): accuracy can differ from the original model; re-evaluate the
    # converted model before relying on it.
    converter.optimizations = [tf.lite.Optimize.DEFAULT]

    # Permit TensorFlow ops in addition to the TFLite built-ins.
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]

    # Run the conversion.
    tflite_model = converter.convert()

    # Write the converted model to disk.
    with open(output_model_dir, 'wb') as f:
        f.write(tflite_model)

    print("Done")

In [None]:
# Convert the saved model and write the .tflite file.
normalmodel_to_tflitemodel(saved_model_dir,output_model_dir)

# Prediction with TFLite model for Video

In [None]:
import tensorflow.lite as tflite
import time
from collections import deque
# Load the TFLite model.
# NOTE(review): this loads 'model_to1.tflite', while the conversion cell writes
# 'tensorflow_S/model_to22.tflite'; confirm which file is intended.
interpreter = tflite.Interpreter(model_path='tensorflow_S/model_to1.tflite')
# Allocate the tensors (done once, before the first inference)
interpreter.allocate_tensors()

# Input and output tensor descriptions of the loaded model
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# The three settings below repeat the values defined in the dataset-preparation
# cell, so this cell can run without it.

# Number of frames fed to the model as one sequence
SEQUENCE_LENGTH = 30

# Size every frame is resized to
Image_height,Image_width = 224, 224

# Class names; the list position is the label index
CLASSES_LIST = ["Falling","Loitering","Voilence"]


def tflite_model_prediction(input_video_path, SEQUENCE_LENGTH):
    """Classify a video with the TFLite `interpreter` using a sliding window.

    Every frame is resized to Image_width x Image_height, divided by 255 and
    pushed into a queue holding the latest SEQUENCE_LENGTH frames. Once the queue
    is full, the interpreter is invoked on it after every new frame.

    Args:
        input_video_path: Path of the video file to classify.
        SEQUENCE_LENGTH: Number of consecutive frames per prediction window.

    Returns:
        str: Class name predicted for the last full window, or '' when the video
        has fewer than SEQUENCE_LENGTH readable frames (previously this case
        raised UnboundLocalError).
    """
    # The tensor indices do not change between invocations; look them up once.
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    # Holds the most recent SEQUENCE_LENGTH frames; older ones drop out automatically.
    frames_queue = deque(maxlen=SEQUENCE_LENGTH)

    # Defined up front so a too-short video returns '' instead of failing.
    predicted_class_name = ''

    video_reader = cv2.VideoCapture(input_video_path)
    try:
        while video_reader.isOpened():
            ok, frame = video_reader.read()

            # End of video or decode error.
            if not ok:
                break

            # cv2.resize expects dsize as (width, height), not (height, width).
            resized_frame = cv2.resize(frame, (Image_width, Image_height))
            frames_queue.append(resized_frame / 255)

            # Predict only once a full window is available.
            if len(frames_queue) == SEQUENCE_LENGTH:
                # Convert the window to float32 only when it is actually used, and
                # add the batch dimension.
                input_tensor = np.expand_dims(
                    np.asarray(frames_queue, dtype=np.float32), 0
                )

                interpreter.set_tensor(input_index, input_tensor)
                interpreter.invoke()

                # Class probabilities of this window.
                pred = np.squeeze(interpreter.get_tensor(output_index))

                # The most probable class is the prediction for this window.
                predicted_class_name = CLASSES_LIST[np.argmax(pred)]
    finally:
        # Release the capture even if inference raised.
        video_reader.release()

    return predicted_class_name

In [None]:
# Run the TFLite prediction on one clip and show the returned class name.
input_video_path = 'Downloads/fi034_UjEJAleb.mp4'
result = tflite_model_prediction(input_video_path, SEQUENCE_LENGTH)
result