In [1]:
import os
import cv2
import random
import math
import numpy as np
import datetime as dt
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import os

dataset_path = os.listdir('/Users/vedantpadole/Desktop/Research/ASL_MY_DATASET')

all_classes = os.listdir('/Users/vedantpadole/Desktop/Research/ASL_MY_DATASET')
print (all_classes)  
len(all_classes)

['sugar', 'cup', 'sweet', 'ac', 'bill', 'thank-you', 'water', 'no', 'vegetables', 'pop corn', 'bitter', 'milk', 'allergy', 'spicy', 'cost', 'ice cream', 'chair', 'cheese', 'napkin', 'about', 'small', 'french fries', 'manager', 'salt', 'cold', 'warm', 'ingredients', 'burger', 'alcohol', 'eggs', 'chicken', 'what', 'bread', 'hello', 'sauce', 'bag', 'pizza', 'pepper', 'drink', 'which', 'gluten free']


41

In [3]:
IMAGE_HEIGHT,IMAGE_WIDTH=128,128
SEQUENCE_LENGTH=10
DATASET='/Users/vedantpadole/Desktop/Research/ASL_MY_DATASET'
CLASSES_LIST=all_classes

In [4]:
def frames_extraction(video_path):
  frames_list=[]
  video_reader=cv2.VideoCapture(video_path)
  video_frames_count=int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
  skip_frames_window=max(int(video_frames_count/SEQUENCE_LENGTH),1)
  for frame_counter in range(SEQUENCE_LENGTH):
    video_reader.set(cv2.CAP_PROP_POS_FRAMES,frame_counter*skip_frames_window)
    success,frame=video_reader.read()
    if not success:
      break
    resized_frame=cv2.resize(frame,(IMAGE_HEIGHT,IMAGE_WIDTH))
    normalized_frame=resized_frame/255
    frames_list.append(normalized_frame)
  video_reader.release()
  return frames_list

In [5]:
def create_dataset():
  features=[]
  labels=[]
  video_files_path=[]
  for class_index,class_name in enumerate(CLASSES_LIST):
    print(f'Extracting the data of class:  {class_name}')
    files_list=os.listdir(os.path.join(DATASET,class_name))
    for file_name in files_list:
      video_file_path=os.path.join(DATASET,class_name,file_name)
      frames=frames_extraction(video_file_path)
      if len(frames)==SEQUENCE_LENGTH:
        features.append(frames)
        labels.append(class_index)
        video_files_path.append(video_file_path)
  features=np.asarray(features)
  labels=np.array(labels)
  return features,labels,video_files_path

In [6]:
features,labels,video_files_path=create_dataset()

Extracting the data of class:  sugar
Extracting the data of class:  cup
Extracting the data of class:  sweet
Extracting the data of class:  ac
Extracting the data of class:  bill
Extracting the data of class:  thank-you
Extracting the data of class:  water
Extracting the data of class:  no
Extracting the data of class:  vegetables
Extracting the data of class:  pop corn
Extracting the data of class:  bitter
Extracting the data of class:  milk
Extracting the data of class:  allergy
Extracting the data of class:  spicy
Extracting the data of class:  cost
Extracting the data of class:  ice cream
Extracting the data of class:  chair
Extracting the data of class:  cheese
Extracting the data of class:  napkin
Extracting the data of class:  about
Extracting the data of class:  small
Extracting the data of class:  french fries
Extracting the data of class:  manager
Extracting the data of class:  salt
Extracting the data of class:  cold
Extracting the data of class:  warm
Extracting the data of

In [7]:
from keras.utils import to_categorical
one_hot_encoded_labels=to_categorical(labels)

In [8]:
seed_constant=27
np.random.seed(seed_constant)
random.seed(seed_constant)
tf.random.set_seed(seed_constant)

In [9]:
from sklearn.model_selection import train_test_split
features_train,features_test,labels_train,labels_test=train_test_split(features,one_hot_encoded_labels,test_size=0.2,shuffle=True,random_state=seed_constant)

In [10]:
a=features_train.shape[0]

In [37]:
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Reshape, Multiply, Bidirectional, LSTM, Dense

# Assuming you have already defined IMAGE_HEIGHT, IMAGE_WIDTH, and CLASSES_LIST

def create_cbam_bilrcn_model():
    model = Sequential()
    
    # CBAM - Convolutional Block Attention Module
    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', data_format='channels_last', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Add CBAM attention here (you can implement it as a separate function or a custom layer)
    
    model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', data_format='channels_last'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Add CBAM attention here
    
    # Reshape before BiLRCN
    model.add(Reshape((SEQUENCE_LENGTH, -1)))  # Use -1 to flatten the spatial dimensions
    model.add(Bidirectional(LSTM(units=32, activation='relu', return_sequences=True)))
    model.add(Bidirectional(LSTM(units=64, activation='relu', return_sequences=True)))
    
    # Global pooling or TimeDistributed pooling can be used here based on the data and problem
    model.add(GlobalAveragePooling2D())  # or TimeDistributed(Dense(64)) + GlobalAveragePooling1D()
    
    model.add(Dense(len(CLASSES_LIST), activation='softmax'))
    model.summary()
    return model


In [38]:
model1=create_cbam_bilrcn_model()

ValueError: Input 0 of layer "global_average_pooling2d_23" is incompatible with the layer: expected ndim=4, found ndim=3. Full shape received: (None, 10, 128)

In [None]:
from keras.callbacks import EarlyStopping
early_stopping_call=EarlyStopping(monitor='val_loss',patience=10,mode='min',restore_best_weights=True)
model1.compile(loss='categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])
r=model1.fit(x=features_train,y=labels_train,epochs=50,batch_size=32,shuffle=True,validation_split=0.2,callbacks=[early_stopping_call])