In [None]:
import os
import numpy as np
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, Dropout, MultiHeadAttention, TimeDistributed, MaxPool2D, BatchNormalization, Dense, Input, Reshape, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from sklearn.model_selection import train_test_split

In [None]:
# Directory holding the saved NumPy arrays, and the two files read from it.
data_dir = 'saved_np_data/'
features_path, labels_path = (
    os.path.join(data_dir, file_name)
    for file_name in ('features.npy', 'labels.npy')
)

In [None]:
# Load the feature array first, then the label array, from the configured paths.
features, labels = (np.load(array_path) for array_path in (features_path, labels_path))

In [None]:
# Show the dimensions of both arrays, one per line, as a quick sanity check.
print(features.shape, labels.shape, sep='\n')

(1113, 10, 100, 100, 3)
(1113,)


## Samples = 1113
## Sequence length = 10 frames per sample
## Image dimensions = (100, 100, 3)

In [None]:
# Hold out 30% of the samples for testing. A fixed random_state makes the
# shuffle deterministic, so the same train/test partition (and therefore the
# same results) is produced on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.3, random_state=42
)

In [None]:
# Report the dimensions of every split, in the same order as before.
for caption, split in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_test shape:", X_test),
    ("y_test shape:", y_test),
):
    print(caption, split.shape)

X_train shape: (779, 10, 100, 100, 3)
y_train shape: (779,)
X_test shape: (334, 10, 100, 100, 3)
y_test shape: (334,)


In [None]:
# Video classifier graph:
#   1. a CNN applied to every frame independently (TimeDistributed),
#   2. multi-head self-attention across the frame axis,
#   3. a dense head ending in a 7-way softmax.
input_shape = (10, 100, 100, 3)  # (frames, height, width, channels)
video_input = Input(shape=input_shape)

# Per-frame feature extraction: Conv2D -> Dropout -> BatchNormalization,
# followed by 2x2 max-pooling.
# The pooling is essential: three un-pooled "valid" 3x3 convolutions leave a
# 94 x 94 x 128 map (1,130,944 values per frame), which would make the
# attention projections and the Dense(512) layer below require billions of
# weights. With pooling the spatial size goes
# 100 -> 98 -> 49 -> 47 -> 23 -> 21 -> 10, i.e. 10 x 10 x 128 = 12,800 per frame.
x = video_input
for filters in (32, 64, 128):
    x = TimeDistributed(Conv2D(filters, 3, activation='relu'))(x)
    x = Dropout(0.3)(x)
    x = TimeDistributed(BatchNormalization())(x)
    x = TimeDistributed(MaxPool2D(pool_size=2))(x)

# Collapse each frame's feature map into one vector, giving a 3-D tensor of
# shape (batch, frames, features) -- one token per frame.
x = TimeDistributed(Flatten())(x)

# Multi-head self-attention for temporal feature extraction.
num_heads = 8  # number of attention heads
key_dim = 64   # size of each head's query/key projection
attention_layer = MultiHeadAttention(key_dim=key_dim, num_heads=num_heads)

# The tensor is already (batch, frames, features), so no extra reshape is
# needed before attention. Query and value are the same tensor (self-attention).
attention_input = x
attention_output = attention_layer(attention_input, attention_input)

# Classification head: merge all frame tokens, then predict one of 7 classes.
x = Flatten()(attention_output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)
output = Dense(7, activation='softmax')(x)

In [None]:
# Wrap the symbolic graph (input tensor -> softmax output) in a functional Model.
model = Model(
    inputs=video_input,
    outputs=output,
)

In [None]:
# A functional Model is built from its Input tensor, so no explicit build step
# is required before printing the layer-by-layer summary.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 10, 100, 10  0           []                               
                                0, 3)]                                                            
                                                                                                  
 time_distributed_7 (TimeDistri  (None, 10, 98, 98,   896        ['input_2[0][0]']                
 buted)                         32)                                                               
                                                                                                  
 dropout_4 (Dropout)            (None, 10, 98, 98,   0           ['time_distributed_7[0][0]']     
                                32)                                                         