In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16
import os
from tensorflow.keras.preprocessing import image
import numpy as np
from keras.applications.vgg16 import preprocess_input
import tensorflow as tf

In [2]:
model = VGG16(weights = "imagenet" , include_top = False , input_shape = (224,224,3))

In [3]:
print(model.summary())

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [4]:
image_dir = r'\\Ai3\research\KeyFrames\vid0'
all_features = []

In [5]:
def extract_features(img_path, model):
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    
    #vgg16 requires image of 2dimension but img_array is 1dimension
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    features = model.predict(img_array)
    return features

In [6]:
for file in os.listdir(image_dir):
    if file.endswith('.jpg'):
        img_path = os.path.join(image_dir, file)  # Construct image path
        features = extract_features(img_path, model)
        print("Shape of features for", img_path, ":", features.shape)
        all_features.append(features)

Shape of features for \\Ai3\research\KeyFrames\vid0\frame0.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame1.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame10.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame11.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame12.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame13.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame2.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame3.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame4.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame5.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame6.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research\KeyFrames\vid0\frame7.jpg : (1, 7, 7, 512)
Shape of features for \\Ai3\research

## Attention Layer

In [7]:
import tensorflow as tf

class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        
    def call(self, features, hidden):
        # Expand the hidden state to match the time dimension of features
        hidden_with_time_axis = tf.expand_dims(hidden, axis=2)
        
        # Tile the hidden state to match the number of frames in features
        #hidden_tiled = tf.tile(hidden_with_time_axis, [1, 1, features.shape[1], 1])
        hidden_tiled = tf.tile(tf.expand_dims(hidden, 1), [1, features.shape[1], 1])
        # Compute the score
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_tiled))
        score = self.V(score)
        
        # Compute attention weights
        attention_weights = tf.nn.softmax(score, axis=2)
        
        # Compute the context vector
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=2)
        
        return context_vector, attention_weights

# # Example data shapes

# hidden_shape = (32, 14, 256)

# # Generate random tensors with the given shapes

# hidden = tf.random.normal(hidden_shape)

# # Create an instance of BahdanauAttention
# units = 256
# attention = BahdanauAttention(units)
# features = tf.convert_to_tensor(all_features, dtype=tf.float32)
# # Pass the features tensor and the hidden state tensor to the attention mechanism
# context_vector, attention_weights = attention(features, hidden)

# print("Context vector shape:", context_vector.shape)
# print("Attention weights shape:", attention_weights.shape)




## Define LSTM model

In [8]:
class MyModel(tf.keras.Model):
    def __init__(self, units, output_dim):
        super(MyModel, self).__init__()
        self.units = units
        self.lstm = tf.keras.layers.LSTM(units)
        self.attention = BahdanauAttention(units)
        self.fc = tf.keras.layers.Dense(output_dim)
        
    def call(self, features, annotations):
        # Pass the features through the LSTM layer
        lstm_output = self.lstm(features)
        
        # Compute context vector using BahdanauAttention
        context_vector, attention_weights = self.attention(features, lstm_output)
        
        # Concatenate the context vector with the annotations
        combined_input = tf.concat([context_vector, annotations], axis=1)
        
        # Pass the combined input through a fully connected layer
        output = self.fc(combined_input)
        
        return output, attention_weights

# Example data shapes
batch_size = 32
num_frames = 10
feature_dim = 512
hidden_units = 256
output_dim = 10  # Example output dimension

# Generate random feature tensors with the given shapes
features = tf.random.normal((batch_size, num_frames, feature_dim))

# Generate random annotation tensors with the given shapes
annotations = tf.random.normal((batch_size, output_dim))

# Create an instance of MyModel
model = MyModel(hidden_units, output_dim)

# Pass the features and annotations through the model
output, attention_weights = model(features, annotations)

print("Output shape:", output.shape)
print("Attention weights shape:", attention_weights.shape)

Output shape: (32, 10)
Attention weights shape: (32, 10, 1)


In [9]:
print((all_features[10].shape))


(1, 7, 7, 512)


In [10]:
print(all_features.shape)

AttributeError: 'list' object has no attribute 'shape'

In [24]:
print(len(all_features[0]))

1


In [25]:
#LSTM
