In [None]:
import json
import os
import pickle

In [None]:
import numpy as np

# === DATA PREPARATION ===
# Loads the preprocessed sentences, the fitted tokenizer, the maximum sequence
# length and the per-video feature files, and builds the decoder input/target
# arrays. Sentences whose feature file cannot be read are dropped so that
# `video_features`, `preprocessed_data` and the decoder arrays stay aligned
# row-for-row.
print("[INFO] Starting data preparation...")

# Load preprocessed sentences
preprocessed_file_path = '../Data/train/preprocessed_sentences.json'
with open(preprocessed_file_path, 'r') as f:
    preprocessed_data = json.load(f)
print("[INFO] Loaded preprocessed sentences.")

# Load the saved tokenizer to use its vocabulary size.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../Data/train/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
# +1 on top of the word index size; presumably index 0 is reserved for padding.
vocab_size = len(tokenizer.word_index) + 1

# Load max_seq_length
with open('../Data/train/max_seq_length.pkl', 'rb') as f:
    max_seq_length = pickle.load(f)

print(len(preprocessed_data))

# Only the first 100 sentences are used.
preprocessed_data = preprocessed_data[0:100]
requested_count = len(preprocessed_data)

# Load video features from the directory
feature_dir = '../Data/train/features'
print("[INFO] Video features directory set.")

video_features = []
loaded_items = []
for item in preprocessed_data:
    file_path = os.path.join(feature_dir, f"{item['SENTENCE_NAME']}.json")
    try:
        with open(file_path, 'r') as f:
            item_features = json.load(f)
    except (OSError, ValueError) as e:
        # Best effort: report the unreadable/malformed file and skip the item.
        # (json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.)
        print(f"[ERROR] Failed to load: {file_path}. Error: {e}")
        continue
    video_features.append(item_features)
    loaded_items.append(item)
    print(len(video_features))

print(len(video_features))
print(requested_count)
if len(loaded_items) != requested_count:
    print(f"[WARN] Dropped {requested_count - len(loaded_items)} sentence(s) without usable features.")

# Keep only the sentences that have features, so index i means the same
# sample in every downstream array.
preprocessed_data = loaded_items

print("[INFO] Loaded video features.")

# Extract the preprocessed sequences directly
Y_data = [item['PREPROCESSED_SENTENCE'] for item in preprocessed_data]

# Teacher forcing: the decoder input is the sequence without its last element,
# the target is the same sequence shifted left by one position.
sequences = np.array(Y_data)
decoder_input_data = sequences[:, :-1]
decoder_target_data = sequences[:, 1:]


In [None]:
import numpy as np
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, Dropout
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping

# The entry with the most frame keys fixes the padded time dimension; the
# per-frame vector length is read from the first video's 'frame_0' entry.
max_frames = max(len(clip) for clip in video_features)
feature_length = len(video_features[0]['frame_0'][0][0])

# Prepare training data: one zero-initialised (frames x features) slab per
# video; positions past a video's own frame count stay zero.
X_data = np.zeros((len(video_features), max_frames, feature_length))
print(X_data.shape)
for clip_idx, clip in enumerate(video_features):
    # NOTE(review): keys are ordered as plain strings, so 'frame_10' would sort
    # before 'frame_2' if keys are unpadded. Confirm this is intended, and keep
    # it identical to whatever ordering the saved model was trained with.
    for frame_idx, frame_key in enumerate(sorted(clip)):
        X_data[clip_idx, frame_idx, :] = clip[frame_key][0][0]

print(X_data.shape)

# Split encoder inputs, decoder inputs and decoder targets with one call so
# the three arrays are shuffled consistently (fixed seed, 50/50 split).
split_arrays = train_test_split(
    X_data,
    decoder_input_data,
    decoder_target_data,
    test_size=0.5,
    random_state=42,
)
(
    X_train,
    X_val,
    decoder_input_train,
    decoder_input_val,
    decoder_target_train,
    decoder_target_val,
) = split_arrays
print("[INFO] Split data into training and validation sets.")


In [None]:
# === MODEL CONSTRUCTION ===
# Encoder-decoder LSTM: the encoder's final hidden/cell states initialise the
# decoder, which predicts one vocabulary index per decoder time step.
print("[INFO] Constructing the model...")

from keras.optimizers.legacy import Adam

# Hyper-parameters
units = 128
embedding_dim = 128
dropout_rate = 0.5
learning_rate = 0.003

# Encoder: only the final states are kept, the output is discarded.
encoder_inputs = Input(shape=(None, feature_length), name='input_1')
encoder_lstm = LSTM(units, return_state=True, name='lstm_1')
_, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]

# Decoder: integer token ids -> embedding -> LSTM seeded with encoder states.
decoder_inputs = Input(shape=(None,), name='input_2')
decoder_embedding = Embedding(
    vocab_size,
    embedding_dim,
    name='embedding',
    mask_zero=True,
)
decoder_inputs_embedded = decoder_embedding(decoder_inputs)
decoder_lstm = LSTM(
    units,
    return_sequences=True,
    return_state=True,
    name='lstm_2',
)
decoder_lstm_out, _, _ = decoder_lstm(
    decoder_inputs_embedded,
    initial_state=encoder_states,
)

# Dropout on the decoder LSTM output before the softmax projection.
decoder_lstm_out = Dropout(dropout_rate)(decoder_lstm_out)

decoder_dense = Dense(vocab_size, activation='softmax', name='dense')
decoder_outputs = decoder_dense(decoder_lstm_out)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

optimizer = Adam(learning_rate=learning_rate)

# Sparse loss because the targets are integer token ids, not one-hot vectors.
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# === TRAINING ===
# Fits the seq2seq model and persists it, because the inference cell further
# down reloads the model from 'seq2seq_model.h5'.
print("[INFO] Starting training...")

# With a patience of 100 epochs the weights at stop time can be far past the
# best validation loss, so roll back to the best epoch when stopping.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True,
)
history = model.fit(
    [X_train, decoder_input_train],
    decoder_target_train,
    validation_data=([X_val, decoder_input_val], decoder_target_val),
    batch_size=1,
    epochs=1000,
    callbacks=[early_stopping],
)

# Save under the exact file name the inference cell loads.
# NOTE: this overwrites any existing file with that name.
model.save('seq2seq_model.h5')
print("[INFO] Training finished.")


In [None]:
from keras.models import Model, load_model
from keras.layers import Input
import numpy as np
import pickle

# Load the trained model
model = load_model('seq2seq_model.h5')

# Load tokenizer and max_seq_length.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../Data/train/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

with open('../Data/train/max_seq_length.pkl', 'rb') as f:
    max_seq_length = pickle.load(f)

# Encoder inference model: video features -> final [hidden, cell] states.
encoder_inputs = model.get_layer('input_1').input
encoder_lstm = model.get_layer('lstm_1')
_, state_h, state_c = encoder_lstm.output
encoder_states = [state_h, state_c]
encoder_model = Model(encoder_inputs, encoder_states)

# Decoder inference model: (tokens, previous states) -> (token probabilities,
# new states). The trained layers are reused, so no weights are copied.
decoder_inputs = model.get_layer('input_2').input
decoder_embedding = model.get_layer('embedding')
decoder_lstm = model.get_layer('lstm_2')

# State placeholders must carry the LSTM state width; declaring them with an
# unknown dimension discards the shape the recurrent kernel requires.
decoder_state_input_h = Input(shape=(decoder_lstm.units,), name='input_3')
decoder_state_input_c = Input(shape=(decoder_lstm.units,), name='input_4')
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_lstm_out, state_h_dec, state_c_dec = decoder_lstm(decoder_embedding(decoder_inputs), initial_state=decoder_states_inputs)
decoder_states = [state_h_dec, state_c_dec]
# The training-time Dropout layer is not part of this graph; the dense layer
# is applied directly to the LSTM output.
decoder_dense = model.get_layer('dense')
decoder_outputs = decoder_dense(decoder_lstm_out)
decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

# Decoding sequence function
def decode_sequence(input_seq, video_name):
    """Greedily decode one sentence for a single encoded video.

    Relies on the module-level `encoder_model`, `decoder_model`, `tokenizer`
    and `max_seq_length`.

    Args:
        input_seq: Encoder input passed unchanged to `encoder_model.predict`.
        video_name: Label printed before decoding; not used otherwise.

    Returns:
        The decoded words, each preceded by a single space (empty string when
        nothing was decoded). The 'end' token is never included.
    """
    # Encode the input sequence to get the internal state vectors.
    states_value = encoder_model.predict(input_seq)

    # Target sequence of length 1 holding only the start token.
    target_seq = np.array(tokenizer.texts_to_sequences(['start'])).reshape(1, 1)

    # Display the video name
    print(f"Video: {video_name}")

    decoded_words = []
    # Bound the loop by decoding steps, not by emitted words: an index with no
    # word (e.g. 0) produces an empty string, and counting words would then
    # never reach the limit. The budget of max_seq_length + 1 steps matches the
    # previous "more than max_seq_length words" cut-off.
    for _ in range(max_seq_length + 1):
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        # Greedy choice: most probable token at the last time step.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_word = tokenizer.index_word.get(sampled_token_index, '')

        # Stop on the end token without storing it, so no substring removal is
        # needed afterwards (that used to mangle words such as "endless").
        if sampled_word == 'end':
            break
        if sampled_word:
            decoded_words.append(sampled_word)

        # Feed the prediction and the new states into the next step.
        target_seq = np.array([[sampled_token_index]])
        states_value = [h, c]

    return ''.join(f' {word}' for word in decoded_words)


In [27]:
# === DATA PREPARATION ===
# Loads the evaluation sentences and their per-video feature files. Sentences
# whose feature file cannot be read are dropped so that `video_features` and
# `preprocessed_data` stay aligned row-for-row; later cells index both by the
# same position.
print("[INFO] Starting data preparation...")

# Load preprocessed sentences
preprocessed_file_path = './artifact/preprocessed_sentences.json'
with open(preprocessed_file_path, 'r') as f:
    preprocessed_data = json.load(f)
print("[INFO] Loaded preprocessed sentences.")

# Load the saved tokenizer to use its vocabulary size.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('../Data/train/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
# +1 on top of the word index size; presumably index 0 is reserved for padding.
vocab_size = len(tokenizer.word_index) + 1

# Load max_seq_length
with open('../Data/train/max_seq_length.pkl', 'rb') as f:
    max_seq_length = pickle.load(f)

print(len(preprocessed_data))

# Only the first 100 sentences are used.
preprocessed_data = preprocessed_data[0:100]
requested_count = len(preprocessed_data)

# Load video features from the directory
feature_dir = './artifact/features'
print("[INFO] Video features directory set.")

video_features = []
loaded_items = []
for item in preprocessed_data:
    file_path = os.path.join(feature_dir, f"{item['SENTENCE_NAME']}.json")
    try:
        with open(file_path, 'r') as f:
            item_features = json.load(f)
    except (OSError, ValueError) as e:
        # Best effort: report the unreadable/malformed file and skip the item.
        # (json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.)
        print(f"[ERROR] Failed to load: {file_path}. Error: {e}")
        continue
    video_features.append(item_features)
    loaded_items.append(item)
    print(len(video_features))

print(len(video_features))
print(requested_count)
if len(loaded_items) != requested_count:
    print(f"[WARN] Dropped {requested_count - len(loaded_items)} sentence(s) without usable features.")

# Keep only the sentences that have features, so index i refers to the same
# sample in the feature list and in the sentence list.
preprocessed_data = loaded_items

print("[INFO] Loaded video features.")

# Extract the preprocessed sequences directly
Y_data = [item['PREPROCESSED_SENTENCE'] for item in preprocessed_data]

# Decoder input drops the last element of each sequence; the target is the
# same sequence shifted left by one position.
sequences = np.array(Y_data)
decoder_input_data = sequences[:, :-1]
decoder_target_data = sequences[:, 1:]


[INFO] Starting data preparation...
[INFO] Loaded preprocessed sentences.
3
[INFO] Video features directory set.
1
2
3
3
3
[INFO] Loaded video features.


In [28]:
# The entry with the most frame keys fixes the padded time dimension; the
# per-frame vector length is read from the first video's 'frame_0' entry.
max_frames = max(len(clip) for clip in video_features)
feature_length = len(video_features[0]['frame_0'][0][0])

# Prepare the encoder input: one zero-initialised (frames x features) slab per
# video; positions past a video's own frame count stay zero.
X_data = np.zeros((len(video_features), max_frames, feature_length))
print(X_data.shape)
for clip_idx, clip in enumerate(video_features):
    # NOTE(review): keys are ordered as plain strings, so 'frame_10' would sort
    # before 'frame_2' if keys are unpadded. Confirm this is intended, and keep
    # it identical to whatever ordering the loaded model was trained with.
    for frame_idx, frame_key in enumerate(sorted(clip)):
        X_data[clip_idx, frame_idx, :] = clip[frame_key][0][0]

print(X_data.shape)

(3, 20, 4032)
(3, 20, 4032)


In [29]:
# Test: decode every prepared video and collect the reference sentence next to
# the prediction, index-aligned in the two lists.
actual_sentences = []
predicted_sentences = []
for sample_index in range(len(X_data)):
    # Slice (not index) so the leading batch dimension of size 1 is kept.
    input_seq = X_data[sample_index:sample_index + 1]
    sample = preprocessed_data[sample_index]
    video_name = sample['SENTENCE_NAME']
    actual_sentence = sample['SENTENCE_DESCRIPTION']
    predicted_sentence = decode_sequence(input_seq, video_name)
    actual_sentences.append(actual_sentence)
    # Drop 'start' only when it is a whole token; a plain substring replace
    # would also cut it out of real words such as "started" or "restart".
    predicted_words = [word for word in predicted_sentence.split() if word != 'start']
    predicted_sentences.append(' '.join(predicted_words))

Video: -0JdYlTf9Y8_11-5-rgb_front
Video: -BFCJ9zmfOo_9-8-rgb_front
Video: -AFID_P6YU0_15-8-rgb_front


In [30]:
# Print each reference sentence next to its prediction. Iterating over the
# paired lists (instead of a fixed count of 3) works for any number of samples.
for actual_sentence, predicted_sentence in zip(actual_sentences, predicted_sentences):
    print("Actual Sentence:", actual_sentence)
    print("Predicted Sentence:", predicted_sentence)

Actual Sentence: It's really, what you get for your dollar.
Predicted Sentence: its really what you get for your dollar
Actual Sentence: And there we go; it's coming off nicely.
Predicted Sentence: and there we go its coming off nicely
Actual Sentence: And it'll do well in drought conditions.
Predicted Sentence: and itll do well in drought conditions
