In [None]:
import tensorflow as tf

# Check GPU availability.
# tf.test.is_gpu_available() is deprecated; TensorFlow's own warning recommends
# tf.config.list_physical_devices('GPU'), so the flag is derived from that list.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Is GPU available?", bool(gpu_devices))
print("GPU Device:", gpu_devices)


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


Is GPU available? False
GPU Device: []


In [None]:
import os
import zipfile

from google.colab import drive

# Make Google Drive visible under /content/drive
drive.mount('/content/drive')

# Location of the zipped datasets on Drive, and the local folder they are unpacked into
drive_dataset_path = "/content/drive/MyDrive/saved_datasets.zip"
output_dir = "/content/saved_datasets"

# Make sure the target folder exists, then unpack the whole archive into it
os.makedirs(output_dir, exist_ok=True)
with zipfile.ZipFile(drive_dataset_path, mode='r') as archive:
    archive.extractall(path=output_dir)

print(f"Dataset extracted to: {output_dir}")


Mounted at /content/drive
Dataset extracted to: /content/saved_datasets


In [None]:
# List the extraction directory to see the top-level layout of the unpacked archive.
!ls /content/saved_datasets


saved_datasets


In [None]:
# Dataset locations: the archive unpacks into a nested "saved_datasets" folder,
# so the split directories live one level below the extraction directory.
base_dir = "/content/saved_datasets/saved_datasets"
train_path, val_path, test_path = (
    os.path.join(base_dir, split_dir)
    for split_dir in ("train_dataset", "val_dataset", "test_dataset")
)

# Echo the resolved paths, one per line
print(
    "Dataset paths:",
    f"Train: {train_path}",
    f"Validation: {val_path}",
    f"Test: {test_path}",
    sep="\n",
)


Dataset paths:
Train: /content/saved_datasets/saved_datasets/train_dataset
Validation: /content/saved_datasets/saved_datasets/val_dataset
Test: /content/saved_datasets/saved_datasets/test_dataset


In [None]:
# List the contents of each split directory (train, validation, test) to confirm
# that the saved-dataset files are present before loading them.
!ls /content/saved_datasets/saved_datasets/train_dataset
!ls /content/saved_datasets/saved_datasets/val_dataset
!ls /content/saved_datasets/saved_datasets/test_dataset


8264606385342128425  dataset_spec.pb  snapshot.metadata
12001490812919011829  dataset_spec.pb  snapshot.metadata
12262735734570148572  dataset_spec.pb  snapshot.metadata


In [None]:
import tensorflow as tf

# Load datasets.
# tf.data.experimental.load is deprecated; TensorFlow's warning recommends
# tf.data.Dataset.load, which reads the same on-disk format.
train_dataset = tf.data.Dataset.load(train_path)
val_dataset = tf.data.Dataset.load(val_path)
test_dataset = tf.data.Dataset.load(test_path)

print("Datasets loaded successfully!")

# Verify one batch: each element is a (sequence, label) pair
for sequence, label in train_dataset.take(1):
    print("Train Sequence Shape:", sequence.shape)
    print("Train Label:", label.numpy())


Instructions for updating:
Use `tf.data.Dataset.load(...)` instead.


Datasets loaded successfully!
Train Sequence Shape: (8, 32, 224, 224, 3)
Train Label: [170 430 433 226 466 512 194 499]


In [None]:
# Model creation

In [None]:
# NOTE(review): Drive is already mounted by an earlier cell; when that is the case
# Colab only reports "Drive already mounted" and this call changes nothing.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Copy the vocabulary file from the mounted Drive into /content, where it is read later.
!cp /content/drive/MyDrive/trained_words.txt /content/


In [None]:
# Load trained words from the file
def load_trained_words(file_path):
    """Read a vocabulary file and return its lines as a list of words.

    Args:
        file_path: Path to a text file with one vocabulary entry per line.

    Returns:
        A list with one string per line of the file, in file order, with
        leading and trailing whitespace removed. Blank lines are kept as
        empty strings so that list positions still match line numbers.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # default locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterating the file object yields lines lazily; no need for readlines().
        return [line.strip() for line in f]

# Read the vocabulary from the copy of trained_words.txt placed in /content.
trained_words = load_trained_words("/content/trained_words.txt")

# Initialize the tokenizer
class SimpleTokenizer:
    """Bidirectional lookup between vocabulary words and integer indices.

    Indices are the positions of the words in the vocabulary passed to the
    constructor. If a word occurs more than once, its last position wins.
    """

    def __init__(self, vocabulary):
        """Build the word -> index and index -> word tables from ``vocabulary``."""
        forward = {}
        for position, token in enumerate(vocabulary):
            forward[token] = position
        self.word_to_index = forward

        # Reverse table derived from the forward one, so both always describe
        # the same set of (word, index) pairs at construction time.
        backward = {}
        for token, position in forward.items():
            backward[position] = token
        self.index_to_word = backward

    def __call__(self, word):
        """Return the index of ``word``, or 0 when the word is not in the vocabulary.

        Note that 0 is also the index of the first vocabulary entry, so an
        unknown word is indistinguishable from that entry in the result.
        """
        if word in self.word_to_index:
            return self.word_to_index[word]
        return 0

    def vocab_size(self):
        """Return the number of distinct words currently in ``word_to_index``."""
        return len(self.word_to_index)

# Build the tokenizer from the loaded word list and report how many distinct words it indexes.
tokenizer = SimpleTokenizer(trained_words)

print(f"Vocabulary size: {tokenizer.vocab_size()}")


Vocabulary size: 547


In [None]:

# Add a blank token to the vocabulary.
# setdefault makes this cell idempotent: on the first run "<BLANK>" gets the next
# free index (the current vocabulary size); on a re-run the existing index is
# reused instead of moving the token to a new index.
blank_token = tokenizer.word_to_index.setdefault("<BLANK>", len(tokenizer.word_to_index))
# Keep the reverse table in sync so the blank index can be mapped back to a token.
tokenizer.index_to_word[blank_token] = "<BLANK>"
num_classes = len(tokenizer.word_to_index)  # Update the total number of classes


In [None]:


# # def prepare_ctc_inputs(dataset):
# #     """
# #     Prepares inputs for CTC loss by computing input and label lengths.

# #     Args:
# #         dataset: A TensorFlow dataset with (sequence, label) pairs.

# #     Returns:
# #         processed_dataset: A TensorFlow dataset with (inputs, labels, input_length, label_length).
# #     """
# #     def map_fn(sequence, label):
# #         # Compute input length: fixed length TARGET_SEQ_LEN for all inputs
# #         input_length = tf.fill([tf.shape(sequence)[0]], TARGET_SEQ_LEN)

# #         # Ensure label length and label shapes are compatible
# #         label_length = tf.fill([tf.shape(label)[0]], 1) if len(label.shape) == 1 else tf.shape(label)

# #         # Expand label dimensions if necessary to match required shape [batch_size, max_label_length]
# #         if len(label.shape) == 1:
# #             label = tf.expand_dims(label, axis=-1)
# #         print("Sequence shape:", tf.shape(sequence))
# #         print("Label shape:", tf.shape(label))
# #         print("Input Length:", input_length)
# #         print("Label Length:", label_length)

# #         return ((sequence, input_length), (label, label_length))

# #     return dataset.map(map_fn)
# def prepare_ctc_inputs(dataset):
#     """
#     Prepares inputs for CTC loss by computing input and label lengths.

#     Args:
#         dataset: A TensorFlow dataset with (sequence, label) pairs.

#     Returns:
#         processed_dataset: A TensorFlow dataset with (inputs, labels, input_length, label_length).
#     """
#     def map_fn(sequence, label):
#         # Compute input length
#         input_length = tf.fill([tf.shape(sequence)[0]], TARGET_SEQ_LEN)  # Assuming fixed TARGET_SEQ_LEN

#         # Compute label length
#         label_length = tf.expand_dims(tf.shape(label)[-1], axis=-1)  # Ensure it's compatible with stacking

#         # Expand label dimensions if necessary
#         if len(label.shape) == 1:
#             label = tf.expand_dims(label, axis=-1)

#         # Combine label and label_length
#         y_true = tf.concat([label, label_length], axis=-1)  # Concatenate along the last dimension

#         return ((sequence, input_length), y_true)

#     return dataset.map(map_fn)


In [None]:
# def prepare_ctc_inputs(dataset):
#     """
#     Prepares inputs for CTC loss by computing input and label lengths.

#     Args:
#         dataset: A TensorFlow dataset with (sequence, label) pairs.

#     Returns:
#         processed_dataset: A TensorFlow dataset with (inputs, labels, input_length, label_length).
#     """
#     def map_fn(sequence, label):
#         # Compute input length: fixed length TARGET_SEQ_LEN for all inputs
#         input_length = tf.fill([1], TARGET_SEQ_LEN)

#         # Compute label length
#         label_length = tf.shape(label)[0]  # Get the length of the label sequence

#         # Expand label_length to match the rank of label
#         label_length = tf.expand_dims(label_length, axis=0)  # Shape becomes [1]

#         # Ensure label has the right shape
#         # if len(label.shape) == 1:
#         #     label = tf.expand_dims(label, axis=-1)  # Shape becomes [seq_length, 1]

#         # No need to concatenate label and label_length here
#         # y_true = tf.concat([label, tf.tile(label_length, [tf.shape(label)[0]])], axis=-1)

#         return ((sequence, input_length), (label, label_length))

#     return dataset.map(map_fn)



In [None]:
# @tf.function
# def ctc_loss(y_true, y_pred):
#     """
#     Compute the CTC loss for the model.

#     Args:
#         y_true: Labels and their lengths. This should be a tuple of (labels, label_lengths)
#         y_pred: Logits output by the model.

#     Returns:
#         loss: Scalar tensor representing the mean CTC loss.
#     """
#     # Unpack y_true
#     labels, label_length = y_true

#     # Debug input shapes
#     tf.print("Labels shape (input):", tf.shape(labels))
#     tf.print("Label lengths shape (input):", tf.shape(label_length))
#     tf.print("y_pred shape (input):", tf.shape(y_pred))

#     # Cast labels and label_lengths to the correct dtype
#     labels = tf.cast(labels, tf.int32)
#     label_length = tf.cast(label_length, tf.int32)

#     # Debug extracted shapes
#     tf.print("Labels shape:", tf.shape(labels))
#     tf.print("Label lengths shape:", tf.shape(label_length))

#     # Transpose y_pred for CTC loss
#     y_pred = tf.transpose(y_pred, perm=[1, 0, 2])  # Transpose to (time_steps, batch_size, num_classes)

#     # Debug y_pred shape after transpose
#     tf.print("y_pred shape (transposed):", tf.shape(y_pred))

#     # Compute input lengths
#     batch_size = tf.shape(y_pred)[1]
#     input_length = tf.fill([batch_size], tf.shape(y_pred)[0])  # Time steps (32)

#     # Debug input lengths
#     tf.print("Input lengths shape:", tf.shape(input_length))

#     # Compute CTC loss
#     loss = tf.nn.ctc_loss(
#         labels=labels,
#         logits=y_pred,
#         label_length=label_length,
#         logit_length=input_length,
#         blank_index=-1,  # Assuming you are using TensorFlow 2.x
#         logits_time_major=True
#     )

#     return tf.reduce_mean(loss)

In [None]:
# Show the installed TensorFlow version (displayed as the cell's output).
import tensorflow as tf
tf.__version__


'2.15.0'

In [None]:
!sudo pip3 install keras



In [None]:
# from tensorflow.keras import layers, models, Input, regularizers

# # Define input shape: (time_steps, height, width, channels)
# input_shape = (32, 224, 224, 3)  # 32 frames, 224x224 resolution, RGB channels
# num_classes = tokenizer.vocab_size()  # Total number of unique classes

# # Input layer
# inputs = Input(shape=input_shape)

# # 3D CNN Layers
# x = layers.Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu',
#                   padding='same', kernel_regularizer=regularizers.l2(1e-4))(inputs)
# x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
# x = layers.Dropout(0.3)(x)

# x = layers.Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu',
#                   padding='same', kernel_regularizer=regularizers.l2(1e-4))(x)
# x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
# x = layers.Dropout(0.3)(x)

# x = layers.Conv3D(filters=128, kernel_size=(3, 3, 3), activation='relu',
#                   padding='same', kernel_regularizer=regularizers.l2(1e-4))(x)
# x = layers.MaxPooling3D(pool_size=(2, 2, 2))(x)
# x = layers.Dropout(0.4)(x)

# # Reshape for LSTM Layer
# x = layers.TimeDistributed(layers.Flatten())(x)  # Flatten each frame
# x = layers.Reshape((32, -1))(x)  # Reshape to (time_steps, features)

# # LSTM Layer
# lstm_out = layers.Bidirectional(layers.LSTM(256, return_sequences=False, dropout=0.5))(x)

# # Dense Layer for Final Output
# outputs = layers.Dense(num_classes, activation='softmax')(lstm_out)

# # Create the model
# model = models.Model(inputs=inputs, outputs=outputs)


from tensorflow.keras import layers, models, Input, regularizers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input

# Define input shape: (time_steps, height, width, channels)
input_shape = (32, 224, 224, 3)  # 32 frames, 224x224 resolution, RGB channels
num_classes = tokenizer.vocab_size()  # Total number of unique classes

# The dataset range check in this notebook reports frame values in about
# [0, 0.00392] (= 1/255), i.e. the pixels appear to have been divided by 255
# twice. ResNet50's ImageNet weights expect preprocess_input applied to 0-255
# pixels, so the frames are scaled back up first.
# NOTE(review): adjust this factor if the saved datasets are regenerated with a
# different normalisation.
PIXEL_RESTORE_SCALE = 255.0 * 255.0

# Pretrained ResNet50 for feature extraction (applied to each frame)
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze pretrained layers

# Per-frame encoder: restore pixel scale -> ResNet50 preprocessing -> frozen
# backbone -> global average pooling. Pooling yields 2048 features per frame;
# flattening the 7x7x2048 maps instead gives 100352 features per frame, which
# makes the attention and LSTM layers that follow enormous.
frame_inputs = Input(shape=(224, 224, 3))
frame_features = layers.Rescaling(PIXEL_RESTORE_SCALE)(frame_inputs)
frame_features = layers.Lambda(preprocess_input, name="resnet50_preprocess")(frame_features)
frame_features = base_model(frame_features)
frame_features = layers.GlobalAveragePooling2D()(frame_features)
frame_encoder = models.Model(frame_inputs, frame_features, name="frame_encoder")

# Input layer for a whole clip
inputs = Input(shape=input_shape)

# Apply the frame encoder to each frame using TimeDistributed -> (batch, 32, 2048)
x = layers.TimeDistributed(frame_encoder)(inputs)

# Temporal self-attention over the frame features (query = value = x)
attention = layers.Attention()([x, x])

# LSTM layers: the first keeps the per-frame sequence, the second reduces it to one vector
x = layers.Bidirectional(layers.LSTM(256, return_sequences=True, dropout=0.5))(attention)
x = layers.Bidirectional(layers.LSTM(256, return_sequences=False, dropout=0.5))(x)

# Fully connected layers
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.5)(x)  # Additional dropout for regularization

# Output layer: one probability per class
outputs = layers.Dense(num_classes, activation='softmax')(x)

# Create the model
model = models.Model(inputs=inputs, outputs=outputs)

# Compile the model (integer labels -> sparse categorical cross-entropy)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])




Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Print the model summary: layer names, output shapes and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 32, 224, 224, 3)]    0         []                            
                                                                                                  
 time_distributed (TimeDist  (None, 32, 7, 7, 2048)       2358771   ['input_1[0][0]']             
 ributed)                                                 2                                       
                                                                                                  
 time_distributed_1 (TimeDi  (None, 32, 100352)           0         ['time_distributed[0][0]']    
 stributed)                                                                                       
                                                                                              

In [None]:
# Compile the model
model.compile(
    loss='sparse_categorical_crossentropy',  # labels are integer class ids
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:


# # Prepare dataset for CTC training
# train_dataset_ctc = prepare_ctc_inputs(train_dataset)
# val_dataset_ctc = prepare_ctc_inputs(val_dataset)

In [None]:
# Check whether the label is a tensor

In [None]:
# for batch in train_dataset_ctc.take(1):
#     _, (label,_) = batch
#     print( isinstance(label, tf.Tensor) and label.dtype != tf.variant)


In [None]:
# for batch in train_dataset_ctc.take(1):
#     (x, x_len), (y, y_len) = batch

#     # Debug input shapes
#     print("Input shape (x):", tf.shape(x))
#     print("Input length (x_len):", x_len)

#     # Debug label shapes
#     print("Label shape (y):", tf.shape(y))
#     print("Label length (y_len):", y_len)


In [None]:
# Inspect the first five training batches: flag NaNs in the frame tensors and
# report the dtype of the labels.
for sequence, label in train_dataset.take(5):
    # NaN test applies to the sequences only (assumed float32)
    has_nan = tf.math.reduce_any(tf.math.is_nan(sequence))
    print("Sequence contains NaN:", has_nan.numpy())

    # Confirm label type (should be int32)
    print("Label Data Type:", label.dtype)


Sequence contains NaN: False
Label Data Type: <dtype: 'int32'>
Sequence contains NaN: False
Label Data Type: <dtype: 'int32'>
Sequence contains NaN: False
Label Data Type: <dtype: 'int32'>
Sequence contains NaN: False
Label Data Type: <dtype: 'int32'>
Sequence contains NaN: False
Label Data Type: <dtype: 'int32'>


In [None]:
# Check ranges of sequences and labels in the dataset
import tensorflow as tf

def validate_dataset(dataset, num_classes, num_batches=5, low_max_threshold=0.05):
    """
    Prints value-range diagnostics for the first batches of a dataset.

    For each inspected batch this reports the min/max of the sequences and of
    the labels, and flags sequences outside [0, 1], sequences whose maximum is
    suspiciously small, and labels outside [0, num_classes - 1].

    Args:
        dataset: TensorFlow dataset yielding (sequences, labels) batches.
        num_classes: The total number of classes (num_classes - 1 is the maximum valid label).
        num_batches: How many batches to inspect (default 5).
        low_max_threshold: If a batch's maximum sequence value is below this
            while still inside [0, 1], the batch is reported as under-scaled
            (for example pixels divided by 255 twice peak at about 0.0039).

    Returns:
        None
    """
    for sequences, labels in dataset.take(num_batches):
        # Check sequence range
        min_val = tf.reduce_min(sequences).numpy()
        max_val = tf.reduce_max(sequences).numpy()
        print(f"Sequence Range: Min = {min_val}, Max = {max_val}")

        if min_val < 0.0 or max_val > 1.0:
            print("⚠️ Sequence values are out of range! Expected [0, 1].")
        elif max_val < low_max_threshold:
            # Inside [0, 1] but using only a sliver of it: almost certainly a
            # normalisation mistake rather than genuinely dark data.
            print(f"⚠️ Sequence values peak at {max_val}; data looks under-scaled for a [0, 1] range.")
        else:
            print("✅ Sequence values are in the range [0, 1].")

        # Check label range
        min_label = tf.reduce_min(labels).numpy()
        max_label = tf.reduce_max(labels).numpy()
        print(f"Label Range: Min = {min_label}, Max = {max_label}")

        if min_label < 0 or max_label >= num_classes:
            print(f"⚠️ Labels are out of range! Expected [0, {num_classes-1}].")
        else:
            print("✅ Labels are in the valid range.")

# Class count = number of entries currently in the tokenizer's word table
num_classes = len(tokenizer.word_to_index)

# Run the same range validation on every split, each under its own heading
for heading, split in (
    ("Validating Train Dataset:", train_dataset),
    ("\nValidating Validation Dataset:", val_dataset),
    ("\nValidating Test Dataset:", test_dataset),
):
    print(heading)
    validate_dataset(split, num_classes)


Validating Train Dataset:
Sequence Range: Min = 0.0, Max = 0.003921568859368563
✅ Sequence values are in the range [0, 1].
Label Range: Min = 170, Max = 512
✅ Labels are in the valid range.
Sequence Range: Min = 0.0, Max = 0.003921568859368563
✅ Sequence values are in the range [0, 1].
Label Range: Min = 12, Max = 526
✅ Labels are in the valid range.
Sequence Range: Min = 0.0, Max = 0.003921568859368563
✅ Sequence values are in the range [0, 1].
Label Range: Min = 6, Max = 503
✅ Labels are in the valid range.
Sequence Range: Min = 0.0, Max = 0.003921568859368563
✅ Sequence values are in the range [0, 1].
Label Range: Min = 6, Max = 483
✅ Labels are in the valid range.
Sequence Range: Min = 0.0, Max = 0.0039061899296939373
✅ Sequence values are in the range [0, 1].
Label Range: Min = 11, Max = 540
✅ Labels are in the valid range.

Validating Validation Dataset:
Sequence Range: Min = 0.0, Max = 0.003921568859368563
✅ Sequence values are in the range [0, 1].
Label Range: Min = 175, Max = 

In [None]:
# for batch in train_dataset_ctc.take(1):
#     (x, x_len), (y, y_len) = batch
#     print("Input shape (x):", tf.shape(x))         # [batch_size, max_time, feature_dim]
#     print("Input length (x_len):", x_len)         # [batch_size]
#     print("Label shape (y):", tf.shape(y))        # [batch_size, max_label_length]
#     print("Label length (y_len):", y_len)         # [batch_size]



In [None]:



# # Reshape the data before fitting the model
# def reshape_data(sequence, label):
#   """Reshapes the sequence to match the expected input shape."""
#   sequence = tf.expand_dims(sequence, axis=0)  # Add a batch dimension
#   sequence = tf.tile(sequence, [8, 1, 1, 1, 1])  # Repeat along time dimension
#   return sequence, label

# train_dataset = train_dataset.map(reshape_data)
# val_dataset = val_dataset.map(reshape_data)


In [None]:
# Train for 20 epochs on train_dataset, evaluating on val_dataset after each epoch;
# the object returned by fit() is kept in `history`.
history = model.fit(train_dataset, validation_data=val_dataset, epochs=20)

Epoch 1/20
  7/167 [>.............................] - ETA: 3:44:21 - loss: 6.3422 - accuracy: 0.0000e+00

In [None]:
# Show the shapes of one training batch
for x_batch, y_batch in train_dataset.take(1):
    # Expected: (batch_size, 32, 224, 224, 3)
    print(f"Input batch shape: {x_batch.shape}")
    # Expected: (batch_size,)
    print(f"Label batch shape: {y_batch.shape}")


In [None]:
# Debugging aid: shapes of the first training batch.
# Expected input: (batch_size, 32, 224, 224, 3); expected labels: (batch_size,)
for x_batch, y_batch in train_dataset.take(1):
    print(
        f"Input batch shape: {x_batch.shape}",
        f"Label batch shape: {y_batch.shape}",
        sep="\n",
    )


In [None]:
# Cast the features to float32 and the labels to int32 for every element.
# NOTE(review): this rebinds train_dataset, so each re-run of the cell stacks another
# map on top of the previous one; only the training split is cast (val/test are not);
# and the cell sits after model.fit, so in top-to-bottom order it does not affect
# that training run.
train_dataset = train_dataset.map(lambda x, y: (tf.cast(x, tf.float32), tf.cast(y, tf.int32)))


In [None]:
# Evaluate on the test set. The model is compiled with metrics=['accuracy'], so
# evaluate() returns the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Accuracy: {test_accuracy}")

In [None]:
# Check the shapes of the tensors

In [None]:


# # Iterate through the first batch
# for batch in train_dataset_ctc.take(2):
#     ((sequence, input_length), (label, label_length)) = batch  # Unpack the batch

#     # print("Sequence:")
#     # print(sequence.numpy()) # Convert to NumPy array for printing
#     print("Sequence Shape:", sequence.shape)
#     print("Input Length:", input_length.numpy())

#     print("\nLabel:")
#     print(label.numpy())
#     print("Label Shape:", label.shape)
#     print("Label Length:", label_length.numpy())
