In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, losses, Model
from sklearn.model_selection import train_test_split

In [None]:
# Function to load frames from videos in a directory
def load_frames_from_directory(directory_path, frame_interval=1, target_size=(128,128)):
    """Load, resize and normalise frames from every video file in a directory.

    Args:
        directory_path: Folder scanned (non-recursively) for files whose name
            ends in .mp4, .avi or .mov (case-insensitive).
        frame_interval: Keep every ``frame_interval``-th frame of each video
            (1 keeps every frame). Must be positive.
        target_size: Size handed to ``cv2.resize``, i.e. ``(width, height)``.

    Returns:
        A NumPy array of the kept frames, each converted to float32 and
        divided by 255. No colour conversion is applied, so the channel order
        is whatever ``cv2.VideoCapture`` delivers. The array has length 0 when
        no frame could be read.

    Raises:
        ValueError: If ``frame_interval`` is not positive.
    """
    # Fail up front instead of with a ZeroDivisionError in the middle of decoding
    if frame_interval <= 0:
        raise ValueError(f"frame_interval must be positive, got {frame_interval}")

    video_extensions = ('.mp4', '.avi', '.mov')
    # Sort so the frame order does not depend on the directory listing order
    video_files = sorted(
        f for f in os.listdir(directory_path) if f.lower().endswith(video_extensions)
    )
    frames = []

    for video_file in video_files:
        cap = cv2.VideoCapture(os.path.join(directory_path, video_file))
        try:
            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()  # Read a frame
                if not ret:
                    break  # No more frames (or the file could not be decoded)

                if frame_count % frame_interval == 0:
                    frame = cv2.resize(frame, target_size)  # Resize frame
                    frames.append(frame.astype('float32') / 255.0)  # Normalize to [0, 1]

                frame_count += 1
        finally:
            # Release the capture handle even if resizing/conversion raised
            cap.release()

    return np.array(frames)

In [3]:
# Define the Convolutional Autoencoder class
class ConvAutoencoder(Model):
    """Convolutional autoencoder for 128x128 images with 3 channels.

    The model is made of three sequential parts:

    * ``encoder``    - five Conv2D + MaxPooling2D stages (128, 64, 32, 16, 8 filters)
    * ``bottleneck`` - Flatten followed by a Dense layer of ``latent_dim`` units
    * ``decoder``    - reshapes the latent vector to 1x1 and upsamples it with
      seven stride-2 transposed convolutions, then a 3-filter sigmoid Conv2D

    Args:
        latent_dim: Number of units in the latent (bottleneck) vector.
    """

    def __init__(self, latent_dim):
        super().__init__()

        # Options shared by every ReLU convolution in the encoder and decoder
        relu_same = dict(activation='relu', padding='same')

        # Encoder: each stage is a 3x3 convolution followed by 2x2 max pooling
        encoder_stack = [layers.InputLayer(input_shape=(128, 128, 3))]
        for n_filters in (128, 64, 32, 16, 8):
            encoder_stack.append(layers.Conv2D(n_filters, (3, 3), **relu_same))
            encoder_stack.append(layers.MaxPooling2D((2, 2), padding='same'))
        self.encoder = tf.keras.Sequential(encoder_stack)

        # Bottleneck: flatten the encoder feature map and project it to the latent size
        self.bottleneck = tf.keras.Sequential([
            layers.Flatten(),
            layers.Dense(latent_dim),
        ])

        # Decoder: treat the latent vector as a 1x1 feature map and upsample it
        decoder_stack = [
            layers.InputLayer(input_shape=(latent_dim,)),
            layers.Reshape((1, 1, latent_dim)),
        ]
        decoder_stack.extend(
            layers.Conv2DTranspose(n_filters, (3, 3), strides=2, **relu_same)
            for n_filters in (4, 8, 16, 32, 64, 128, 256)
        )
        # Final layer maps back to 3 channels with sigmoid activation
        decoder_stack.append(
            layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same')
        )
        self.decoder = tf.keras.Sequential(decoder_stack)

    def call(self, x):
        """Run ``x`` through encoder, bottleneck and decoder; return the reconstruction."""
        return self.decoder(self.bottleneck(self.encoder(x)))

In [4]:
# Build the autoencoder; the bottleneck compresses each frame to `latent_dim` values
latent_dim = 64
model = ConvAutoencoder(latent_dim=latent_dim)

# Configure training: Adam optimizer with mean-squared-error loss
model.compile(loss='mse', optimizer='adam')

In [6]:
# Load frames from a directory with multiple video files
# Raw string: backslashes in the Windows path must not be parsed as escape sequences
video_directory = r'E:\DeltaMod\Data'  # Your video directory
frame_interval = 10  # Keep every 10th frame of each video
training_data = load_frames_from_directory(video_directory, frame_interval=frame_interval)

In [7]:
# Abort early when the loader produced no frames at all
if not len(training_data):
    raise ValueError("No frames loaded from the specified directory.")

In [8]:
# Total number of scalar values held in the loaded frame array
training_data.size

1733099520

In [9]:
# Split the data into train and validation sets (80% / 20%, fixed seed for a repeatable split)
# NOTE(review): the split is made per frame, so frames from the same video can end up
# in both sets - confirm this is acceptable for the validation metric.
train_data, val_data = train_test_split(training_data, test_size=0.2, random_state=42)

In [10]:
# NOTE(review): tensorflow is already imported as `tf` in the first cell; this re-import is redundant
import tensorflow as tf

# Report the TensorFlow version and how many GPUs it can see
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

# Turn on device placement logging. This is diagnostic only: it reports where
# operations run but does not by itself force anything onto the GPU.
tf.debugging.set_log_device_placement(True)  # Enable device placement logging


TensorFlow version: 2.10.1
Num GPUs Available: 1


In [11]:
# Sanity-check the shape and dtype of each split (train first, then validation)
for split in (train_data, val_data):
    print(split.shape, split.dtype)

(28208, 128, 128, 3) float32
(7052, 128, 128, 3) float32


In [12]:
# Shell out to the NVIDIA driver tool to show current GPU utilisation and memory usage
!nvidia-smi

Sat May  4 12:35:09 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 528.49       Driver Version: 528.49       CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro RTX 5000    WDDM  | 00000000:61:00.0 Off |                  Off |
| 35%   46C    P8     7W / 230W |  15962MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

TRAIN_BATCH_SIZE = 4
VAL_BATCH_SIZE = 32  # flow()'s default batch size, which the validation stream used before


def autoencoder_batches(image_batches):
    """Yield ``(inputs, targets)`` pairs in which the target is the input batch itself.

    ``ImageDataGenerator.flow(x, y)`` applies the random transforms to ``x``
    only and passes ``y`` through untouched. Feeding the same array as both
    would therefore train the autoencoder to turn augmented images back into
    the un-augmented originals. Reusing each augmented batch as its own target
    keeps input and target aligned.
    """
    for batch in image_batches:
        yield batch, batch


# Set up the data augmentation generator
data_gen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

# Augmented training stream (images only; autoencoder_batches adds the targets)
train_flow = data_gen.flow(train_data, batch_size=TRAIN_BATCH_SIZE)

# Validation uses the untouched frames in a fixed order, so val_loss is comparable
# from epoch to epoch and early stopping is not driven by augmentation noise
val_flow = ImageDataGenerator().flow(
    val_data, val_data, batch_size=VAL_BATCH_SIZE, shuffle=False
)

# Define early stopping callback
early_stopping = EarlyStopping(
    monitor='val_loss',  # Monitor the validation loss
    patience=10,         # Stop if no improvement for 10 epochs
    verbose=1,           # Output early stopping messages
    restore_best_weights=True  # Restore the best model weights
)

# Train the model with the early stopping callback; keep the history for later inspection
history = model.fit(
    autoencoder_batches(train_flow),
    steps_per_epoch=len(train_flow),  # The wrapped iterator never ends, so bound each epoch explicitly
    validation_data=val_flow,
    epochs=500,
    callbacks=[early_stopping]  # Include early stopping
)


In [17]:
def print_layer_details(heading, stack):
    """Print class name, output shape and activation of every layer in ``stack``.

    Args:
        heading: Line printed before the per-layer details.
        stack: Object exposing a ``layers`` sequence (here: a Keras Sequential).
    """
    print(heading)
    for position, layer in enumerate(stack.layers, start=1):
        print(f"Layer {position}: {layer.__class__.__name__}")
        print(f" - Output shape: {layer.output_shape}")
        # Layers without an `activation` attribute (e.g. pooling) are reported as 'None'
        activation = getattr(layer, 'activation', 'None')
        print(f" - Activation: {activation}")


# Details of encoder, bottleneck and decoder layers, in that order
print_layer_details("Encoder Layers:", model.encoder)
print_layer_details("\nBottleneck Layers:", model.bottleneck)
print_layer_details("\nDecoder Layers:", model.decoder)


Encoder Layers:
Layer 1: Conv2D
 - Output shape: (None, 128, 128, 128)
 - Activation: <function relu at 0x0000020A9A79D4C0>
Layer 2: MaxPooling2D
 - Output shape: (None, 64, 64, 128)
 - Activation: None
Layer 3: Conv2D
 - Output shape: (None, 64, 64, 64)
 - Activation: <function relu at 0x0000020A9A79D4C0>
Layer 4: MaxPooling2D
 - Output shape: (None, 32, 32, 64)
 - Activation: None
Layer 5: Conv2D
 - Output shape: (None, 32, 32, 32)
 - Activation: <function relu at 0x0000020A9A79D4C0>
Layer 6: MaxPooling2D
 - Output shape: (None, 16, 16, 32)
 - Activation: None
Layer 7: Conv2D
 - Output shape: (None, 16, 16, 16)
 - Activation: <function relu at 0x0000020A9A79D4C0>
Layer 8: MaxPooling2D
 - Output shape: (None, 8, 8, 16)
 - Activation: None
Layer 9: Conv2D
 - Output shape: (None, 8, 8, 8)
 - Activation: <function relu at 0x0000020A9A79D4C0>
Layer 10: MaxPooling2D
 - Output shape: (None, 4, 4, 8)
 - Activation: None

Bottleneck Layers:
Layer 1: Flatten
 - Output shape: (None, 128)
 - Act

In [18]:
# Save the trained model; the raw string keeps the Windows backslashes literal
# instead of relying on invalid escape sequences such as \D and \m
model.save(r'E:\DeltaMod\Data\model')



INFO:tensorflow:Assets written to: E:\DeltaMod\Data\model_1\assets


INFO:tensorflow:Assets written to: E:\DeltaMod\Data\model_1\assets
