In [1]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, losses, Model




In [2]:
# Read the clip's frames from a pre-extracted NumPy dump.
# (An earlier variant opened 'vid1.mp4' through cv2.VideoCapture instead.)
# NOTE(review): the array is iterated frame by frame further down, so the
# first axis is presumably the frame index — confirm against the file.
video_path = '01_Data/Air_Force_One.npy'
video = np.load(video_path)

In [3]:
def preprocess_frame(frame, target_size=(128, 128)):
    """Resize one frame and scale its pixel values by 1/255.

    Args:
        frame: Image array accepted by ``cv2.resize``.
            Assumes pixel values lie in 0..255 (e.g. uint8) so the result
            falls in [0, 1] — TODO confirm for the loaded data.
        target_size: ``(width, height)`` passed to ``cv2.resize``. The default
            of ``(128, 128)`` matches the input shape declared by the
            autoencoder in this notebook.

    Returns:
        The resized frame as a ``float32`` array divided by 255.
    """
    # cv2.resize takes the destination size as (width, height).
    resized = cv2.resize(frame, tuple(target_size))
    # Convert before dividing so the result stays float32.
    return resized.astype('float32') / 255.0


In [4]:
# Preprocess every frame into a single array with one entry per frame.
# The output buffer is allocated once and filled in place: collecting the
# frames in a Python list and converting it with np.array() afterwards keeps
# two full copies alive at the same time (the result is ~2 GB for this clip).
if len(video) == 0:
    raise ValueError("video contains no frames to preprocess")

# The first processed frame determines the per-frame shape and dtype.
first_frame = preprocess_frame(video[0])
video_data = np.empty((len(video),) + first_frame.shape, dtype=first_frame.dtype)
video_data[0] = first_frame
for frame_index in range(1, len(video)):
    video_data[frame_index] = preprocess_frame(video[frame_index])

In [5]:
# Size in bytes as reported by the array object itself.
# The recorded output (1966080160) is consistent with 10,000 float32 frames of
# 128 x 128 x 3 (1,966,080,000 bytes) plus 160 bytes — presumably the ndarray
# object overhead.
video_data.__sizeof__()

1966080160

In [6]:
# Convert frames to numpy array
# video_data = np.array(frames)

In [7]:
# Define Convolutional Autoencoder model
class ConvAutoencoder(Model):
    """Convolutional autoencoder for 128x128 three-channel frames.

    The encoder applies five Conv2D + 2x2 MaxPooling2D stages (64, 32, 16, 8
    and 8 filters), halving the spatial size each time: 128 -> 4, giving a
    4x4x8 code. The decoder applies five stride-2 Conv2DTranspose layers
    (4, 8, 16, 32 and 64 filters) to get back to 128x128, followed by a
    3-filter sigmoid Conv2D that produces the reconstructed frame.

    Args:
        latent_dim: Stored on the instance as ``latent_dim``. It is not used
            by any layer; the code size is fixed by the architecture.
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.latent_dim = latent_dim

        # Encoder: input declaration, then one conv + pool pair per filter count.
        encoder_stack = [layers.Input(shape=(128, 128, 3))]  # Assuming color frames
        for n_filters in (64, 32, 16, 8, 8):
            encoder_stack.append(
                layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same')
            )
            encoder_stack.append(layers.MaxPooling2D((2, 2), padding='same'))
        self.encoder = tf.keras.Sequential(encoder_stack)

        # Decoder: each stride-2 transposed convolution doubles height and width.
        decoder_stack = [
            layers.Conv2DTranspose(n_filters, (3, 3), strides=2, activation='relu', padding='same')
            for n_filters in (4, 8, 16, 32, 64)
        ]
        # Final projection to 3 channels; sigmoid keeps outputs in (0, 1).
        decoder_stack.append(layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same'))
        self.decoder = tf.keras.Sequential(decoder_stack)

    def call(self, x):
        """Encode ``x`` and return the decoder's reconstruction of it."""
        return self.decoder(self.encoder(x))


In [8]:
# Build the model. `latent_dim` is handed to the constructor, which only
# stores it on the instance; it does not change the layer sizes.
latent_dim = 64
conv_autoencoder = ConvAutoencoder(latent_dim=latent_dim)





In [9]:
# Configure training: Adam optimizer with mean squared error as the
# reconstruction loss.
conv_autoencoder.compile(
    loss=losses.MeanSquaredError(),
    optimizer='adam',
)




In [10]:
# Fit the autoencoder for 5 epochs. Inputs and targets are the same array,
# so the network learns to reproduce each frame.
conv_autoencoder.fit(
    x=video_data,
    y=video_data,
    epochs=5,
)

Epoch 1/5

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x246e4f49890>

In [11]:
# Reconstruct every `skip_factor`-th frame with the trained autoencoder.
# The selected frames are pushed through the model in batches instead of one
# eager encoder/decoder call (plus a NumPy round trip) per frame.
skip_factor = 1  # Keep every Nth frame; 1 keeps all frames
if skip_factor < 1:
    raise ValueError(f"skip_factor must be a positive integer, got {skip_factor!r}")

# Slicing with a step keeps indices 0, skip_factor, 2 * skip_factor, ...
# i.e. exactly the frames for which `i % skip_factor == 0`.
kept_frames = video_data[::skip_factor]

# The model's call() is decoder(encoder(x)), so predict() yields the same
# reconstructions as running encoder and decoder by hand.
# list() keeps `summarized_frames` a list of per-frame arrays.
summarized_frames = list(
    conv_autoencoder.predict(kept_frames, batch_size=32, verbose=0)
)


In [12]:
# Save summarized video in MP4 format
output_path = 'summarized_video2.mp4'
frame_size = (128, 128)  # (width, height) given to the writer
fps = 60.0

fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')  # Define MP4 codec
out = cv2.VideoWriter(output_path, fourcc, fps, frame_size, isColor=True)  # Set isColor=False for grayscale video

# VideoWriter does not raise when it cannot open the file or codec; without
# this check every write() below would be silently dropped.
if not out.isOpened():
    raise RuntimeError(f"Could not open video writer for {output_path!r}")

# NOTE(review): VideoWriter expects BGR channel order. Confirm the frames in
# the source array are BGR; if they are RGB the saved colors will be swapped.
try:
    for frame in summarized_frames:
        # Scale [0, 1] floats to 0..255. Round instead of truncating, and clip
        # so out-of-range values cannot wrap around in the uint8 cast.
        frame_uint8 = np.clip(np.rint(frame * 255.0), 0, 255).astype(np.uint8)
        # Resize only if the frame does not already match the writer's size.
        if (frame_uint8.shape[1], frame_uint8.shape[0]) != frame_size:
            frame_uint8 = cv2.resize(frame_uint8, frame_size)
        out.write(frame_uint8)
finally:
    # Always finalize the file, even if a frame fails to convert or write.
    out.release()

cv2.destroyAllWindows()


In [13]:
# Reconstruct the first frame as a worked example.
# A leading batch axis is added because the encoder is called on a batch.
first_frame_batch = video_data[0][np.newaxis, ...]
encoded_frame = conv_autoencoder.encoder(first_frame_batch).numpy()
# `decoded_frame` keeps its batch axis; index [0] to get the image itself.
decoded_frame = conv_autoencoder.decoder(encoded_frame).numpy()


In [14]:
# Enlarge the first frame and its reconstruction 10x for easier inspection.
frame_shape = video_data[0].shape
original_height, original_width, _ = frame_shape

# cv2.resize takes the target size as (width, height).
enlarged_size = (original_width * 10, original_height * 10)
original_frame_resized = cv2.resize(video_data[0], enlarged_size)
decoded_frame_resized = cv2.resize(decoded_frame[0], enlarged_size)

# To preview the two images in windows (disabled here):
# cv2.imshow('Original Frame', original_frame_resized)
# cv2.imshow('Reconstructed Frame', decoded_frame_resized)
# cv2.waitKey(0) 
# cv2.destroyAllWindows()

In [15]:
# Play back each original frame next to its reconstruction in two windows.
# Press 'q' in a window to stop early.
display_scale = 20    # Enlargement factor for the display windows
frame_delay_ms = 10   # Delay between frames (in milliseconds); adjust as needed

try:
    for original_frame in video_data:
        # Reconstruct frame (the encoder is called on a batch of one)
        encoded_frame = conv_autoencoder.encoder(np.expand_dims(original_frame, axis=0)).numpy()
        reconstructed_frame = conv_autoencoder.decoder(encoded_frame).numpy()[0]

        # Derive the display size from the frame itself so this cell does not
        # depend on variables defined in another cell.
        frame_height, frame_width = original_frame.shape[:2]
        display_size = (frame_width * display_scale, frame_height * display_scale)
        original_frame_resized = cv2.resize(original_frame, display_size)
        reconstructed_frame_resized = cv2.resize(reconstructed_frame, display_size)

        # Display original and reconstructed frames
        cv2.imshow('Original Frame', original_frame_resized)
        cv2.imshow('Reconstructed Frame', reconstructed_frame_resized)

        # A single waitKey both paces playback and polls the keyboard.
        # Following it with waitKey(0) would block on every frame until a key
        # is pressed and discard any key pressed during the delay.
        if cv2.waitKey(frame_delay_ms) & 0xFF == ord('q'):  # Press 'q' to exit
            break
finally:
    # Close the windows even if reconstruction or display raises.
    cv2.destroyAllWindows()