In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# ========================
# GPU Memory Configuration
# ========================
# For a 4GB GPU, we want to avoid TensorFlow grabbing all memory at once.
# Ask TensorFlow which GPUs it can see; an empty list means CPU-only execution.
physical_devices = tf.config.list_physical_devices('GPU')
if not physical_devices:
    print("No GPU found. Running on CPU.")
else:
    try:
        # With memory growth switched on, GPU memory is allocated as it is
        # needed rather than being claimed up front.
        for gpu in physical_devices:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth enabled.")
    except Exception as err:
        # Best effort only: report the problem and carry on with defaults.
        print("Error setting GPU memory growth:", err)

# ========================
# Parameters & Configuration
# ========================
# Side length, in pixels, of the square tiles each frame is cut into.
block_size = 64
# Side length of one tile after encoding. The encoder defined further down is
# a single stride-2 convolution, so it halves each spatial dimension; deriving
# the value keeps it in step with block_size instead of repeating a constant.
latent_block_size = block_size // 2
input_video_path = './input.mp4'
# Upper bound on the number of frames read from the input video.
num_frames = 50
batch_size = 1  # Keep batch size small to ease GPU memory usage

# Optional: Downscale high-res videos to avoid memory issues
# Adjust these values if needed or set them to None to skip resizing.
# Resizing is applied only when both values are set.
target_width = 640
target_height = 480

# ========================
# Extract Frames from Video
# ========================
# Read up to num_frames frames from the input video, optionally resizing each
# one, and convert it from OpenCV's BGR channel order to RGB.
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    cap.release()
    raise IOError("Could not open video file: {}".format(input_video_path))

# Resizing helps on a 4GB GPU; it is skipped unless both dimensions are set.
resize_to = None
if target_width is not None and target_height is not None:
    resize_to = (target_width, target_height)

frames = []
frame_count = 0
try:
    while frame_count < num_frames:
        ret, frame = cap.read()
        if not ret:
            # No further frame could be read: keep whatever was collected.
            break
        if resize_to is not None:
            frame = cv2.resize(frame, resize_to)
        # Convert BGR (OpenCV default) to RGB for TensorFlow consistency
        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        frame_count += 1
finally:
    # Release the capture handle even if resizing or conversion raises.
    cap.release()

if not frames:
    # Fail here with a clear message; otherwise the shape unpacking that
    # follows would fail on an empty array with an unrelated-looking error.
    raise ValueError("No frames could be read from: {}".format(input_video_path))
frames = np.array(frames)

# ========================
# Pad Frames to a Multiple of block_size
# ========================
# Frame geometry comes from the stacked array: axis 1 is height, axis 2 is width.
original_height, original_width = frames.shape[1:3]
# Extra rows/columns needed to reach the next multiple of block_size
# (zero when the dimension is already aligned).
pad_height = -original_height % block_size
pad_width = -original_width % block_size
padded_height = original_height + pad_height
padded_width = original_width + pad_width

# Pad only the bottom and right edges and leave the channel axis alone;
# mode='constant' fills the new pixels with np.pad's default value of zero.
pad_spec = ((0, pad_height), (0, pad_width), (0, 0))
padded_frames = [np.pad(image, pad_spec, mode='constant') for image in frames]

# ========================
# Utility Functions: Splitting and Reassembling Blocks
# ========================
def split_into_blocks(frame, b_size):
    """Cut a frame into non-overlapping b_size x b_size tiles.

    Tiles are collected left to right within a row of tiles, and rows of
    tiles top to bottom. They are returned stacked in one NumPy array in
    that order.
    """
    rows, cols = frame.shape[:2]
    tiles = [
        frame[top:top + b_size, left:left + b_size]
        for top in range(0, rows, b_size)
        for left in range(0, cols, b_size)
    ]
    return np.array(tiles)

def reassemble_blocks(blocks, b_size, target_shape):
    """Tile blocks back into a single float32 image.

    blocks is read in row-major tile order: every tile of the first row of
    tiles, then the second row, and so on. target_shape is
    (height, width, channels). Only whole tiles are placed, so the result
    has shape (height // b_size * b_size, width // b_size * b_size, channels).
    """
    height, width, channels = target_shape
    tiles_down = height // b_size
    tiles_across = width // b_size
    canvas = np.zeros(
        (tiles_down * b_size, tiles_across * b_size, channels),
        dtype=np.float32,
    )
    for index in range(tiles_down * tiles_across):
        # Recover the tile's grid position from its flat index.
        tile_row, tile_col = divmod(index, tiles_across)
        top = tile_row * b_size
        left = tile_col * b_size
        canvas[top:top + b_size, left:left + b_size] = blocks[index]
    return canvas

def split_latent_blocks(latent_frame, b_size):
    """Cut a latent frame into b_size x b_size tiles.

    Uses the same tiling order as split_into_blocks: left to right within a
    row of tiles, rows top to bottom. Returns the tiles stacked in one array.
    """
    height, width = latent_frame.shape[:2]
    row_starts = range(0, height, b_size)
    col_starts = range(0, width, b_size)
    return np.array([
        latent_frame[r:r + b_size, c:c + b_size]
        for r in row_starts
        for c in col_starts
    ])

# ========================
# Prepare Data for Autoencoder Training
# ========================
# Gather the tiles of every padded frame into one float32 array in [0, 1].
all_blocks = np.concatenate(
    [split_into_blocks(frame, block_size) for frame in padded_frames]
).astype(np.float32)
all_blocks /= 255.0  # Normalize pixels to [0,1]

# Build the input pipeline on the CPU. from_tensor_slices converts the whole
# NumPy array into a tensor, and on a GPU machine that tensor is placed on the
# GPU by default. That host-to-GPU copy is the step reported as
# "Failed copying input tensor ... _EagerConst: Dst tensor is not initialized".
# Pinning the dataset to the CPU keeps the full array in host memory, so only
# individual batches are sent to the GPU during training.
with tf.device('/CPU:0'):
    # Pass the array once; the (input, target) pair an autoencoder needs is
    # produced per element by map() instead of storing a second full copy.
    dataset = tf.data.Dataset.from_tensor_slices(all_blocks)
    dataset = dataset.map(lambda block: (block, block))
    dataset = dataset.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

# ========================
# Define the Autoencoder Architecture
# ========================
# Encoder: one stride-2 convolution that halves the spatial size and produces
# 32 feature channels (64x64x3 -> 32x32x32 for block_size = 64). Note that
# this latent tensor holds more values than the input tile does.
encoder = models.Sequential()
encoder.add(layers.Input(shape=(block_size, block_size, 3)))
encoder.add(layers.Conv2D(32, kernel_size=4, strides=2, padding='same', activation='relu'))
encoder.summary()

# Decoder: a stride-2 transposed convolution doubles the spatial size again,
# then a 3-channel convolution with a sigmoid maps the result to RGB in [0, 1].
decoder = models.Sequential()
decoder.add(layers.Input(shape=(latent_block_size, latent_block_size, 32)))
decoder.add(layers.Conv2DTranspose(32, kernel_size=4, strides=2, padding='same', activation='relu'))
decoder.add(layers.Conv2D(3, kernel_size=3, padding='same', activation='sigmoid'))
decoder.summary()

# Chain the two models: the decoder is applied to the encoder's output, so
# training the autoencoder updates the weights of both.
autoencoder_input = encoder.input
autoencoder_output = decoder(encoder.output)
autoencoder = models.Model(inputs=autoencoder_input, outputs=autoencoder_output)
autoencoder.compile(loss='mse', optimizer='adam')

# ========================
# Train the Autoencoder
# ========================
# Each dataset element is an (input, target) pair holding the same block, so
# the model is fitted to reproduce its input under the MSE loss set in compile().
autoencoder.fit(dataset, epochs=10)

# ========================
# Generate Latent Video Representations
# ========================
def _encode_frame(image):
    """Encode one padded frame tile by tile and stitch the latent tiles together."""
    # Tile the frame and scale pixel values to [0, 1], as for the training blocks.
    tiles = split_into_blocks(image, block_size).astype(np.float32) / 255.0
    # Process blocks through the encoder (using batch_size to control memory usage)
    encoded_tiles = encoder.predict(tiles, batch_size=batch_size)
    # The latent frame has half the padded height/width and 32 channels.
    return reassemble_blocks(
        encoded_tiles,
        latent_block_size,
        (padded_height // 2, padded_width // 2, 32),
    )


latent_frames = np.array([_encode_frame(image) for image in padded_frames])
np.save('latent_video.npy', latent_frames)
print("Latent video saved as 'latent_video.npy'.")

# ========================
# Reconstruct Video from Latent Representations
# ========================
# Reload the latent video from disk so reconstruction uses exactly what was saved.
latent_frames = np.load('latent_video.npy')
reconstructed_frames = []
for latent_frame in latent_frames:
    # Cut the latent frame into the tiles the decoder expects.
    blocks = split_latent_blocks(latent_frame, latent_block_size)
    decoded_blocks = decoder.predict(blocks, batch_size=batch_size)
    recon_frame = reassemble_blocks(decoded_blocks, block_size, (padded_height, padded_width, 3))
    # Crop out the padding and scale pixel values back to [0,255]
    recon_frame = recon_frame[:original_height, :original_width] * 255.0
    # Round to the nearest integer before casting: a bare astype(np.uint8)
    # truncates toward zero and so biases every pixel darker. The clip guards
    # the cast against any value outside the uint8 range.
    recon_frame = np.clip(np.rint(recon_frame), 0, 255)
    reconstructed_frames.append(recon_frame.astype(np.uint8))

# ========================
# Save the Reconstructed Video
# ========================
output_video_path = 'reconstructed.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# Write at the source video's frame rate so playback speed matches the input.
# Fall back to 30 fps when the rate is unavailable (reported as 0 or NaN).
source = cv2.VideoCapture(input_video_path)
try:
    fps = source.get(cv2.CAP_PROP_FPS)
finally:
    source.release()
if not fps > 0:
    fps = 30.0

out = cv2.VideoWriter(output_video_path, fourcc, fps, (original_width, original_height))
if not out.isOpened():
    # Without this check a writer that failed to open silently drops every
    # frame and the script would still report success.
    out.release()
    raise IOError("Could not open video writer for: {}".format(output_video_path))
try:
    for frame in reconstructed_frames:
        # Convert RGB back to BGR for OpenCV
        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
finally:
    # Always finalize the file, even if a frame fails to convert or write.
    out.release()

print("Reconstructed video saved as:", output_video_path)


InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.