In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# Parameters
IMG_SIZE = (128, 128)     # (width, height); used as the dsize argument of cv2.resize
NUM_PAST = 2              # Number of past frames given to the model as context
NUM_FUTURE = 2            # Number of future frames given to the model as context
WINDOW_SIZE = NUM_PAST + 1 + NUM_FUTURE  # Total frames per window (5): past + middle target + future
BATCH_SIZE = 64           # Batch size applied to both the training and test datasets
EPOCHS = 10               # Number of epochs passed to model.fit

def extract_frames(video_path):
    """Read every frame of a video file into memory.

    Args:
        video_path: Path of the video, handed to ``cv2.VideoCapture``.

    Returns:
        A list with one entry per decoded frame, in the order ``cap.read()``
        produced them. The list is empty when no frame could be read (for
        example because the file could not be opened).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            # read() reports ret == False at end of stream or on failure.
            if not ret:
                break
            frames.append(frame)
    finally:
        # Always free the capture handle, even if decoding or the list
        # growth raises part-way through (e.g. MemoryError on long videos).
        cap.release()
    return frames

def save_sequence(sequence, output_dir, video_name, seq_index):
    """Write the frames of one sequence as JPEG files in their own subfolder.

    The subfolder is ``<output_dir>/<video_name>_seq_<seq_index>`` and is
    created if missing. Frames are stored as ``frame_0.jpg``, ``frame_1.jpg``,
    ... following their position in ``sequence``.
    """
    target_dir = os.path.join(output_dir, f"{video_name}_seq_{seq_index}")
    os.makedirs(target_dir, exist_ok=True)

    for position, image in enumerate(sequence):
        cv2.imwrite(os.path.join(target_dir, f"frame_{position}.jpg"), image)

def generate_dataset(input_folder, train_folder, test_folder, train_ratio=0.8):
    """Cut every ``.mp4`` video in ``input_folder`` into frame sequences.

    A window of WINDOW_SIZE consecutive frames is slid over each video one
    frame at a time. Every window is saved through ``save_sequence`` into
    either ``train_folder`` or ``test_folder``, chosen at random.

    Args:
        input_folder: Directory that is scanned (non-recursively) for videos.
        train_folder: Destination for training sequences; created if missing.
        test_folder: Destination for test sequences; created if missing.
        train_ratio: Probability that a window goes to the training set.

    NOTE(review): consecutive windows share WINDOW_SIZE - 1 frames and each
    window is assigned independently, so the train and test sets can contain
    the same frames. Split per video if a leakage-free evaluation is needed.
    """
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # processing order (and thus the sequence of random draws) stable.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith(".mp4"):
            continue

        video_name = os.path.splitext(filename)[0]
        frames = extract_frames(os.path.join(input_folder, filename))

        # The range is empty for videos shorter than WINDOW_SIZE frames, so
        # those videos simply contribute no sequences.
        for start in range(len(frames) - WINDOW_SIZE + 1):
            sequence = frames[start:start + WINDOW_SIZE]
            # Randomly assign the sequence to the training or testing set.
            if np.random.rand() < train_ratio:
                destination = train_folder
            else:
                destination = test_folder
            # The window's start index doubles as its sequence number.
            save_sequence(sequence, destination, video_name, start)

def load_sequence(seq_path):
    """Load the frames of one sequence folder in temporal order.

    Only files named ``frame_<something>.jpg`` are considered. They are
    ordered by the numeric value of ``<something>`` so that ``frame_10.jpg``
    comes after ``frame_2.jpg``; a plain string sort would put it before.
    Names whose middle part is not a number are placed last, ordered by name.

    Args:
        seq_path: Directory containing the frame images.

    Returns:
        A list of float32 RGB arrays scaled to [0, 1] and resized to
        IMG_SIZE. Files that OpenCV cannot read are skipped, so the list may
        be shorter than the number of matching files.
    """
    prefix, suffix = "frame_", ".jpg"

    def frame_order(name):
        # Numeric frames first (by index), anything else afterwards (by name).
        index = name[len(prefix):-len(suffix)]
        if index.isdecimal():
            return (0, int(index), name)
        return (1, 0, name)

    file_list = sorted(
        (f for f in os.listdir(seq_path)
         if f.endswith(suffix) and f.startswith(prefix)),
        key=frame_order,
    )

    frames = []
    for filename in file_list:
        img = cv2.imread(os.path.join(seq_path, filename))
        if img is None:
            # imread signals an unreadable/corrupt file by returning None.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, IMG_SIZE)
        img = img.astype(np.float32) / 255.0
        frames.append(img)
    return frames

def process_sequence_3d(frames, num_past=NUM_PAST, num_future=NUM_FUTURE):
    """
    Splits one window of frames into model input and target.

    The window must be a list of exactly num_past + 1 + num_future frames:
      - Input: the first num_past frames followed by the last num_future
        frames, stacked into one array.
      - Target: the frame at index num_past (the one left out of the input).

    Returns (None, None) when the window has any other length.
    """
    expected_length = num_past + 1 + num_future
    if len(frames) != expected_length:
        return None, None

    middle = num_past
    before = frames[:middle]       # frames 0 .. num_past-1
    after = frames[middle + 1:]    # frames num_past+1 .. end
    context = np.array(before + after)
    target = np.array(frames[middle])
    return context, target

def dataset_from_folder_3d(folder, num_past=NUM_PAST, num_future=NUM_FUTURE):
    """
    Builds a tf.data.Dataset from a folder containing sequence subfolders.

    Every subfolder is loaded with load_sequence and scanned with a sliding
    window of num_past + 1 + num_future frames. Each window becomes one item,
    a tuple (input_frames, target_frame) where:
      - input_frames: shape (num_past+num_future, H, W, 3)
      - target_frame: shape (H, W, 3)

    Args:
        folder: Directory whose subdirectories each hold one frame sequence.
        num_past: Number of frames before the target used as input.
        num_future: Number of frames after the target used as input.

    Returns:
        An unbatched tf.data.Dataset built from in-memory arrays.

    Raises:
        ValueError: If no subfolder yields a complete window.
    """
    # Derive the window length from the arguments rather than the global
    # WINDOW_SIZE; otherwise non-default num_past/num_future values would
    # make every window fail the length check in process_sequence_3d.
    window_size = num_past + 1 + num_future

    inputs_list, targets_list = [], []
    # Sorted so that the element order of the dataset is reproducible.
    for seq_folder in sorted(os.listdir(folder)):
        seq_path = os.path.join(folder, seq_folder)
        if not os.path.isdir(seq_path):
            continue
        frames = load_sequence(seq_path)
        # Slide a window over the frames; sequences shorter than the window
        # produce an empty range and are skipped.
        for start in range(len(frames) - window_size + 1):
            window = frames[start:start + window_size]
            inp, tar = process_sequence_3d(window, num_past, num_future)
            if inp is not None and tar is not None:
                inputs_list.append(inp)
                targets_list.append(tar)

    if not inputs_list:
        raise ValueError("No valid sequences found in folder {}.".format(folder))

    inputs_np = np.stack(inputs_list, axis=0)    # shape: (N, num_past+num_future, H, W, 3)
    targets_np = np.stack(targets_list, axis=0)  # shape: (N, H, W, 3)
    return tf.data.Dataset.from_tensor_slices((inputs_np, targets_np))

def build_3d_conv_model(input_shape):
    """
    Builds a 3D CNN that maps a stack of frames to a single frame.

    Input shape: (T, H, W, 3), where T = NUM_PAST+NUM_FUTURE (e.g. 4).
    Layout: two Conv3D + spatial max-pool stages, a 256-filter Conv3D
    bottleneck, two stride-2 Conv3DTranspose stages, a mean over the time
    axis, and a final sigmoid Conv2D producing 3 channels.

    NOTE(review): the two 2x spatial poolings are undone by two 2x transposed
    convolutions, so H and W presumably need to be multiples of 4 for the
    output to match the input resolution - confirm before changing IMG_SIZE.
    """
    kernel = (3, 3, 3)
    spatial_only = (1, 2, 2)  # leave the time axis untouched

    inputs = layers.Input(shape=input_shape)  # e.g., (4, 128, 128, 3)

    # Encoder: each stage halves H and W.
    x = inputs
    for filters in (64, 128):
        x = layers.Conv3D(filters, kernel, activation='relu', padding='same')(x)
        x = layers.MaxPooling3D(pool_size=spatial_only)(x)

    # Bottleneck (no pooling after this convolution).
    x = layers.Conv3D(256, kernel, activation='relu', padding='same')(x)

    # Decoder: each stage doubles H and W again.
    for filters in (128, 64):
        x = layers.Conv3DTranspose(
            filters, kernel, strides=spatial_only, padding='same', activation='relu'
        )(x)

    # Collapse the time dimension by averaging, leaving (H, W, channels).
    x = layers.Lambda(lambda t: tf.reduce_mean(t, axis=1))(x)
    output = layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

    return models.Model(inputs, output)

# Generate the dataset sequences from videos (only if not already generated)
input_folder = "../input"    # Folder containing your mp4 videos
train_folder = "../train"    # Folder for training sequences
test_folder = "../test"      # Folder for testing sequences

# The split is random, so regenerating on top of an existing split would let
# the same window land in both train and test. Skip generation when either
# output folder already has content; delete both folders to force a rebuild.
sequences_exist = any(
    os.path.isdir(folder) and os.listdir(folder)
    for folder in (train_folder, test_folder)
)
if not sequences_exist:
    generate_dataset(input_folder, train_folder, test_folder)

# Prepare datasets using the 3D pipeline
train_ds = dataset_from_folder_3d(train_folder)
train_ds = train_ds.shuffle(100).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_ds = dataset_from_folder_3d(test_folder)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Build and compile the 3D CNN model
# Input shape: (NUM_PAST+NUM_FUTURE, H, W, 3) => (4, 128, 128, 3)
# IMG_SIZE is (width, height), hence index 1 for H and index 0 for W.
input_shape = (NUM_PAST + NUM_FUTURE, IMG_SIZE[1], IMG_SIZE[0], 3)
model_3d = build_3d_conv_model(input_shape)
model_3d.compile(optimizer='adam', loss='mse', metrics=['mae'])

print("Training 3D CNN model for frame interpolation...")
# NOTE: the test set is also used as validation data during training.
history_3d = model_3d.fit(train_ds, epochs=EPOCHS, validation_data=test_ds)

print("Evaluating 3D CNN model on test data...")
loss, mae = model_3d.evaluate(test_ds)
print("Test Loss:", loss, "Test MAE:", mae)

# (Optional) Make predictions on test samples and inspect their shapes
for inputs, _ in test_ds.take(1):
    predictions = model_3d.predict(inputs)
    print("Predictions shape:", predictions.shape)


Training 3D CNN model for frame interpolation...
Epoch 1/10
