In [1]:
import cv2
import os
import random

def extract_frames(video_path):
    """Extracts all frames from a given video file.

    Args:
        video_path: Source handed unchanged to ``cv2.VideoCapture``.

    Returns:
        A list with every frame delivered by ``cap.read()``, in reading
        order. The list is empty when the very first read reports failure.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # A falsy status flag ends the loop (no more frames to read).
                break
            frames.append(frame)
    finally:
        # Always give the capture handle back, even when reading or
        # appending raises (for instance MemoryError on a long video).
        cap.release()
    return frames

def save_sequence(sequence, output_dir, video_name, seq_index):
    """Saves a sequence of frames into a dedicated subfolder.

    The frames are written as ``frame_<i>.jpg`` (``i`` being the position in
    ``sequence``) inside ``<output_dir>/<video_name>_seq_<seq_index>``. The
    subfolder is created when it does not exist yet.

    Args:
        sequence: Iterable of frames accepted by ``cv2.imwrite``.
        output_dir: Folder that receives the sequence subfolder.
        video_name: Name used as the subfolder prefix.
        seq_index: Index used as the subfolder suffix.

    Raises:
        OSError: If ``cv2.imwrite`` reports that a frame was not written.
    """
    seq_folder = os.path.join(output_dir, f"{video_name}_seq_{seq_index}")
    os.makedirs(seq_folder, exist_ok=True)
    for i, frame in enumerate(sequence):
        frame_path = os.path.join(seq_folder, f"frame_{i}.jpg")
        # cv2.imwrite reports failure through its boolean result instead of
        # raising; ignoring it would leave an incomplete sequence on disk.
        if not cv2.imwrite(frame_path, frame):
            raise OSError(f"Failed to write frame to {frame_path}")

def generate_dataset(input_folder, train_folder, test_folder, num_interpolated=3,
                     train_ratio=0.8, seed=None, split_by_video=False):
    """
    Processes each mp4 video in the input folder.

    For each video, a sliding window of length (num_interpolated + 2) is used
    to generate sequences where the first and last frames are the inputs for
    interpolation, and the frames in between are used as ground truth.

    By default each sequence is randomly assigned to train or test.

    NOTE(review): the window advances one frame at a time, so neighbouring
    sequences share most of their frames. Assigning them independently puts
    near-identical frames into both splits; pass ``split_by_video=True`` to
    keep all sequences of one video in the same split.

    Args:
        input_folder: Folder scanned (non-recursively) for ``.mp4`` files;
            the extension is matched case-insensitively.
        train_folder: Output folder for training sequences (created if missing).
        test_folder: Output folder for testing sequences (created if missing).
        num_interpolated: Number of ground-truth frames between the two
            input frames; must be at least 1.
        train_ratio: Probability, in [0, 1], of choosing the training folder.
        seed: Optional seed for a private random generator that makes the
            split reproducible. When None, the module-level ``random``
            generator is used.
        split_by_video: When True, the train/test choice is drawn once per
            video instead of once per sequence.

    Raises:
        ValueError: If ``num_interpolated`` or ``train_ratio`` is out of range.
    """
    if num_interpolated < 1:
        raise ValueError("num_interpolated must be at least 1")
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError("train_ratio must be between 0 and 1")

    # The total sequence length includes the starting and ending frames
    sequence_length = num_interpolated + 2

    # A private generator keeps a seeded run reproducible without touching
    # the global random state; unseeded runs keep using the global generator.
    rng = random.Random(seed) if seed is not None else random

    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    def pick_folder():
        return train_folder if rng.random() < train_ratio else test_folder

    # Sorted so the order of random draws does not depend on the listing
    # order of the filesystem.
    for filename in sorted(os.listdir(input_folder)):
        # Case-insensitive match so files such as "CLIP.MP4" are not skipped.
        if not filename.lower().endswith(".mp4"):
            continue
        video_path = os.path.join(input_folder, filename)
        video_name = os.path.splitext(filename)[0]
        frames = extract_frames(video_path)
        video_folder = pick_folder() if split_by_video else None
        # Slide a window over the frames; the window start doubles as the
        # sequence index. Videos shorter than one window yield nothing.
        for seq_index in range(len(frames) - sequence_length + 1):
            sequence = frames[seq_index:seq_index + sequence_length]
            destination = video_folder if split_by_video else pick_folder()
            save_sequence(sequence, destination, video_name, seq_index)

if __name__ == "__main__":
    # Default layout: the mp4 sources live in "input"; the generated
    # sequences are split between the "train" and "test" output folders.
    input_folder, train_folder, test_folder = "input", "train", "test"
    generate_dataset(
        input_folder=input_folder,
        train_folder=train_folder,
        test_folder=test_folder,
    )


In [3]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# Parameters
# Target frame size handed to cv2.resize in load_sequence. It is read as
# (width, height): the model input shape below uses IMG_SIZE[1] as the
# height and IMG_SIZE[0] as the width.
IMG_SIZE = (128, 128)
# Number of frames to interpolate between the start and end frame.
# process_sequence checks the frame count of every sequence against
# NUM_INTERPOLATED + 2, and the model emits NUM_INTERPOLATED * 3 channels.
NUM_INTERPOLATED = 3
# Number of examples per batch for both the training and the test dataset.
BATCH_SIZE = 32
# Number of passes over the training dataset in model.fit.
EPOCHS = 5

def load_sequence(seq_path):
    """
    Loads the frames of one sequence folder in frame-index order.

    Only files named like "frame_0.jpg", "frame_1.jpg", etc. are considered.
    They are ordered by their numeric index, so "frame_10.jpg" comes after
    "frame_9.jpg" (a plain string sort would place it before "frame_2.jpg").
    Matching names without an integer index are placed last, in name order.

    Each image is read with OpenCV, converted from BGR to RGB, resized to
    IMG_SIZE and converted to float32 divided by 255. Files OpenCV cannot
    read (``cv2.imread`` returns None) are skipped.

    Args:
        seq_path: Folder containing the frame images of one sequence.

    Returns:
        A list with one array per successfully loaded frame.
    """
    prefix, suffix = "frame_", ".jpg"

    def frame_order(filename):
        # Sort key: numeric frame index first, non-numeric names afterwards.
        index_text = filename[len(prefix):-len(suffix)]
        try:
            return (0, int(index_text), filename)
        except ValueError:
            return (1, 0, filename)

    file_list = sorted(
        (f for f in os.listdir(seq_path) if f.endswith(suffix) and f.startswith(prefix)),
        key=frame_order,
    )

    frames = []
    for filename in file_list:
        img_path = os.path.join(seq_path, filename)
        # Read using OpenCV, convert from BGR to RGB, and resize
        img = cv2.imread(img_path)
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, IMG_SIZE)
        img = img.astype(np.float32) / 255.0
        frames.append(img)
    return frames

def process_sequence(frames):
    """
    Given a list of frames, forms:
      - Input: concatenated first and last frames (shape: H x W x 6)
      - Target: concatenated intermediate frames (shape: H x W x (NUM_INTERPOLATED*3))

    The shapes above assume 3-channel frames of identical height and width.

    Returns:
        (input_tensor, target_tensor), or (None, None) when the list does
        not contain exactly NUM_INTERPOLATED + 2 frames.
    """
    # Require the exact length: fewer frames cannot be split into inputs
    # and targets, and extra frames (e.g. stale files left in a reused
    # folder) would widen the target beyond NUM_INTERPOLATED * 3 channels.
    if len(frames) != NUM_INTERPOLATED + 2:
        return None, None
    # Concatenate along the channel axis
    input_tensor = np.concatenate([frames[0], frames[-1]], axis=-1)
    target_tensor = np.concatenate(frames[1:-1], axis=-1)
    return input_tensor, target_tensor

def dataset_from_folder(folder):
    """
    Builds a tf.data.Dataset from a folder containing sequence subfolders.

    Every subdirectory of ``folder`` is loaded with ``load_sequence`` and
    turned into an (input, target) pair by ``process_sequence``; sequences
    for which that returns None are left out. All pairs are held in memory.

    Args:
        folder: Folder whose subdirectories each hold one sequence.

    Returns:
        A dataset of (input, target) pairs, ordered by subfolder name.

    Raises:
        ValueError: If no subfolder yields a usable sequence.
    """
    inputs, targets = [], []
    # Sorted so the element order does not depend on the listing order of
    # the filesystem.
    for seq_folder in sorted(os.listdir(folder)):
        seq_path = os.path.join(folder, seq_folder)
        if not os.path.isdir(seq_path):
            continue
        inp, tar = process_sequence(load_sequence(seq_path))
        if inp is not None and tar is not None:
            inputs.append(inp)
            targets.append(tar)
    if not inputs:
        # Fail here with a clear message instead of building a shapeless,
        # empty dataset.
        raise ValueError(f"No usable sequences found in {folder!r}")
    # np.stack insists on identical shapes, so a stray odd-sized example is
    # reported instead of producing a ragged array.
    return tf.data.Dataset.from_tensor_slices((np.stack(inputs), np.stack(targets)))

def build_model(input_shape, num_interpolated):
    """
    Assembles a small convolutional encoder-decoder.

    The network takes the channel-stacked start and end frames, shape
    (H, W, 6), and emits the stacked intermediate frames, shape
    (H, W, num_interpolated * 3), through a sigmoid output layer.
    """
    kernel = (3, 3)
    factor = (2, 2)
    start_end = layers.Input(shape=input_shape)
    pipeline = [
        # Encoder: convolve, then halve the spatial resolution.
        layers.Conv2D(64, kernel, activation='relu', padding='same'),
        layers.MaxPooling2D(factor),
        layers.Conv2D(128, kernel, activation='relu', padding='same'),
        # Decoder: restore the resolution and project to the output channels.
        layers.UpSampling2D(factor),
        layers.Conv2D(num_interpolated * 3, kernel, activation='sigmoid', padding='same'),
    ]
    features = start_end
    for stage in pipeline:
        features = stage(features)
    return models.Model(start_end, features)

# Prepare datasets
train_folder = "train"  # Folder containing training sequence subfolders
test_folder = "test"    # Folder containing testing sequence subfolders

train_ds = dataset_from_folder(train_folder)
# Shuffle across the whole training set: a buffer smaller than the dataset
# only mixes neighbouring examples, which keeps batches correlated with the
# order in which the sequence folders were read. The buffer then holds
# every training example, so it costs memory proportional to the dataset.
# Fall back to the former fixed buffer when the size is not a positive number.
num_train_examples = int(train_ds.cardinality())
shuffle_buffer = num_train_examples if num_train_examples > 0 else 100
train_ds = train_ds.shuffle(shuffle_buffer).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

test_ds = dataset_from_folder(test_folder)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Build and compile the model
input_shape = (IMG_SIZE[1], IMG_SIZE[0], 6)  # (height, width, channels)
model = build_model(input_shape, NUM_INTERPOLATED)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Training
# NOTE(review): the test set is also passed as validation data here, so the
# numbers reported by the evaluation below are not from data unseen during
# model selection. Consider a separate validation split.
print("Starting training...")
history = model.fit(train_ds, epochs=EPOCHS, validation_data=test_ds)

# Evaluation
print("Evaluating on test data...")
loss, mae = model.evaluate(test_ds)
print("Test Loss:", loss)
print("Test MAE:", mae)

# (Optional) Make predictions on test samples
# Only the first test batch is predicted; its shape is printed as a sanity check.
for inputs, targets in test_ds.take(1):
    predictions = model.predict(inputs)
    print("Predictions shape:", predictions.shape)

Starting training...
Epoch 1/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 6s/step - loss: 0.1449 - mae: 0.3470 - val_loss: 0.0058 - val_mae: 0.0413
Epoch 2/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 6s/step - loss: 0.0049 - mae: 0.0362 - val_loss: 0.0032 - val_mae: 0.0279
Epoch 3/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 6s/step - loss: 0.0029 - mae: 0.0269 - val_loss: 0.0022 - val_mae: 0.0222
Epoch 4/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 6s/step - loss: 0.0024 - mae: 0.0242 - val_loss: 0.0020 - val_mae: 0.0204
Epoch 5/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 6s/step - loss: 0.0020 - mae: 0.0209 - val_loss: 0.0018 - val_mae: 0.0209
Evaluating on test data...
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - loss: 0.0018 - mae: 0.0210
Test Loss: 0.0018052519299089909
Test MAE: 0.020923446863889694
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━