# In [None]:
import cv2
import os
import random

def extract_frames(video_path):
    """Read every frame of a video file into memory.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        A list with one entry per successfully read frame, in read order.
        The list is empty when the very first ``read()`` reports failure
        (presumably also the case for a file OpenCV cannot open -- confirm
        against the OpenCV documentation).

    Note:
        All frames are kept in the returned list at once, so memory use
        grows with the length of the video.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            # ``ret`` is falsy once no further frame could be read.
            if not ret:
                break
            frames.append(frame)
    finally:
        # Release the capture handle even if reading raised part-way through.
        cap.release()
    return frames

def save_sequence(sequence, output_dir, video_name, seq_index):
    """Write a sequence of frames as JPEG files into its own subfolder.

    The subfolder is ``<output_dir>/<video_name>_seq_<seq_index>`` and is
    created if missing (an existing folder is reused). Frames are written as
    ``frame_0.jpg``, ``frame_1.jpg``, ... in iteration order.

    Args:
        sequence: Iterable of frames accepted by ``cv2.imwrite``.
        output_dir: Directory under which the sequence folder is created.
        video_name: Name used as the prefix of the sequence folder.
        seq_index: Index used as the suffix of the sequence folder.

    Raises:
        OSError: If ``cv2.imwrite`` reports that a frame was not written.
    """
    seq_folder = os.path.join(output_dir, f"{video_name}_seq_{seq_index}")
    os.makedirs(seq_folder, exist_ok=True)
    for i, frame in enumerate(sequence):
        frame_path = os.path.join(seq_folder, f"frame_{i}.jpg")
        # imwrite signals failure through its return value rather than by
        # raising; ignoring it would silently leave an incomplete sequence.
        if not cv2.imwrite(frame_path, frame):
            raise OSError(f"Failed to write frame to {frame_path}")

def generate_dataset(input_folder, train_folder, test_folder, num_interpolated=3, train_ratio=0.8):
    """
    Build frame-interpolation sequences from every ``.mp4`` file in a folder.

    For each video, a sliding window of length (num_interpolated + 2) is moved
    one frame at a time over the extracted frames. In each window the first and
    last frames are the inputs for interpolation and the frames in between are
    the ground truth. Every window is saved via ``save_sequence`` under the
    name ``<video>_seq_<window start index>``.

    Each window is independently assigned to the train folder with probability
    ``train_ratio`` (using the global ``random`` state), otherwise to the test
    folder. Because consecutive windows overlap, frames of the same video can
    appear in both train and test.

    Args:
        input_folder: Directory that is scanned (non-recursively) for files
            whose name ends with ``.mp4`` (case-sensitive).
        train_folder: Output directory for training sequences; created if missing.
        test_folder: Output directory for testing sequences; created if missing.
        num_interpolated: Number of ground-truth frames between the two input
            frames of a sequence. Must be non-negative.
        train_ratio: Probability that a sequence goes to the training set.

    Raises:
        ValueError: If ``num_interpolated`` is negative.
    """
    if num_interpolated < 0:
        raise ValueError(
            f"num_interpolated must be non-negative, got {num_interpolated}"
        )

    # The total sequence length includes the starting and ending frames
    sequence_length = num_interpolated + 2

    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the order in
    # which random numbers are consumed -- and therefore the split under a
    # fixed seed -- reproducible.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith(".mp4"):
            continue
        video_path = os.path.join(input_folder, filename)
        video_name = os.path.splitext(filename)[0]
        frames = extract_frames(video_path)
        # Videos shorter than one window yield an empty range and are skipped.
        for start in range(len(frames) - sequence_length + 1):
            sequence = frames[start:start + sequence_length]
            # Randomly assign the sequence to training or testing set
            if random.random() < train_ratio:
                target_folder = train_folder
            else:
                target_folder = test_folder
            save_sequence(sequence, target_folder, video_name, start)

if __name__ == "__main__":
    # Script entry point: read videos from ./input and write the generated
    # sequences to ./train and ./test using the default window settings.
    generate_dataset(
        input_folder="input",    # Folder containing your mp4 videos
        train_folder="train",    # Output folder for training sequences
        test_folder="test",      # Output folder for testing sequences
    )
