## Mount Google Drive

### Subtask:
Mount Google Drive to access video files and save processed frames.


**Reasoning**:
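Google Drive must be mounted at `/content/drive` before anything else runs: every later cell reads the raw videos from, and writes the processed frames to, folders under `/content/drive/MyDrive`. The first code cell of the notebook is where that mount happens.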



In [None]:
# Add the folder where I store the data.
# NOTE(review): this cell contains no mount call, yet its recorded output
# reports that Drive is already mounted at /content/drive. Restore the mount
# call here so the notebook also works from a fresh runtime — TODO confirm.

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Initialize Directories and Parameters

### Subtask:
Set up input and output directories for videos and processed frames, and define parameters like the desired frames per second (fps) and target resolution for normalization (e.g., 640x480).


**Reasoning**:
The subtask requires initializing input/output directories, creating the output directory if it doesn't exist, and defining processing parameters such as frames per second and target resolution. This code block will perform all these initial setup steps.



In [4]:
import cv2
from pathlib import Path

# Locations on the mounted Drive: videos are read from raw_dir and every
# processed frame is written somewhere below out_dir.
raw_dir = Path("/content/drive/MyDrive/")
out_dir = raw_dir / "processed"
out_dir.mkdir(parents=True, exist_ok=True)

# Processing parameters shared by the cells below.
fps = 10  # desired frames per second
target_resolution = (640, 480)  # (width, height) every kept frame is resized to

# Echo the configuration so the recorded output documents the run.
print(
    f"Raw video directory: {raw_dir}",
    f"Processed frames output directory: {out_dir}",
    f"Desired frames per second (fps): {fps}",
    f"Target resolution for frames: {target_resolution}",
    sep="\n",
)

Raw video directory: /content/drive/MyDrive
Processed frames output directory: /content/drive/MyDrive/processed
Desired frames per second (fps): 10
Target resolution for frames: (640, 480)


## Iterate and Process Videos

### Subtask:
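Loop through each `.mp4` file in the raw directory, keep roughly `fps` frames per second of footage, convert every kept frame to grayscale, resize it to the target resolution, and save it as a PNG in a per-video output folder.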


In [6]:
# Extract frames from every .mp4 that sits directly inside raw_dir (the glob is
# not recursive). Sorting makes the processing order deterministic.
video_paths = sorted(raw_dir.glob("*.mp4"))
if not video_paths:
    # Without this message an empty or wrong raw_dir looks like a successful run.
    print(f"Warning: No .mp4 videos found in {raw_dir}")

for vid in video_paths:
    print(f"Processing video: {vid.name}")
    cap = cv2.VideoCapture(str(vid))
    try:
        if not cap.isOpened():
            print(f"Error: Could not open video {vid.name}")
            continue

        # One output folder per video, named after the file without extension.
        out_subdir = out_dir / vid.stem
        out_subdir.mkdir(parents=True, exist_ok=True)
        print(f"  Output directory for frames: {out_subdir}")

        video_fps_actual = cap.get(cv2.CAP_PROP_FPS)

        # `not x > 0` is true for 0, for negative values and for NaN, so every
        # unusable FPS report falls back to the default.
        if not video_fps_actual > 0:
            print(f"Warning: Could not get FPS for {vid.name}, assuming 30 FPS for processing.")
            video_fps_actual = 30  # Default to 30 FPS if not available

        # Keep every skip_interval-th frame. The ratio is rounded rather than
        # floored: a 29.97 fps source with fps = 10 floors to 2 (about 15 fps
        # of output) but rounds to 3 (about 10 fps, as requested).
        skip_interval = max(1, round(video_fps_actual / fps))

        i = 0  # index of the frame in the source video
        frames_extracted_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if i % skip_interval == 0:
                # Convert to grayscale and normalize resolution
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                resized_frame = cv2.resize(gray_frame, target_resolution)

                # The file name carries the source frame index, not a running count.
                frame_path = out_subdir / f"frame_{i:04d}.png"
                # cv2.imwrite reports failure through its return value; only
                # count frames that actually reached the disk.
                if cv2.imwrite(str(frame_path), resized_frame):
                    frames_extracted_count += 1
                else:
                    print(f"  Warning: Could not write {frame_path}")
            i += 1
    finally:
        # Release the capture even if decoding or writing raised.
        cap.release()
    print(f"  Extracted and processed {frames_extracted_count} frames from {vid.name}")

print("Frame extraction and initial processing loop completed.")

Frame extraction and initial processing loop completed.


## Stabilize Frames

### Subtask:
Implement video stabilization techniques using OpenCV to correct for unwanted camera movements, ensuring a smoother sequence of frames.


In [7]:
import numpy as np

# Grayscale frames of every video, keyed by the video's output folder name and
# stored in playback order. NOTE: all frames of all videos are held in memory
# at the same time.
video_frames_to_stabilize = {}


def _frame_index(frame_path):
    """Return the frame number encoded in a ``frame_<number>.png`` file name.

    Names whose suffix is not a plain number get ``inf`` so they sort after
    every numbered frame instead of raising.
    """
    suffix = frame_path.stem.rpartition("_")[2]
    return int(suffix) if suffix.isdigit() else float("inf")


print("Preparing for stabilization...")

for vid_subdir in sorted(out_dir.iterdir()):
    if not vid_subdir.is_dir():
        continue

    # Order frames by their numeric index. A plain lexicographic sort places
    # "frame_10000.png" before "frame_9999.png" once indices outgrow the
    # four-digit zero padding, which would scramble the sequence.
    frames_paths = sorted(
        vid_subdir.glob("frame_*.png"),
        key=lambda p: (_frame_index(p), p.name),
    )
    if not frames_paths:
        print(f"    No frames found in {vid_subdir}, skipping stabilization for this video.")
        continue

    # Only folders that actually contain frames get a "stabilized" subfolder.
    stabilized_subdir = vid_subdir / "stabilized"
    stabilized_subdir.mkdir(exist_ok=True)
    print(f"  Created stabilized directory: {stabilized_subdir}")

    # Load all grayscale and normalized PNG frames for this video
    current_video_frames = []
    unreadable_count = 0
    for frame_path in frames_paths:
        frame = cv2.imread(str(frame_path), cv2.IMREAD_GRAYSCALE)
        if frame is None:
            # cv2.imread signals an unreadable file by returning None.
            unreadable_count += 1
            continue
        current_video_frames.append(frame)

    if unreadable_count:
        print(f"    Warning: {unreadable_count} frame file(s) could not be read in {vid_subdir.name}.")

    if current_video_frames:
        video_frames_to_stabilize[vid_subdir.name] = current_video_frames
        print(f"    Loaded {len(current_video_frames)} frames for stabilization from {vid_subdir.name}.")
    else:
        print(f"    Failed to load any frames from {vid_subdir}, skipping stabilization for this video.")

print("Preparation for stabilization complete. Loaded frames into 'video_frames_to_stabilize' dictionary.")

Preparing for stabilization...
Preparation for stabilization complete. Loaded frames into 'video_frames_to_stabilize' dictionary.


In [8]:
print("Calculating raw camera motion...")

# Corner-detection settings forwarded to cv2.goodFeaturesToTrack.
feature_params = {
    "maxCorners": 100,
    "qualityLevel": 0.3,
    "minDistance": 7,
    "blockSize": 7,
}

# Optical-flow settings forwarded to cv2.calcOpticalFlowPyrLK.
lk_params = {
    "winSize": (15, 15),
    "maxLevel": 2,
    "criteria": (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03),
}


def _identity_affine():
    """Return a fresh 2x3 float32 affine matrix that represents "no motion"."""
    return np.array([[1., 0., 0.], [0., 1., 0.]], dtype=np.float32)


# Maps each video name to its list of 2x3 frame-to-frame transforms.
video_raw_transforms = {}

for video_name, frames in video_frames_to_stabilize.items():
    if len(frames) < 2:
        print(f"  Skipping raw motion calculation for {video_name}: Less than 2 frames available.")
        continue

    print(f"  Calculating raw transformations for {video_name} ({len(frames)} frames)...")

    # Entry 0 belongs to the first frame, which has no predecessor; the
    # identity keeps the list aligned one-to-one with the frames.
    raw_transforms = [_identity_affine()]

    # Walk over consecutive (previous, current) frame pairs.
    for prev_frame, curr_frame in zip(frames, frames[1:]):
        # Step 1: pick trackable corners in the previous frame.
        prev_pts = cv2.goodFeaturesToTrack(prev_frame, mask=None, **feature_params)
        if prev_pts is None or len(prev_pts) < 10:
            # Too few corners to estimate motion: record "no motion".
            raw_transforms.append(_identity_affine())
            continue

        # Step 2: follow those corners into the current frame.
        curr_pts, status, err = cv2.calcOpticalFlowPyrLK(
            prev_frame, curr_frame, prev_pts, None, **lk_params
        )

        # Keep only the point pairs whose status flag is 1.
        tracked = status == 1
        good_prev_pts = prev_pts[tracked]
        good_curr_pts = curr_pts[tracked]
        if min(len(good_prev_pts), len(good_curr_pts)) < 10:
            # Too few surviving pairs: record "no motion".
            raw_transforms.append(_identity_affine())
            continue

        # Step 3: fit the affine transform taking previous points to current
        # points; a failed fit (None) is also recorded as "no motion".
        m, _ = cv2.estimateAffine2D(good_prev_pts, good_curr_pts)
        raw_transforms.append(_identity_affine() if m is None else m)

    video_raw_transforms[video_name] = raw_transforms
    print(f"  Finished calculating {len(raw_transforms)} raw transformations for {video_name}.")

print("Raw camera motion calculation complete.")

Calculating raw camera motion...
Raw camera motion calculation complete.


In [9]:
print("Smoothing camera motion...")

# Half-width, in frames, of the moving-average window used by smooth(): the
# full window covers 2 * SMOOTHING_RADIUS + 1 frames (31 here). The frames
# being smoothed are the extracted ones (roughly `fps` = 10 per second of
# footage, not the source video's native rate), so 31 frames span about
# three seconds.
SMOOTHING_RADIUS = 15 # Can be adjusted based on desired smoothness vs. lag

# Dictionary mapping each video name to its list of per-frame 2x3 correction
# matrices (filled in by the loop further down in this cell).
video_smoothed_transforms = {}

# Function to smooth a path
def smooth(trajectory, radius):
    """Smooth a 1-D motion path with a centred moving average.

    Parameters
    ----------
    trajectory : array-like of float, shape (n,)
        The path to smooth (e.g. accumulated x, y or angle per frame).
    radius : int
        Half-width of the averaging window; the window spans
        ``2 * radius + 1`` samples.

    Returns
    -------
    numpy.ndarray, shape (n,)
        Always the same length as ``trajectory``. Samples that have a full
        window around them are replaced by the window mean. The first
        ``radius`` samples are all set to the mean of the first ``radius``
        inputs, and the last ``radius`` samples to the mean of the last
        ``radius`` inputs. A trajectory shorter than one full window is
        returned unsmoothed, as a float copy.
    """
    trajectory = np.asarray(trajectory, dtype=float)
    window_size = 2 * radius + 1

    # The "too short" case has to be detected up front: with mode="valid",
    # np.convolve swaps its operands when the signal is shorter than the
    # kernel and returns max(M, N) - min(M, N) + 1 samples, never an empty
    # array, so checking the result length afterwards cannot catch it and the
    # padded output would come back longer than the input.
    if trajectory.size < window_size:
        return trajectory.copy()

    # Moving average over every position that has a complete window.
    kernel = np.ones(window_size) / window_size
    core = np.convolve(trajectory, kernel, mode="valid")

    # Constant fill values for the `radius` samples at each end, which have no
    # complete window of their own.
    if radius > 0:
        start_avg = trajectory[:radius].mean()
        end_avg = trajectory[-radius:].mean()
    else:
        start_avg, end_avg = trajectory[0], trajectory[-1]

    return np.concatenate(
        [np.full(radius, start_avg), core, np.full(radius, end_avg)]
    )


# For every video: turn the frame-to-frame transforms into an accumulated
# camera path, smooth that path, and store one 2x3 correction matrix per frame.
for video_name, raw_transforms in video_raw_transforms.items():
    if len(raw_transforms) < 2: # At least one actual transform (meaning 2 frames) + identity for first frame
        print(f"  Skipping smoothing for {video_name}: Not enough raw transformations.")
        continue

    print(f"  Processing {len(raw_transforms)} transformations for {video_name}...")

    # 1. Per-frame motion increments (dx, dy, d_angle), skipping entry 0, which
    #    is the identity placeholder for the first frame.
    #    The angle is read as arctan2(m[1, 0], m[0, 0]): for a rotation matrix
    #    [[cos a, -sin a], [sin a, cos a]] element [1, 0] is sin a. This is the
    #    same layout the correction matrix below is built with, so extracted
    #    and applied angles share one sign convention. Reading element [0, 1]
    #    instead would yield -a and make the rotation correction turn the
    #    wrong way.
    deltas = np.array(
        [
            [m[0, 2], m[1, 2], np.arctan2(m[1, 0], m[0, 0])]
            for m in raw_transforms[1:]
        ],
        dtype=np.float64,
    )

    # Accumulate the increments into a global path that starts at (0, 0, 0)
    # for the first frame, giving one path sample per frame.
    trajectory = np.vstack([np.zeros((1, 3)), np.cumsum(deltas, axis=0)])
    x_path = trajectory[:, 0]
    y_path = trajectory[:, 1]
    a_path = trajectory[:, 2]

    # 2. Apply a smoothing filter to the global motion path
    smoothed_x = smooth(x_path, SMOOTHING_RADIUS)
    smoothed_y = smooth(y_path, SMOOTHING_RADIUS)
    smoothed_a = smooth(a_path, SMOOTHING_RADIUS)

    # 3. Per-frame correction: the offset that moves the raw path onto the
    #    smoothed path, expressed as a 2x3 rotation + translation matrix.
    corrections = []
    for i in range(len(raw_transforms)):
        dx_diff = smoothed_x[i] - x_path[i]
        dy_diff = smoothed_y[i] - y_path[i]
        da_diff = smoothed_a[i] - a_path[i]

        correction_matrix = np.array([
            [np.cos(da_diff), -np.sin(da_diff), dx_diff],
            [np.sin(da_diff),  np.cos(da_diff), dy_diff]
        ], dtype=np.float32)
        corrections.append(correction_matrix)

    video_smoothed_transforms[video_name] = corrections
    print(f"  Generated {len(corrections)} correction transformations for {video_name}.")

print("Camera motion smoothing complete. Stored in 'video_smoothed_transforms' dictionary.")

Smoothing camera motion...
Camera motion smoothing complete. Stored in 'video_smoothed_transforms' dictionary.


In [10]:
print("Applying smoothed motion and saving stabilized frames...")

# Warp every loaded frame with its correction matrix and write the result to
# the video's "stabilized" folder.
for video_name, frames in video_frames_to_stabilize.items():
    if video_name not in video_smoothed_transforms:
        print(f"  Skipping stabilization for {video_name}: No smoothed transforms found.")
        continue

    corrections = video_smoothed_transforms[video_name]

    # Corrections are applied by position, one matrix per frame, so the two
    # lists must be the same length.
    if len(frames) != len(corrections):
        print(f"  Warning: Number of frames ({len(frames)}) does not match number of corrections ({len(corrections)}) for {video_name}. Skipping.")
        continue

    # Stabilized frames live in <out_dir>/<video_name>/stabilized.
    stabilized_subdir = out_dir / video_name / "stabilized"
    if not stabilized_subdir.exists():
        stabilized_subdir.mkdir(parents=True, exist_ok=True)
        print(f"  Created missing stabilized directory: {stabilized_subdir}")

    print(f"  Stabilizing and saving frames for {video_name}...")
    stabilized_frames_count = 0
    for i, (frame, correction) in enumerate(zip(frames, corrections)):
        # cv2.warpAffine takes a 2x3 matrix and the output size as (width, height).
        height, width = frame.shape[:2]
        corrected_frame = cv2.warpAffine(frame, correction, (width, height))

        # cv2.imwrite reports failure through its return value; only count
        # frames that were actually written.
        stabilized_path = stabilized_subdir / f"frame_stabilized_{i:04d}.png"
        if cv2.imwrite(str(stabilized_path), corrected_frame):
            stabilized_frames_count += 1
        else:
            print(f"  Warning: Could not write {stabilized_path}")
    print(f"  Saved {stabilized_frames_count} stabilized frames for {video_name}.")

print("Stabilization and saving of frames complete.")


Applying smoothed motion and saving stabilized frames...
Stabilization and saving of frames complete.
