# In [1]:  (notebook cell prompt)
from typing import List, Optional, Tuple

import cv2
import numpy as np

# In [3]:  (notebook cell prompt)
class TV:
    """
    Color time series recorded for a single pixel position.

    Holds the position, the frame indices that were sampled, and one color
    entry per sampled frame.
    """

    def __init__(self, original_position, frame_numbers, color_series):
        """
        Initialize a TV instance.

        :param original_position: Tuple[int, int] representing (x, y) coordinates.
        :param frame_numbers: List[int], list of frame indices.
        :param color_series: NumPy array of shape (len(frame_numbers), 3) representing RGB colors.
        """
        self.original_position = original_position
        # Starts out equal to the original position; nothing in this class moves it.
        self.current_position = original_position
        self.frame_numbers = frame_numbers
        self.color_series = color_series  # Shape: (len(frame_numbers), 3)

    def __getitem__(self, index):
        """
        Retrieve the color stored at a position in the series.

        ``index`` addresses ``color_series`` directly (0 is the first sampled
        frame). It is NOT a video frame number: when only some frames were
        sampled (e.g. ``[0, 10, 20]``), ``tv[20]`` is out of range. Use
        :meth:`color_at_frame` to look a color up by frame number.

        :param index: int (or any index ``color_series`` accepts), position in the series.
        :return: the entry of ``color_series`` at that position.
        """
        return self.color_series[index]

    def color_at_frame(self, frame_number):
        """
        Retrieve the color recorded for a specific video frame number.

        :param frame_number: int, a frame index contained in ``frame_numbers``.
        :return: the entry of ``color_series`` recorded for that frame.
        :raises ValueError: if ``frame_number`` was not sampled for this TV.
        """
        try:
            # list() so that non-list sequences (e.g. NumPy arrays) also work.
            slot = list(self.frame_numbers).index(frame_number)
        except ValueError:
            raise ValueError(
                f"Frame {frame_number} not found in TV at position {self.original_position}."
            ) from None
        return self.color_series[slot]

    def __len__(self):
        """
        Return the number of frames in the color series.

        :return: int
        """
        return len(self.color_series)

    def __repr__(self):
        return (f"TV(original_position={self.original_position}, "
                f"num_frames={len(self.color_series)})")


class VideoProcessor:
    """
    Sample per-pixel color time series from a video file via OpenCV.
    """

    def __init__(self, video_path: str):
        """
        Initialize the VideoProcessor by setting the video path and retrieving video dimensions.

        :param video_path: str, path to the video file.
        :raises IOError: if the video cannot be opened or its size cannot be determined.
        """
        self.video_path = video_path
        self.width = None
        self.height = None
        self._retrieve_video_dimensions()

    def _retrieve_video_dimensions(self):
        """
        Retrieve and store the width and height of the video.

        :raises IOError: if the video cannot be opened, or if the reported size
            is zero and no frame can be read to measure it.
        """
        cap = cv2.VideoCapture(self.video_path)
        try:
            if not cap.isOpened():
                raise IOError(f"Cannot open video file {self.video_path}")

            # Retrieve width and height
            self.width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            if self.width == 0 or self.height == 0:
                # Fallback: read the first frame to get dimensions
                ret, frame = cap.read()
                if not ret:
                    raise IOError(f"Cannot read frames to determine video dimensions for {self.video_path}")
                # A channel reorder does not change the shape, so measure the raw frame.
                self.height, self.width = frame.shape[:2]
        finally:
            # Release the handle on every path, including the error ones.
            cap.release()

    def create_tvs(self, positions: List[Tuple[int, int]],
                   frame_numbers: Optional[List[int]] = None) -> List[TV]:
        """
        Create multiple TV instances for a list of pixel positions.

        One TV is returned per entry of ``positions``, in the same order.
        Repeated positions each get their own, independent series.

        :param positions: List[Tuple[int, int]] representing (x, y) coordinates.
        :param frame_numbers: List[int], list of frame indices to include. Defaults to all frames.
        :return: List[TV] instances.
        :raises ValueError: if a position lies outside the frame or a frame
            number lies outside the frame count reported by the video.
        :raises IOError: if the video cannot be opened or a frame cannot be read.
        """
        positions = list(positions)

        # Validate positions against video dimensions
        for pos in positions:
            x, y = pos
            if not (0 <= x < self.width and 0 <= y < self.height):
                raise ValueError(f"Position {pos} is out of bounds for frame size ({self.width}, {self.height}).")

        # One color list per entry of `positions`, addressed by index rather
        # than by coordinate so duplicate positions do not share storage.
        series = [[] for _ in positions]

        cap = cv2.VideoCapture(self.video_path)
        try:
            if not cap.isOpened():
                raise IOError(f"Cannot open video file {self.video_path}")

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            if frame_numbers is None:
                frame_numbers = list(range(total_frames))
            else:
                frame_numbers = list(frame_numbers)
                # Validate frame numbers
                for fn in frame_numbers:
                    if not (0 <= fn < total_frames):
                        raise ValueError(f"Frame number {fn} is out of bounds for number of frames {total_frames}.")

            # Process specified frames
            for frame_idx in frame_numbers:
                # Set the position of the next frame to be read
                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                ret, frame = cap.read()
                if not ret:
                    raise IOError(f"Failed to read frame {frame_idx} from video.")

                # Convert frame from BGR to RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                for colors, (x, y) in zip(series, positions):
                    # Copy the pixel: a bare slice is a view that would keep
                    # the whole decoded frame alive for every sampled frame.
                    colors.append(frame_rgb[y, x, :].copy())  # Shape: (3,)
        finally:
            # Release the handle on every path, including validation errors.
            cap.release()

        # Create TV instances
        tv_instances = []
        for pos, colors in zip(positions, series):
            tv = TV(
                original_position=pos,
                frame_numbers=list(frame_numbers),
                # reshape keeps the (n, 3) layout even when no frame was sampled.
                color_series=np.array(colors).reshape(-1, 3),
            )
            tv_instances.append(tv)

        return tv_instances

def main():
    """
    Demo: sample a few pixels of a video at selected frames and print the results.
    """
    # Pixels to follow and the frames at which to sample them
    tracked_pixels = [(50, 50), (100, 100), (150, 150)]
    wanted_frames = [0, 10, 20, 30, 40, 50]

    # Replace the path with your own video file
    tvs = VideoProcessor('cab_ride_trimmed.mkv').create_tvs(
        tracked_pixels, frame_numbers=wanted_frames
    )

    # Per TV: summary line, then the color recorded at frame 20 (if sampled)
    for tv in tvs:
        print(tv)
        try:
            slot = tv.frame_numbers.index(20)
            print(f"Color at frame 20 for position {tv.original_position}: {tv.color_series[slot]}")
        except ValueError:
            print(f"Frame 20 not found in TV at position {tv.original_position}.")

    # Full dump: every sampled frame with its color, grouped by TV
    for tv in tvs:
        print(f"TV at position {tv.original_position}:")
        for frame_number, color in zip(tv.frame_numbers, tv.color_series):
            print(f"  Frame {frame_number}: {color}")


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

# Notebook output: NameError: name 'List' is not defined
# (presumably the cell above ran before the import cell — TODO confirm)