## General, "all frames" class

In [None]:
class JesterVideoDataset(Dataset):
    """
    A Dataset class for the 20BN-JESTER dataset structure.

    Every sample is one video, stored on disk as a folder named after the
    video id and containing that video's frames as ``.jpg`` files. All frames
    of the folder are loaded for each sample.
    """
    def __init__(self, data_root, annotation_file, transform=None, text_label_dict=None):
        """
        Args:
            data_root (str): Root directory containing the numbered folders (e.g., "3_assign/...").
            annotation_file (str): Path to the CSV file (e.g., "jester-v1-train.csv").
                                   Format expected: "video_id;label_name"
            transform (callable, optional): Applied to every frame individually;
                                            the per-frame results are then stacked
                                            with ``torch.stack``.
            text_label_dict (dict, optional): Dictionary mapping class names to integers.
                                              If None, it is built automatically.

        Raises:
            KeyError: If the annotation file contains a label that is not a key
                      of ``text_label_dict``.
        """
        self.data_root = data_root
        self.transform = transform

        # 1. Read the CSV file that maps folder ids to labels.
        # The file is parsed as header-less and semicolon-delimited.
        df = pd.read_csv(annotation_file, sep=';', header=None, names=['video_id', 'label'])

        # Folder names are strings, so the ids are converted to match them.
        self.video_ids = df['video_id'].astype(str).tolist()
        raw_labels = df['label'].tolist()

        # 2. Handle labels (string -> integer mapping).
        # A mapping passed in (e.g., the one built from the training set) is
        # used as-is; otherwise indices follow the sorted order of label names.
        if text_label_dict is not None:
            self.class_to_idx = text_label_dict
        else:
            self.class_to_idx = {
                label: i for i, label in enumerate(sorted(set(raw_labels)))
            }

        try:
            self.labels = [self.class_to_idx[name] for name in raw_labels]
        except KeyError as err:
            # Same exception type as a plain dict lookup, but the message says
            # which label and which file are at fault.
            raise KeyError(
                f"Label {err.args[0]!r} from {annotation_file} "
                f"is not present in the class mapping"
            ) from err

    def __len__(self):
        """Returns the number of videos listed in the annotation file."""
        return len(self.video_ids)

    def __getitem__(self, idx):
        """
        Loads every frame of the video at position ``idx``.

        Returns:
            video_frames (list or Tensor): With a transform, the transformed
                frames stacked along a new leading dimension; without one, the
                list of RGB PIL images. If the video folder is missing,
                ``torch.zeros(1)`` is returned as a placeholder.
            label (int): The class index.

        Raises:
            RuntimeError: If a transform is set and the folder contains no
                          ``.jpg`` files, so there is nothing to stack.
        """
        video_id = self.video_ids[idx]
        label = self.labels[idx]

        # Path to the specific video folder (e.g., root/147000)
        video_dir = os.path.join(self.data_root, video_id)

        # 3. Load images.
        # Frames are sorted by file name so the video plays in order; this
        # relies on zero-padded names (00001.jpg, 00002.jpg...).
        try:
            frame_names = sorted(
                name for name in os.listdir(video_dir) if name.endswith('.jpg')
            )
        except FileNotFoundError:
            # Fallback if a folder in the CSV is missing from the directory
            print(f"Warning: Missing folder {video_dir}")
            return torch.zeros(1), label

        frames = []
        for frame_name in frame_names:
            img_path = os.path.join(video_dir, frame_name)
            # convert() produces an image that no longer depends on the file,
            # so the handle can be released as soon as the block exits.
            with Image.open(img_path) as image:
                frames.append(image.convert('RGB'))

        # 4. Apply transforms.
        # The transform works on single images, so it is applied frame by frame.
        if self.transform:
            if not frames:
                # torch.stack rejects an empty list with a message that does
                # not say which video is broken; name the folder instead.
                raise RuntimeError(f"No .jpg frames found in {video_dir}")

            # Stack along a new first dimension. If the transform yields
            # (Channels, Height, Width) tensors the result is
            # (Sequence_Length, Channels, Height, Width), e.g. (32, 3, 224, 224).
            frames = torch.stack([self.transform(img) for img in frames])

        return frames, label

    def get_class_mapping(self):
        """Returns the dictionary mapping label names to integers."""
        return self.class_to_idx

## 3D stacked class

In [None]:
# class Jester3DStackedDataset(Dataset)