In [6]:
# install sklearn
%pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
  Downloading scikit_learn-1.2.2-cp311-cp311-macosx_10_9_x86_64.whl (9.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
Collecting scipy>=1.3.2
  Downloading scipy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl (35.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 35.0/35.0 MB 251.0 kB/s eta 0:00:00
Collecting joblib>=1.1.1
  Downloading joblib-1.2.0-py3-none-any.whl (297 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 298.0/298.0 kB 75.6 kB/s eta 0:00:00
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Installing collected packages: threadpoolctl, scipy, joblib, scikit-learn
Successfully installed joblib-1.2.0 scikit-learn-1.2.2

In [7]:
import os
import random
import torch
import torchvision.transforms as transforms
import torchvision.transforms.v2 as transformsv2
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_video
from sklearn.model_selection import train_test_split

In [8]:
class UCF101Dataset(Dataset):
    """Map-style dataset that decodes one video file per item.

    Args:
        video_list: Sequence of video identifiers. An entry may be a path
            relative to ``root_dir`` or a path that already starts with
            ``root_dir`` (or is absolute); ``root_dir`` is never prefixed twice.
        labels: Either a mapping from each entry of ``video_list`` to its
            label, or a list/tuple of labels aligned index-for-index with
            ``video_list``.
        root_dir: Directory that relative entries of ``video_list`` are
            resolved against.
        transform: Optional callable applied to each decoded frame separately.
    """

    def __init__(self, video_list, labels, root_dir, transform=None):
        self.video_list = video_list
        self.labels = labels
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of videos in the dataset."""
        return len(self.video_list)

    def _resolve_path(self, video_name):
        """Return the path to open for ``video_name`` without doubling ``root_dir``."""
        root = os.path.normpath(self.root_dir)
        name = os.path.normpath(video_name)
        # Entries produced by walking root_dir already carry it as a prefix;
        # joining again would point at root_dir/root_dir/... which does not exist.
        if name == root or name.startswith(root + os.sep):
            return video_name
        return os.path.join(self.root_dir, video_name)

    def _label_for(self, idx):
        """Return the label of item ``idx`` for either supported ``labels`` layout."""
        if isinstance(self.labels, (list, tuple)):
            # Positional layout: labels[i] belongs to video_list[i].
            return self.labels[idx]
        # Keyed layout: labels is looked up by the video identifier itself.
        return self.labels[self.video_list[idx]]

    def __getitem__(self, idx):
        """Decode the video at position ``idx``.

        Returns:
            A ``(video_frames, label)`` tuple. Without a transform,
            ``video_frames`` is exactly what ``read_video`` returned; with a
            transform it is the per-frame results stacked along a new first
            dimension.
        """
        video_name = self.video_list[idx]
        label = self._label_for(idx)
        video_path = self._resolve_path(video_name)
        # NOTE(review): read_video is called with its default output format, which
        # torchvision documents as (T, H, W, C) -- confirm the transform expects
        # channel-last frames or converts the layout itself.
        video_frames, _, _ = read_video(video_path, pts_unit="sec")

        if self.transform:
            # NOTE(review): the transform is invoked once per frame, so a random
            # transform presumably draws new parameters for every frame -- confirm
            # that is intended for video augmentation.
            video_frames = torch.stack([self.transform(frame) for frame in video_frames])

        return video_frames, label

In [11]:
# Load the dataset
root_dir = "projet/data/UCF101"

# Fail early with the resolved location: root_dir is relative, so it depends on
# the directory the kernel was started from.
if not os.path.isdir(root_dir):
    raise FileNotFoundError(
        f"UCF101 root directory not found: {os.path.abspath(root_dir)}"
    )

# Assign labels to the videos: each sub-folder name is the action label of the
# files it contains.
labels = {}
videos = []

# os.listdir returns entries in arbitrary order; sorting makes `videos`, and
# therefore the seeded split below, identical from run to run.
for action_folder in sorted(os.listdir(root_dir)):
    action_folder_path = os.path.join(root_dir, action_folder)

    if not os.path.isdir(action_folder_path):
        continue

    for video in sorted(os.listdir(action_folder_path)):
        # Skip hidden files (e.g. .DS_Store) that are not videos.
        if video.startswith("."):
            continue

        video_path = os.path.join(action_folder_path, video)

        if os.path.isfile(video_path):
            labels[video_path] = action_folder
            videos.append(video_path)

# Create train-test splits (80% / 20%, fixed seed)
train_videos, test_videos = train_test_split(videos, test_size=0.2, random_state=42)

FileNotFoundError: [Errno 2] No such file or directory: 'projet/data/UCF101'

In [4]:
# Size passed to Resize
resize = 112

# Color jitter strength, used for brightness, contrast, saturation and hue
coljit = 0.1

# Per-channel normalization parameters
mean = (0.5, 0.5, 0.5)
std = (0.5, 0.5, 0.5)

# Deterministic preprocessing shared by training and validation.
# Normalize must come last: it rejects integer tensors, so the frame has to be
# converted to float32 before it is normalized.
# NOTE(review): torchvision documents these transforms for [..., C, H, W]
# tensors -- confirm the frames handed to them are channel-first.
inner_transforms = transformsv2.Compose([
    transformsv2.Resize(resize),
    transformsv2.ToImageTensor(),
    transformsv2.ConvertImageDtype(torch.float32),
    transformsv2.Normalize(mean, std),
])

# Random augmentation, applied to training data only
outer_transforms = transformsv2.Compose([
    transformsv2.RandomHorizontalFlip(),
    transformsv2.ColorJitter(brightness=coljit, contrast=coljit, saturation=coljit, hue=coljit),
])

# Validation pipeline: preprocessing only
transform_val = transformsv2.Compose([
    inner_transforms
])

# Training pipeline: augmentation first, then the same preprocessing
transform_train = transformsv2.Compose([
    outer_transforms,
    inner_transforms
])

In [None]:
selected_actions = ['ApplyEyeMakeup', 'BenchPress', 'CliffDiving']  # Replace these with the actual action names you want to keep

# Keep only the videos whose action label is one of the selected actions.
filtered_train_videos = [video_path for video_path in train_videos if labels[video_path] in selected_actions]
filtered_test_videos = [video_path for video_path in test_videos if labels[video_path] in selected_actions]

# `labels` maps video path -> action name, so iterating it yields paths, never
# action names. Build a path -> action mapping restricted to each split, which
# is what the dataset uses to look up the label of a video.
filtered_train_labels = {video_path: labels[video_path] for video_path in filtered_train_videos}
filtered_test_labels = {video_path: labels[video_path] for video_path in filtered_test_videos}

# Create the train and test datasets
train_dataset = UCF101Dataset(filtered_train_videos, filtered_train_labels, root_dir, transform_train)
test_dataset = UCF101Dataset(filtered_test_videos, filtered_test_labels, root_dir, transform_val)

In [None]:
# Number of videos per batch
batch_size = 2

# Options common to both loaders; only the shuffling differs between them.
loader_options = dict(batch_size=batch_size, num_workers=4)

# Training batches are drawn in shuffled order, test batches in dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [8]:
import torchvision.models as models

# Build the 3D ResNet-18 video model with its default pre-trained weights
r3d_18 = models.video.r3d_18(weights='DEFAULT')

# Turn the model into a feature extractor: keep every child module except the
# last one (the fully connected layer) and chain the rest in their original order.
feature_layers = list(r3d_18.children())[:-1]
r3d_18 = torch.nn.Sequential(*feature_layers)