In [None]:
import glob
import random
import os
import numpy as np
import torch

from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms as transforms

# Per-channel normalization statistics (three values each) expected by
# pre-trained PyTorch models; consumed by `transforms.Normalize` in the
# dataset's transform pipeline.
mean, std = (
    np.array([0.485, 0.456, 0.406]),
    np.array([0.229, 0.224, 0.225]),
)


# The base class is spelled out in full because this class reuses the name
# `Dataset`: with `class Dataset(Dataset)` every re-run of the notebook cell
# would make the class inherit from its own previous definition.
class Dataset(torch.utils.data.Dataset):
    """Dataset of frame sequences stored as one folder of JPEG frames per video.

    Every line of the selected train/test split list names one video. Its
    frames are looked up as ``<dataset_path>/<activity>/<video>/<number>.jpg``
    and sorted by their integer frame number. Indexing returns
    ``(image_sequence, target)``: the stacked, transformed frames and the
    zero-based class index read from ``classInd.txt``.

    Attributes:
        training: Whether random temporal sampling and random horizontal
            flipping are applied.
        label_index: Mapping from activity name to zero-based class index.
        sequences: Paths of the frame folders belonging to the split.
        sequence_length: Number of frames per sample, or ``None`` to return
            every frame of a video.
        label_names: Sorted activity names present in the split.
        num_classes: Number of distinct activities present in the split.
        transform: Resize -> ToTensor -> Normalize pipeline applied per frame.
    """

    def __init__(self, dataset_path, split_path, split_number, input_shape, sequence_length, training):
        """Reads the split definition and builds the per-frame transform.

        Args:
            dataset_path: Root folder that contains the per-video frame folders.
            split_path: Folder holding ``classInd.txt`` and the
                ``trainlist0N.txt`` / ``testlist0N.txt`` files.
            split_number: Which of the splits 1, 2 or 3 to use.
            input_shape: Shape whose last two entries are the resize target.
            sequence_length: Frames per returned sequence, or ``None`` for all.
            training: Selects the train list and enables augmentation.
        """
        self.training = training
        self.label_index = self._extract_label_mapping(split_path)
        self.sequences = self._extract_sequence_paths(dataset_path, split_path, split_number, training)
        self.sequence_length = sequence_length
        self.label_names = sorted({self._activity_from_path(seq_path) for seq_path in self.sequences})
        self.num_classes = len(self.label_names)
        self.transform = transforms.Compose(
            [
                transforms.Resize(input_shape[-2:], Image.BICUBIC),
                transforms.ToTensor(),
                transforms.Normalize(mean, std),
            ]
        )

    def _extract_label_mapping(self, split_path="data/ucfTrainTestlist"):
        """ Extracts a mapping between activity name and softmax index

        Each non-blank line of ``classInd.txt`` is ``<1-based index> <activity>``;
        the returned indices are shifted to start at zero.

        Raises:
            ValueError: If a non-blank line does not consist of exactly two
                whitespace-separated fields, or the index is not an integer.
        """
        with open(os.path.join(split_path, "classInd.txt")) as file:
            lines = file.read().splitlines()
        label_mapping = {}
        for line in lines:
            if not line.strip():
                # Tolerate blank / trailing empty lines instead of failing to unpack.
                continue
            label, action = line.split()
            label_mapping[action] = int(label) - 1
        return label_mapping

    def _extract_sequence_paths(
        self, dataset_path, split_path="data/ucfTrainTestlist", split_number=1, training=True
    ):
        """ Extracts paths to sequences given the specified train / test split

        Everything from ``.avi`` onwards is dropped from each list entry (train
        lists carry a trailing label), and the remainder is joined onto
        ``dataset_path``. Blank lines are skipped.
        """
        assert split_number in [1, 2, 3], "Split number has to be one of {1, 2, 3}"
        fn = f"trainlist0{split_number}.txt" if training else f"testlist0{split_number}.txt"
        split_path = os.path.join(split_path, fn)
        with open(split_path) as file:
            lines = file.read().splitlines()
        return [os.path.join(dataset_path, line.split(".avi")[0]) for line in lines if line.strip()]

    def _activity_from_path(self, path):
        """ Extracts activity name (the parent folder's name) from filepath """
        # os.path handles the platform's separators, unlike splitting on "/".
        return os.path.basename(os.path.dirname(path))

    def _frame_number(self, image_path):
        """ Extracts frame number from filepath

        Raises:
            ValueError: If the file name without extension is not an integer.
        """
        return int(os.path.splitext(os.path.basename(image_path))[0])

    def _pad_to_length(self, sequence):
        """ Pads the sequence to required sequence length

        The first element is repeated at the front until the list holds
        ``self.sequence_length`` items. The list is modified in place and also
        returned. Empty lists and ``sequence_length=None`` are left untouched.
        """
        if self.sequence_length is None or not sequence:
            return sequence
        missing = self.sequence_length - len(sequence)
        if missing > 0:
            # Single slice assignment instead of repeated O(n) insert(0, ...).
            sequence[:0] = [sequence[0]] * missing
        return sequence

    def _sample_indices(self, num_frames):
        """Chooses which frame indices of a ``num_frames`` long video to load.

        With ``sequence_length=None`` every frame is used. Otherwise training
        draws a random stride and start frame, evaluation starts at frame 0 with
        the largest stride that fits, and at most ``sequence_length`` indices are
        returned. Requires ``num_frames >= sequence_length`` (guaranteed by
        `_pad_to_length`).
        """
        if self.sequence_length is None:
            return list(range(num_frames))
        if self.training:
            # Randomly choose sample interval and start frame
            sample_interval = np.random.randint(1, num_frames // self.sequence_length + 1)
            start_i = np.random.randint(0, num_frames - sample_interval * self.sequence_length + 1)
        else:
            # Start at first frame and sample uniformly over sequence
            start_i = 0
            sample_interval = num_frames // self.sequence_length
        return list(range(start_i, num_frames, sample_interval))[: self.sequence_length]

    def _load_frame(self, image_path, flip=False):
        """Loads one frame, applies the transform and optionally mirrors it."""
        # The context manager closes the file handle; converting to RGB keeps
        # the 3-channel Normalize valid for grayscale / palette / RGBA images.
        with Image.open(image_path) as image:
            image_tensor = self.transform(image.convert("RGB"))
        if flip:
            # Flip along the last (width) axis, i.e. mirror horizontally.
            image_tensor = torch.flip(image_tensor, (-1,))
        return image_tensor

    def __getitem__(self, index):
        """Returns ``(image_sequence, target)`` for the video at ``index``.

        ``index`` wraps around modulo the dataset length.

        Raises:
            FileNotFoundError: If the video's folder contains no ``.jpg`` frames.
        """
        sequence_path = self.sequences[index % len(self)]
        # glob returns files in arbitrary order, so sort the frames by their
        # numeric name. The folder part is escaped so that glob
        # metacharacters in the dataset path are matched literally.
        pattern = os.path.join(glob.escape(sequence_path), "*.jpg")
        image_paths = sorted(glob.glob(pattern), key=self._frame_number)
        if not image_paths:
            raise FileNotFoundError(f"No .jpg frames found in '{sequence_path}'")
        # Pad frames sequences shorter than `self.sequence_length` to length
        image_paths = self._pad_to_length(image_paths)
        frame_indices = self._sample_indices(len(image_paths))
        # One flip decision per sequence so that all its frames stay consistent.
        if self.training:
            flip = np.random.random() < 0.5
        else:
            flip = False
        # Extract frames as tensors
        image_sequence = torch.stack([self._load_frame(image_paths[i], flip) for i in frame_indices])
        target = self.label_index[self._activity_from_path(sequence_path)]
        return image_sequence, target

    def __len__(self):
        """Number of videos in the selected split."""
        return len(self.sequences)

In [None]:
# Define training set
# The split lists live in a sub-folder of the frame root, so both paths are
# derived from a single constant instead of repeating the literal.
UCF101_ROOT = '../data/UCF101'

train_dataset = Dataset(
    dataset_path=UCF101_ROOT,
    split_path=f'{UCF101_ROOT}/annotations/ucfTrainTestlist',
    split_number=1,
    input_shape=(224, 224),
    sequence_length=40,
    training=True,
)

In [None]:
# Smoke test: index the dataset once so that Dataset.__getitem__ runs end to
# end; on success it returns an (image_sequence, target) pair.
train_dataset[0]

9537
0


ValueError: invalid literal for int() with base 10: 'v_ApplyEyeMakeup_g08_c01'

In [None]:
%debug

> [0;32m<ipython-input-28-3ac7fb83b021>[0m(63)[0;36m_frame_number[0;34m()[0m
[0;32m     61 [0;31m    [0;32mdef[0m [0m_frame_number[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mimage_path[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     62 [0;31m        [0;34m""" Extracts frame number from filepath """[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 63 [0;31m        [0;32mreturn[0m [0mint[0m[0;34m([0m[0mimage_path[0m[0;34m.[0m[0msplit[0m[0;34m([0m[0;34m"/"[0m[0;34m)[0m[0;34m[[0m[0;34m-[0m[0;36m1[0m[0;34m][0m[0;34m.[0m[0msplit[0m[0;34m([0m[0;34m".jpg"[0m[0;34m)[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     64 [0;31m[0;34m[0m[0m
[0m[0;32m     65 [0;31m    [0;32mdef[0m [0m_pad_to_length[0m[0;34m([0m[0mself[0m[0;34m,[0m [0msequence[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> image_path.split("/")[-1].split(".jpg")
['v_ApplyEyeMakeup_g08_c01