### Load Dataset

In [27]:
import errno
import os
import glob
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from typing import Tuple, Any, Optional, Callable

class YouCookDataset(Dataset):
    #TODO - Add download feature
    #url = 'http://youcook2.eecs.umich.edu/static/YouCookII/YouCookII.tar.gz'

    #path based on file format
    path = 'features/feat_{format}/{phase}_frame_feat_{format}'
    phases = ['train', 'val', 'test']


    def __init__(self, annotation_file, 
        root:str,
        label_file: str,
        phase: int = 1,  #1 for train, #2 for validation, #3 for test
        file_format: str = 'csv', #csv(default) and dat format supported
        transform: Optional[Callable] = None, #used for transforming numerical video data to a required format
        download: bool = False) -> None:
        # Download case not handled for now.
        # It will throw error if the relevant files are not found.
        
        #annotation files
        annotation_path = os.path.join(root, annotation_file)
        if not os.path.exists(annotation_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), 
                                    annotation_path)
        #label files 
        label_path = os.path.join(root, label_file)
        if not os.path.exists(label_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), 
                                    label_path) #raise the correct error

        #self.annotations = pd.read_csv(annotation_path)
        #self.labels = pd.read_csv(label_path)
        self.labels = pd.DataFrame()
        #data files
        phase = self.phases[phase]
        data_path = self.path.format(format = file_format, phase = phase)
        data_path = os.path.join(root, data_path)
        
        if not os.path.exists(data_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), 
                                    data_path) #raise the correct error

        print(data_path)
        #file_list = list(os.walk(f'{data_path}'))
        file_list = glob.glob(f'{data_path}/**/*.{file_format}', recursive=True)
        print(file_list)
        data: Any = []
        for file_path in file_list:
            vid_data = np.genfromtxt(file_path, delimiter=',')
            data.append(vid_data)
        data_np = np.concatenate([data], axis = 0)
        self.data = torch.from_numpy(data_np)


    def __getitem__(self, index) -> Tuple[Any, Any]:
        vid, label = self.data[index], self.labels[index]

        if self.transform is not None:
            vid = self.transform(vid)

        return vid, label

    def __len__(self) -> int:
        return self.data.size()[0]

In [28]:
d = YouCookDataset('annotation_file', '../data', 'label_file', 2)

../data/features/feat_csv/test_frame_feat_csv
['../data/features/feat_csv/test_frame_feat_csv/101/zLLh104pkeg/0001/resnet_34_feat_mscoco.csv', '../data/features/feat_csv/test_frame_feat_csv/101/YSes0R7EksY/0001/resnet_34_feat_mscoco.csv']


In [31]:
d.data.shape

torch.Size([2, 500, 512])

### Key-Clip Selection

#### Neural-based Selection Model

### Structured Knowledge Extraction

#### TSM method - Action and Object detection (video only)

In [1]:
import torch.hub
repo = 'epic-kitchens/action-models'

class_counts = (125, 352)
segment_count = 8
base_model = 'resnet50'
tsn = torch.hub.load(repo, 'TSN', class_counts, segment_count, 'RGB',
                     base_model=base_model, 
                     pretrained='epic-kitchens', force_reload=True)
trn = torch.hub.load(repo, 'TRN', class_counts, segment_count, 'RGB',
                     base_model=base_model, 
                     pretrained='epic-kitchens')
mtrn = torch.hub.load(repo, 'MTRN', class_counts, segment_count, 'RGB',
                     base_model=base_model, 
                      pretrained='epic-kitchens')
tsm = torch.hub.load(repo, 'TSM', class_counts, segment_count, 'RGB',
                     base_model=base_model, 
                     pretrained='epic-kitchens')

# Show all entrypoints and their help strings
for entrypoint in torch.hub.list(repo):
    print(entrypoint)
    print(torch.hub.help(repo, entrypoint))

batch_size = 1
segment_count = 8
snippet_length = 1  # Number of frames composing the snippet, 1 for RGB, 5 for optical flow
snippet_channels = 3  # Number of channels in a frame, 3 for RGB, 2 for optical flow
height, width = 224, 224

inputs = torch.randn(
    [batch_size, segment_count, snippet_length, snippet_channels, height, width]
)
# The segment and snippet length and channel dimensions are collapsed into the channel
# dimension
# Input shape: N x TC x H x W
inputs = inputs.reshape((batch_size, -1, height, width))
for model in [tsn, trn, mtrn, tsm]:
    # You can get features out of the models
    features = model.features(inputs)
    # and then classify those features
    verb_logits, noun_logits = model.logits(features)
    
    # or just call the object to classify inputs in a single forward pass
    verb_logits, noun_logits = model(inputs)
    print(verb_logits.shape, noun_logits.shape)

Downloading: "https://github.com/epic-kitchens/action-models/archive/master.zip" to /Users/tejaskasetty/.cache/torch/hub/master.zip
Using cache found in /Users/tejaskasetty/.cache/torch/hub/epic-kitchens_action-models_master
Using cache found in /Users/tejaskasetty/.cache/torch/hub/epic-kitchens_action-models_master


Multi-Scale Temporal Relation Network Module in use ['8-frame relation', '7-frame relation', '6-frame relation', '5-frame relation', '4-frame relation', '3-frame relation', '2-frame relation']


Using cache found in /Users/tejaskasetty/.cache/torch/hub/epic-kitchens_action-models_master
Using cache found in /Users/tejaskasetty/.cache/torch/hub/epic-kitchens_action-models_master
Using cache found in /Users/tejaskasetty/.cache/torch/hub/epic-kitchens_action-models_master
Using cache found in /Users/tejaskasetty/.cache/torch/hub/epic-kitchens_action-models_master
Using cache found in /Users/tejaskasetty/.cache/torch/hub/epic-kitchens_action-models_master
Using cache found in /Users/tejaskasetty/.cache/torch/hub/epic-kitchens_action-models_master


MTRN

    Multi-scale Temporal Relational Network

    See https://arxiv.org/abs/1711.08496 for more details.
    Args:
        num_class:
            Number of classes, can be either a single integer,
            or a 2-tuple for training verb+noun multi-task models
        num_segments:
            Number of frames/optical flow stacks input into the model
        modality:
            Either ``RGB`` or ``Flow``.
        base_model:
            Backbone model architecture one of ``resnet18``, ``resnet30``,
            ``resnet50``, ``BNInception``, ``InceptionV3``, ``VGG16``.
            ``BNInception`` and ``resnet50`` are the most thoroughly tested.
        new_length:
            The number of channel inputs per snippet
        consensus_type:
            The consensus function used to combined information across segments.
            One of ``avg``, ``max``, ``TRN``, ``TRNMultiscale``.
        before_softmax:
            Whether to output class score before or after softmax.
     