In [1]:
%config Completer.use_jedi = False

import os
from video import Video
from extractors import ExtractorFactory
from mxnet import nd, gpu
import io
import cv2
import finch
from pathlib import Path
import numpy as np
import math
from pprint import pprint
from accuracy import Accuracy
from npy import Npy



# Getting Breakfast's video paths

### getting ground truth paths

In [2]:
# Build a mapping from each folder sitting directly under 'segmentation_coarse'
# to the list of '.txt' ground-truth files found inside that folder.
gt_paths = {}
for folder, _subdirs, filenames in os.walk('segmentation_coarse'):
    parts = folder.split("/")
    # Only folders exactly one level below the root are of interest.
    if len(parts) != 2:
        continue
    print(parts)
    gt_paths[parts[-1]] = [
        os.path.join(folder, filename)
        for filename in filenames
        if filename.split(".")[-1] == 'txt'
    ]

# Total number of ground-truth files across all folders.
sum(len(found) for found in gt_paths.values())

['segmentation_coarse', 'milk']
['segmentation_coarse', 'scrambledegg']
['segmentation_coarse', 'juice']
['segmentation_coarse', 'pancake']
['segmentation_coarse', 'coffee']
['segmentation_coarse', 'friedegg']
['segmentation_coarse', 'tea']
['segmentation_coarse', 'cereals']
['segmentation_coarse', 'sandwich']
['segmentation_coarse', 'salat']


1712

### Function that transforms a ground truth file path to a video file path

In [3]:
from pathlib import Path

def gt_to_vd_path(gt_path, vd_root_folder='./BreakfastII_15fps_qvga_sync'):
    """Translate a ground-truth annotation path into the matching video path.

    The file name (last ``/``-separated component of ``gt_path``) is split on
    ``_``: the first token is used as the person folder, the second token as
    the recording folder, and the last token (with everything from its first
    ``.`` removed) as the trailing part of the video file name, e.g.
    ``P05_stereo01_P05_milk.txt``.

    For recordings whose second token contains ``'stereo'`` the last two
    characters of that token are dropped to form the folder name, and the
    ``ch1`` video is returned when it exists on disk; otherwise the ``ch0``
    path is returned without checking for its existence.

    Only the recording token is inspected for ``'stereo'`` (not the whole
    path), so a parent directory whose name happens to contain "stereo" does
    not turn a cam/webcam recording into a stereo one.

    Parameters
    ----------
    gt_path : str
        Path of the ground-truth file, using ``/`` as separator.
    vd_root_folder : str, optional
        Root folder of the videos. Defaults to the folder name that was
        previously hard-coded in this function.

    Returns
    -------
    str
        The ``.avi`` video path built from ``vd_root_folder``.

    Raises
    ------
    IndexError
        If the file name has no ``_``-separated recording token.
    """
    tokens = gt_path.split("/")[-1].split("_")
    # Strip the extension from the last token before any token is read.
    tokens[-1] = tokens[-1].split(".")[0]
    person, recording, tail = tokens[0], tokens[1], tokens[-1]

    def build(folder, *name_parts):
        # Assemble <root>/<person>/<folder>/<name_parts joined by "_">.avi
        return "/".join([vd_root_folder, person, folder, "_".join(name_parts) + '.avi'])

    if 'stereo' not in recording:
        return build(recording, person, tail)

    # 'stereo01' -> 'stereo': the recording index is not part of the folder name.
    stereo_folder = recording[:-2]
    ch1_path = build(stereo_folder, person, tail, 'ch1')
    if Path(ch1_path).exists():
        return ch1_path
    # Fall back to channel 0 when the channel 1 file is not on disk.
    return build(stereo_folder, person, tail, 'ch0')

## Getting INRIA's video paths

In [4]:
def get_video_paths(root_folder, video_ext):
    """Collect the paths of all files under ``root_folder`` with a given extension.

    ``os.walk`` already descends into every subdirectory, so no explicit
    recursion is needed. (An explicit recursive call on the bare subdirectory
    names would resolve them against the current working directory instead of
    their parent folder, and could pick up unrelated files.)

    Parameters
    ----------
    root_folder : str
        Folder to search, including all of its subfolders.
    video_ext : str
        Extension to match, without the leading dot (e.g. ``'mpg'``). It is
        compared with the text after the last ``.`` of each file name.

    Returns
    -------
    list of str
        Matching paths, each built by joining the containing folder (as
        reported by ``os.walk``) with the file name. Empty when
        ``root_folder`` does not exist or contains no matching file.
    """
    paths = []
    for path, _subdirs, files in os.walk(root_folder):
        paths.extend(
            os.path.join(path, name)
            for name in files
            if name.split(".")[-1] == video_ext
        )
    return paths

# Gather every 'mpg' file found below the 'data_new' folder and show how many there are.
paths = get_video_paths('data_new', 'mpg')
len(paths)

149

## Extracting Dino Features

In [5]:
# from video import Video
# from dino import Dino

# video = Video("./BreakfastII_15fps_qvga_sync/P05/stereo/P05_milk_ch1.avi", "extractor")
# model = Dino()
# frames = video([0, 1], as_tensor=True)
# import torch
# frames = frames.to(torch.float32)
# frames = frames.permute(0, 3, 2, 1)
# model(frames)

In [None]:
# Instantiate the DINO extractor and run it on the video of every ground-truth file.
dino_extractor_cls = ExtractorFactory.get(ExtractorFactory.DINO.value)
extractor = dino_extractor_cls(BATCH_SIZE=1)
# Second argument passed to Video: "<DINO name>_1".
dino_tag = "_".join([ExtractorFactory.DINO.value, str(1)])

for activity, annotation_files in gt_paths.items():
    print(activity)
    for annotation_file in annotation_files:
        video = Video(gt_to_vd_path(annotation_file), dino_tag)
        print(annotation_file)
        extractor.extract(video)

Using cache found in /home/guilherme/.cache/torch/hub/facebookresearch_dino_main


milk
segmentation_coarse/milk/P05_stereo01_P05_milk.txt
segmentation_coarse/milk/P48_cam01_P48_milk.txt
segmentation_coarse/milk/P42_cam01_P42_milk.txt
segmentation_coarse/milk/P45_cam01_P45_milk.txt
segmentation_coarse/milk/P38_webcam01_P38_milk.txt
segmentation_coarse/milk/P53_webcam02_P53_milk.txt
segmentation_coarse/milk/P13_cam01_P13_milk.txt
segmentation_coarse/milk/P22_webcam01_P22_milk.txt
segmentation_coarse/milk/P51_webcam01_P51_milk.txt
segmentation_coarse/milk/P03_stereo01_P03_milk.txt
segmentation_coarse/milk/P11_cam01_P11_milk.txt
segmentation_coarse/milk/P07_stereo01_P07_milk.txt
segmentation_coarse/milk/P48_webcam02_P48_milk.txt
segmentation_coarse/milk/P24_webcam01_P24_milk.txt
segmentation_coarse/milk/P40_cam01_P40_milk.txt
segmentation_coarse/milk/P24_webcam02_P24_milk.txt
segmentation_coarse/milk/P32_cam01_P32_milk.txt
segmentation_coarse/milk/P50_stereo01_P50_milk.txt
segmentation_coarse/milk/P37_webcam01_P37_milk.txt
segmentation_coarse/milk/P29_cam01_P29_milk.txt

## Extracting SlowFast Features

In [None]:
# Clip length handed to the SlowFast extractor and reused to tag the Video objects below.
clip_len = 32
slowfast_extractor_cls = ExtractorFactory.get(ExtractorFactory.SLOWFAST.value)
extractor = slowfast_extractor_cls(clip_len=clip_len)

### for Breakfast dataset

In [None]:
# Run the current extractor on the video behind every Breakfast ground-truth file.
# Second argument passed to Video: "<SLOWFAST name>_<clip_len>".
slowfast_tag = "_".join([ExtractorFactory.SLOWFAST.value, str(clip_len)])

for activity, annotation_files in gt_paths.items():
    print(activity)
    for annotation_file in annotation_files:
        video = Video(gt_to_vd_path(annotation_file), slowfast_tag)
        print(annotation_file)
        extractor.extract(video)

### for INRIA

In [None]:
# Run the current extractor on each video in `paths`, skipping the ones whose
# Video object already reports having features.
slowfast_tag = "_".join([ExtractorFactory.SLOWFAST.value, str(clip_len)])

for path in paths:
    video = Video(path, slowfast_tag)
    if not video.features.has_features:
        print(path)
        extractor.extract(video)

## Extracting I3D features

### for INRIA

In [None]:
# Create ./video_paths.txt with one line per entry of `paths`,
# each prefixed with "../action_eval/".
with open('./video_paths.txt', 'w') as paths_file:
    paths_file.writelines(
        "../action_eval/" + video_path + "\n" for video_path in paths
    )

# Run the commands below in a terminal, from inside the video_features folder, to extract the features
# (one command per stack/step size). Please remember to substitute the paths/arguments to match your setup.
# !python main.py --feature_type i3d --device_ids 0 --stack_size 10 --step_size 10 --output_path ../action_eval/inria_features --on_extraction save_numpy --file_with_video_paths ../action_eval/video_paths.txt
# !python main.py --feature_type i3d --device_ids 0 --stack_size 16 --step_size 16 --output_path ../action_eval/inria_features --on_extraction save_numpy --file_with_video_paths ../action_eval/video_paths.txt
# !python main.py --feature_type i3d --device_ids 0 --stack_size 24 --step_size 24 --output_path ../action_eval/inria_features --on_extraction save_numpy --file_with_video_paths ../action_eval/video_paths.txt
# !python main.py --feature_type i3d --device_ids 0 --stack_size 32 --step_size 32 --output_path ../action_eval/inria_features --on_extraction save_numpy --file_with_video_paths ../action_eval/video_paths.txt
# !python main.py --feature_type i3d --device_ids 0 --stack_size 40 --step_size 40 --output_path ../action_eval/inria_features --on_extraction save_numpy --file_with_video_paths ../action_eval/video_paths.txt
# !python main.py --feature_type i3d --device_ids 0 --stack_size 48 --step_size 48 --output_path ../action_eval/inria_features --on_extraction save_numpy --file_with_video_paths ../action_eval/video_paths.txt
# !python main.py --feature_type i3d --device_ids 0 --stack_size 64 --step_size 64 --output_path ../action_eval/inria_features --on_extraction save_numpy --file_with_video_paths ../action_eval/video_paths.txt

### for Breakfast dataset

Copy the `extract_features.py` file to the video_features folder and run the command below.
```
python extract_features.py
```
Don't forget to run this script once for each segment size mentioned in the paper; to do so, change the corresponding variable inside the script before each run.