In [13]:
import pandas as pd
import os
import numpy as np 

In [27]:
# Expected number of feature rows per video; also used to derive the sampling rate.
frames = 10
# Relative path to the EgoSchema dataset root.
EGOSCHEMA_FOLDER = "../../EgoSchema"

# The feature paths built later in this notebook point into ./features,
# so create that folder on the first run.
features_dir_missing = not os.path.exists("features")
if features_dir_missing:
    print("making folder")
    os.mkdir("features")

In [28]:
# Collect the ids (file name without extension) of videos that have no
# stored feature file yet.
VID_PATH = f"{EGOSCHEMA_FOLDER}/videos"

to_add = []
for video_file in os.listdir(VID_PATH):
    video_id = video_file[:video_file.rfind('.')]
    # Feature files are read back from ./features/<id>.npy elsewhere in this
    # notebook, so that is where to look. The previous check looked for the
    # .npy next to the video, which re-queued every video on every run.
    if not os.path.exists(f"./features/{video_id}.npy"):
        to_add.append(video_id)

# Number of videos queued by this scan.
total = len(to_add)

In [29]:
import torch
# Re-queue every stored feature file whose first dimension differs from `frames`.
for q_uid in os.listdir("./features/"):
    # Only .npy arrays can be inspected; anything else in the folder
    # (e.g. notebook checkpoint directories) would make np.load fail.
    if not q_uid.endswith(".npy"):
        continue
    # Memory-map the file: the shape comes from the header, so the array
    # does not need to be read, converted to float32, or wrapped in a tensor.
    n_stored = np.load(f"./features/{q_uid}", mmap_mode="r").shape[0]
    if n_stored != frames:
        to_add.append(q_uid[:q_uid.rfind('.')])
        print(q_uid)

In [30]:
# Build one [video_path, feature_path] row for every queued video.
columns = ["video_path", "feature_path"]
table = []

for video_file in os.listdir(VID_PATH):
    video_id = video_file[:video_file.rfind(".")]
    # Skip notebook artefacts and anything that was not queued in `to_add`.
    if "ipynb" in video_file or video_id not in to_add:
        continue
    # The feature path carries no extension here.
    table.append([f"{VID_PATH}/{video_file}", f"./features/{video_id}"])

In [31]:
# Persist the (video, feature) path pairs; the CSV is written without an index column.
df = pd.DataFrame(data=table, columns=columns)
df.to_csv("test_additional_paths.csv", index=False)

In [32]:
import torch as th
import math
import numpy as np
import torch.nn.functional as F
from tqdm import tqdm
import argparse
from extract.video_loader import VideoLoader
from torch.utils.data import DataLoader
from extract.preprocessing import Preprocessing
from extract.random_sequence_shuffler import RandomSequenceSampler
from args import MODEL_DIR
import clip

In [33]:
# Sampling rate handed to the loader: `frames` frames for every 180 seconds of
# video (presumably 180 s is the clip length -- TODO confirm). With frames = 10
# this is far below one frame per second.
sampling_fps = frames / 180

# Dataset over the videos listed in test_additional_paths.csv.
dataset = VideoLoader(
    "test_additional_paths.csv",
    framerate=sampling_fps,
    centercrop=True,
    size=224,
)

In [34]:
n_dataset = len(dataset)
sampler = RandomSequenceSampler(n_dataset, frames)

# The custom sampler is only handed to the DataLoader when the dataset has more
# than 10 entries; otherwise the DataLoader's default ordering is used.
use_custom_sampler = n_dataset > 10

# One video per batch, decoded by 10 worker processes.
loader = DataLoader(
    dataset,
    sampler=sampler if use_custom_sampler else None,
    batch_size=1,
    num_workers=10,
    shuffle=False,
)

In [35]:
# clip.load returns a pair; only its first element (the model) is kept.
# Weights are downloaded to / read from MODEL_DIR.
model, _ = clip.load("ViT-L/14", download_root=MODEL_DIR)
# Switch to inference mode and move the model to the GPU.
model = model.eval().cuda()

# Frame preprocessing applied to each decoded video before encoding.
preprocess = Preprocessing()

In [36]:
# Encode every video yielded by `loader` with the CLIP image encoder and save
# one feature row per frame to the video's output path.
FEATURE_DIM = 768      # width of the feature matrix allocated per video
ENCODE_BATCH = 128     # frames sent through the model per forward pass
l2_normalize = False   # normalisation was switched off in the original (`if 0:`)
save_as_half = True    # features were always stored as float16 (`if 1:`)

with th.no_grad():
    for k, data in enumerate(loader):
        input_file = data["input"][0]
        output_file = data["output"][0]

        # A tensor with 3 or fewer dimensions is treated as "nothing to do"
        # for this entry (presumably a placeholder from the loader -- confirm).
        if data["video"].dim() <= 3:
            print("Video {} already processed.".format(input_file))
            continue

        print(
            "Computing features of video {}/{}: {}".format(
                k + 1, n_dataset, input_file
            )
        )

        # The loader uses batch_size=1, so only the leading batch axis is
        # dropped. A bare squeeze() would also collapse a single-frame video,
        # which then failed the 4-D check and was skipped without any message.
        video = data["video"].squeeze(0)
        if video.dim() != 4:
            print(
                "Skipping {}: unexpected video tensor shape {}".format(
                    input_file, tuple(video.shape)
                )
            )
            continue

        video = preprocess(video)
        n_chunk = len(video)
        # torch.zeros replaces the legacy th.cuda.FloatTensor(...).fill_(0).
        features = th.zeros(n_chunk, FEATURE_DIM, dtype=th.float32, device="cuda")

        # Step through the frames in slices of ENCODE_BATCH; the final slice
        # may be shorter, and slicing past the end is safe.
        for min_ind in tqdm(range(0, n_chunk, ENCODE_BATCH)):
            max_ind = min_ind + ENCODE_BATCH
            video_batch = video[min_ind:max_ind].cuda()
            batch_features = model.encode_image(video_batch)
            if l2_normalize:
                batch_features = F.normalize(batch_features, dim=1)
            features[min_ind:max_ind] = batch_features

        features = features.cpu().numpy()
        if save_as_half:
            features = features.astype("float16")
        # np.save appends ".npy" when the given path does not already end with it.
        np.save(output_file, features)

Decoding video: ../../EgoSchema/videos/0354b658-b59e-476c-ac9d-739ee656bed0.mp4Decoding video: ../../EgoSchema/videos/001934bb-81bd-4cd8-a574-0472ef3f6678.mp4Decoding video: ../../EgoSchema/videos/01b0d445-64a5-4737-ad26-8f0df5c54af9.mp4Decoding video: ../../EgoSchema/videos/01cd83ef-06c0-43b0-a22f-fba50dc6150d.mp4Decoding video: ../../EgoSchema/videos/0233d5b0-07a6-4693-adf9-158a1d7bdafa.mp4
Decoding video: ../../EgoSchema/videos/01e9637c-d3cd-4679-baa1-5c3846c28b39.mp4Decoding video: ../../EgoSchema/videos/02ef4ec6-451b-4f36-937a-73d653ba2a7a.mp4

Decoding video: ../../EgoSchema/videos/0096d5bd-dafe-48a5-a04d-9efe65d3d5b8.mp4
Decoding video: ../../EgoSchema/videos/028874ba-a149-4499-825f-56d40a5ec11d.mp4Decoding video: ../../EgoSchema/videos/00ea715e-2816-460e-b503-97b8ec760bf2.mp4





Decoding video: ../../EgoSchema/videos/005651d6-f710-4909-b76d-acf7306fb72a.mp4
Decoding video: ../../EgoSchema/videos/024420d4-85a1-4148-bf1e-b111fcd24d73.mp4
Decoding video: ../../EgoSchema/videos/0

100%|██████████| 1/1 [00:00<00:00,  6.36it/s]


Computing features of video 2/20: ../../EgoSchema/videos/0354b658-b59e-476c-ac9d-739ee656bed0.mp4


100%|██████████| 1/1 [00:00<00:00, 10.43it/s]


Computing features of video 3/20: ../../EgoSchema/videos/01e9637c-d3cd-4679-baa1-5c3846c28b39.mp4


100%|██████████| 1/1 [00:00<00:00, 11.32it/s]


Computing features of video 4/20: ../../EgoSchema/videos/02ef4ec6-451b-4f36-937a-73d653ba2a7a.mp4


100%|██████████| 1/1 [00:00<00:00, 10.30it/s]


Computing features of video 5/20: ../../EgoSchema/videos/001934bb-81bd-4cd8-a574-0472ef3f6678.mp4


100%|██████████| 1/1 [00:00<00:00,  9.54it/s]


Computing features of video 6/20: ../../EgoSchema/videos/00ea715e-2816-460e-b503-97b8ec760bf2.mp4


100%|██████████| 1/1 [00:00<00:00, 11.40it/s]


Computing features of video 7/20: ../../EgoSchema/videos/0233d5b0-07a6-4693-adf9-158a1d7bdafa.mp4


100%|██████████| 1/1 [00:00<00:00, 11.78it/s]


Computing features of video 8/20: ../../EgoSchema/videos/01cd83ef-06c0-43b0-a22f-fba50dc6150d.mp4


100%|██████████| 1/1 [00:00<00:00, 11.47it/s]


Computing features of video 9/20: ../../EgoSchema/videos/028874ba-a149-4499-825f-56d40a5ec11d.mp4


100%|██████████| 1/1 [00:00<00:00, 11.71it/s]


Computing features of video 10/20: ../../EgoSchema/videos/0096d5bd-dafe-48a5-a04d-9efe65d3d5b8.mp4


100%|██████████| 1/1 [00:00<00:00, 11.94it/s]


Computing features of video 11/20: ../../EgoSchema/videos/02570b75-5a0a-4ced-9f85-54bfd51ddd78.mp4


100%|██████████| 1/1 [00:00<00:00, 14.27it/s]


Computing features of video 12/20: ../../EgoSchema/videos/02580ac5-bfbf-4b54-9a72-56541bbcb27a.mp4


100%|██████████| 1/1 [00:00<00:00, 13.83it/s]


Computing features of video 13/20: ../../EgoSchema/videos/00594c2d-1c89-47ec-aa3f-1c560cab3d26.mp4


100%|██████████| 1/1 [00:00<00:00, 13.80it/s]


Computing features of video 14/20: ../../EgoSchema/videos/010fb193-bc03-44a2-97fd-261463d06d60.mp4


100%|██████████| 1/1 [00:00<00:00, 13.80it/s]


Computing features of video 15/20: ../../EgoSchema/videos/005651d6-f710-4909-b76d-acf7306fb72a.mp4


100%|██████████| 1/1 [00:00<00:00, 14.09it/s]


Computing features of video 16/20: ../../EgoSchema/videos/00fa650b-df4d-46a2-b19c-cd3e3a3d7f48.mp4


100%|██████████| 1/1 [00:00<00:00, 14.26it/s]


Computing features of video 17/20: ../../EgoSchema/videos/027f192c-b186-456d-8940-67dea5a72c91.mp4


100%|██████████| 1/1 [00:00<00:00, 14.02it/s]


Computing features of video 18/20: ../../EgoSchema/videos/024420d4-85a1-4148-bf1e-b111fcd24d73.mp4


100%|██████████| 1/1 [00:00<00:00, 14.05it/s]


Computing features of video 19/20: ../../EgoSchema/videos/0089a0d6-fe3f-4db7-8c89-19e9e08e5e7c.mp4


100%|██████████| 1/1 [00:00<00:00, 14.13it/s]


Computing features of video 20/20: ../../EgoSchema/videos/001d2d1b-d2f9-4c39-810e-6e2087ff9d5a.mp4


100%|██████████| 1/1 [00:00<00:00, 13.77it/s]
