# Setup

In [None]:
# Parameters
# Any value left as None here is replaced with a default by the next cell.
keyframes_dir = None  # root directory of the keyframe images to encode
save_dir = None  # directory the extracted feature files are written to
bs = None  # number of keyframes encoded per batch

In [None]:
import os

# Fill in a default for every parameter that was left unset (falsy) above.
dir_path = os.getcwd()

if not keyframes_dir:
    if any(host in str(get_ipython()) for host in ('google.colab', 'kaggle')):
        # Hosted notebook (Colab / Kaggle): keyframes are expected next to the
        # working directory. Update this path as necessary.
        keyframes_dir = f'{dir_path}/Keyframes'
    else:
        # Local run: keyframes live in the sibling "transnet" directory.
        parent_dir_path = os.path.dirname(dir_path)
        keyframes_dir = f'{parent_dir_path}/transnet/Keyframes'

# Default batch size and output directory.
bs = bs or 4
save_dir = save_dir or './CLIPv2_features'

In [None]:
# Install dependency (the package that provides the `open_clip` module imported below)
! pip install open_clip_torch

In [None]:
# Import module
import os
import open_clip
import glob
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm

# Parse data path

In [None]:
def parse_keyframe_info(keyframes_dir='../transnet/Keyframes'):
    """Collect the keyframe image paths of every video under ``keyframes_dir``.

    The directory is walked as ``<keyframes_dir>/<data_part>/<video_dir>/*.jpg``.
    Entries that are not directories (stray files such as ``.DS_Store``) are
    skipped at both the data-part and the video level instead of raising or
    producing empty bogus entries.

    Args:
        keyframes_dir: Root directory containing one sub-directory per data part.

    Returns:
        A dict mapping each data-part name to a dict that maps a video id to
        the sorted list of that video's ``.jpg`` paths. The video id is the
        text after the last underscore of the video directory name. Both
        levels are inserted in sorted order.

    Raises:
        OSError: If ``keyframes_dir`` (or a data-part directory) cannot be
            listed; propagated from ``os.listdir``.
    """
    all_keyframe_paths = {}

    for data_part in sorted(os.listdir(keyframes_dir)):
        data_part_path = f'{keyframes_dir}/{data_part}'
        if not os.path.isdir(data_part_path):
            continue

        video_keyframes = {}
        for video_dir in sorted(os.listdir(data_part_path)):
            video_path = f'{data_part_path}/{video_dir}'
            if not os.path.isdir(video_path):
                continue

            video_id = video_dir.split('_')[-1]
            # Escape the directory part so names containing glob
            # metacharacters ("[", "*", "?") are matched literally.
            video_keyframes[video_id] = sorted(
                glob.glob(f'{glob.escape(video_path)}/*.jpg'))

        all_keyframe_paths[data_part] = video_keyframes

    return all_keyframe_paths

# Model

In [None]:
##### Load Model #####
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
# Build the ViT-L-14 model with the 'datacomp_xl_s13b_b90k' pretrained weights.
# Only the model and the last returned transform (`preprocess`) are used here.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', device=device, pretrained='datacomp_xl_s13b_b90k')
# The model is only used for feature extraction, so put it in eval mode
# (open_clip models are created in train mode). Assigning the result keeps the
# notebook from echoing the whole model repr as the cell output.
model = model.eval()

In [None]:
def create_directory(path):
    """Create ``path`` (including missing parents) if it does not exist.

    Uses ``exist_ok=True`` rather than a separate existence check, so there is
    no window between the check and the creation in which another process
    could create the directory and make this call fail.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)


def process_and_save_results(all_keyframe_paths, save_dir, model, bs):
    """Encode every keyframe with ``model`` and save one ``.npy`` file per video.

    For each outer key a sub-directory ``<save_dir>/<key>`` is created, and
    for each video id the L2-normalised float32 image features of its
    keyframes are written to ``<save_dir>/<key>/<video_id>.npy`` in the order
    the keyframe paths are listed.

    NOTE: besides its arguments, this function reads the module-level names
    ``preprocess`` (image transform) and ``device`` (target device), which
    must be defined before it is called.

    Args:
        all_keyframe_paths: Dict mapping a key to a dict that maps a video id
            to a list of image paths.
        save_dir: Root output directory; created if missing.
        model: Object exposing ``encode_image(images)``.
            Presumably returns one feature row per image -- confirm against
            the model in use.
        bs: Number of images encoded per forward pass.
    """
    create_directory(save_dir)

    for key, video_keyframe_paths in tqdm(all_keyframe_paths.items()):
        video_path = f'{save_dir}/{key}'
        create_directory(video_path)

        for video_id in tqdm(sorted(video_keyframe_paths.keys())):
            video_keyframe_path = video_keyframe_paths[video_id]
            video_feats = []

            # Encode the keyframes in batches of at most `bs` images.
            for i in range(0, len(video_keyframe_path), bs):
                images = []
                for image_path in video_keyframe_path[i:i + bs]:
                    # Context manager releases the file handle as soon as the
                    # image has been converted to a tensor.
                    with Image.open(image_path) as image:
                        images.append(preprocess(image).unsqueeze(0))
                images = torch.cat(images).to(device)

                with torch.no_grad(), torch.cuda.amp.autocast():
                    image_feats = model.encode_image(images)
                # Scale every feature vector to unit L2 norm.
                image_feats /= image_feats.norm(dim=-1, keepdim=True)

                # Copy the whole batch to host memory once instead of once per
                # row, then keep one flat float32 vector per image.
                batch_feats = image_feats.detach().cpu().numpy().astype(np.float32)
                video_feats.extend(batch_feats.reshape(batch_feats.shape[0], -1))

            np.save(f'{video_path}/{video_id}.npy', video_feats)

In [None]:
# Index every keyframe under `keyframes_dir`, then encode them in batches of
# `bs` and write the resulting features below `save_dir`.
all_keyframe_paths = parse_keyframe_info(keyframes_dir)
process_and_save_results(all_keyframe_paths, save_dir, model, bs)