In [1]:
# Install dependency
#!pip install git+https://github.com/openai/CLIP.git

In [1]:
# Import modules
import os
import clip
import glob
import torch
import re
import numpy as np
from PIL import Image
from tqdm import tqdm

# Parse data path

In [2]:
# Root folder that is scanned for per-video keyframe sub-folders.
keyframes_dir = r'D:\VN_Multi_User_Video_Search\frontend\ai\public\data\Keyframes'
# Nested mapping: data part -> video id -> sorted list of .jpg paths.
all_keyframe_paths = dict()
# Accepted folder names: one uppercase letter, digits, "_V", digits.
pattern = re.compile(r'^[A-Z]\d+_V\d+$')

for folder_name in os.listdir(keyframes_dir):
    # Ignore entries whose name does not follow the expected layout.
    if not pattern.match(folder_name):
        continue

    folder_path = os.path.join(keyframes_dir, folder_name)
    # A matching name may still be a plain file; only directories are scanned.
    if not os.path.isdir(folder_path):
        continue

    # The pattern guarantees exactly one underscore, so this yields two parts.
    data_part, video_id = folder_name.split('_')

    keyframe_paths = glob.glob(os.path.join(folder_path, '*.jpg'))
    keyframe_paths.sort()
    # Folders without any .jpg file are left out of the mapping entirely.
    if not keyframe_paths:
        continue

    if data_part not in all_keyframe_paths:
        all_keyframe_paths[data_part] = {}
    all_keyframe_paths[data_part][video_id] = keyframe_paths

# Model

In [3]:
# Display the model names reported by the installed `clip` package.
clip.available_models()

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [4]:
# ---- Load the CLIP model ----
# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# clip.load returns the model together with its matching image preprocessing callable.
model, preprocess = clip.load("ViT-L/14@336px", device=device)

cuda


100%|███████████████████████████████████████| 891M/891M [00:51<00:00, 18.0MiB/s]


In [5]:
# Encode every keyframe with CLIP and store one .npy file per video under
# save_dir/<data part>/<video id>.npy. Each file holds one L2-normalised
# float32 feature vector per keyframe, in sorted keyframe order.
bs = 4  # number of keyframes encoded per forward pass
save_dir = './CLIP_features'
# exist_ok avoids the check-then-create race and matches the per-key folders below.
os.makedirs(save_dir, exist_ok=True)

for key, video_keyframe_paths in all_keyframe_paths.items():
    out_key_dir = os.path.join(save_dir, key)
    os.makedirs(out_key_dir, exist_ok=True)

    # Label each progress bar with its data part so the bars can be told apart.
    for video_id in tqdm(sorted(video_keyframe_paths.keys()), desc=key):
        out_path = os.path.join(out_key_dir, f"{video_id}.npy")
        if os.path.exists(out_path):  # skip if the result already exists
            continue

        video_feats = []
        frames = video_keyframe_paths[video_id]
        for i in range(0, len(frames), bs):
            imgs = []
            for p in frames[i:i+bs]:
                # Close each image file as soon as its tensor has been built,
                # instead of leaving the handle to the garbage collector.
                with Image.open(p) as img:
                    imgs.append(preprocess(img).unsqueeze(0))
            imgs = torch.cat(imgs).to(device)
            with torch.no_grad():
                feats = model.encode_image(imgs)
                # Scale every feature vector to unit L2 norm.
                feats /= feats.norm(dim=-1, keepdim=True)
            video_feats.extend(feats.cpu().numpy().astype(np.float32))

        # Write to a temporary file first and rename it into place afterwards.
        # If the run is interrupted mid-write, no truncated .npy is left at
        # out_path for the "already exists" check above to skip forever.
        # A file object is passed so np.save does not append ".npy" to the name.
        tmp_path = out_path + '.tmp'
        with open(tmp_path, 'wb') as f:
            np.save(f, video_feats)
        os.replace(tmp_path, out_path)

100%|██████████| 31/31 [04:06<00:00,  7.96s/it]
100%|██████████| 31/31 [03:40<00:00,  7.13s/it]
100%|██████████| 29/29 [03:01<00:00,  6.26s/it]
100%|██████████| 30/30 [03:25<00:00,  6.84s/it]
100%|██████████| 31/31 [03:34<00:00,  6.93s/it]
100%|██████████| 31/31 [03:42<00:00,  7.17s/it]
100%|██████████| 31/31 [03:38<00:00,  7.05s/it]
100%|██████████| 30/30 [04:06<00:00,  8.22s/it]
100%|██████████| 28/28 [03:22<00:00,  7.23s/it]
100%|██████████| 28/28 [03:40<00:00,  7.87s/it]
100%|██████████| 31/31 [03:30<00:00,  6.78s/it]
100%|██████████| 31/31 [03:41<00:00,  7.16s/it]
100%|██████████| 30/30 [03:59<00:00,  8.00s/it]
100%|██████████| 30/30 [04:02<00:00,  8.07s/it]
100%|██████████| 31/31 [03:28<00:00,  6.72s/it]
100%|██████████| 32/32 [03:50<00:00,  7.19s/it]
100%|██████████| 30/30 [03:28<00:00,  6.97s/it]
100%|██████████| 28/28 [03:13<00:00,  6.90s/it]
100%|██████████| 31/31 [04:31<00:00,  8.75s/it]
100%|██████████| 31/31 [03:57<00:00,  7.67s/it]
100%|██████████| 29/29 [02:01<00:00,  4.