# Extract features from keyframes using CLIP ViT-L/14

In [1]:
# Instal dependency
!pip install open_clip_torch -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Import module
import os
import open_clip
import glob
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm

# Load Model

In [3]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# Build the OpenCLIP 'ViT-L-14' model with the 'datacomp_xl_s13b_b90k' weights.
# The second return value is not needed here; `preprocess` is the image
# transform applied to every keyframe before it is fed to the model.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', device=device, pretrained='datacomp_xl_s13b_b90k')

# open_clip returns the model in train mode; this notebook only runs inference,
# so switch to eval mode. Assigning the result keeps the cell from echoing the
# whole module repr as its output.
model = model.eval()

cuda


open_clip_pytorch_model.bin:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

# Parse data path

# Run inference (demo = True runs on a single directory; otherwise on all directories)

In [None]:
keyframes_dir = "/kaggle/input/extract-the-keyframes/Keyframes/"

# Directory where the per-image feature vectors are written
save_dir = "/kaggle/working/CLIPv2_features/"
os.makedirs(save_dir, exist_ok=True)

# Collect the image files that sit directly inside keyframes_dir (os.listdir
# does not descend into subdirectories). The extension check is
# case-insensitive so files such as "frame.JPG" are not silently skipped.
image_extensions = ('.jpg', '.jpeg', '.png', '.webp')
image_files = sorted(f for f in os.listdir(keyframes_dir) if f.lower().endswith(image_extensions))

# Mixed precision is only enabled on the GPU; on CPU the model runs as-is.
use_amp = device == "cuda"

# Extract and store one feature vector per image
for image_file in tqdm(image_files, desc="Extracting features"):
    image_path = os.path.join(keyframes_dir, image_file)

    # Open inside a context manager so the file handle is released as soon as
    # the image has been preprocessed into a tensor of shape (1, ...).
    with Image.open(image_path) as pil_image:
        image = preprocess(pil_image).unsqueeze(0).to(device)

    # Forward pass without gradient tracking
    with torch.no_grad(), torch.autocast(device_type=device, enabled=use_amp):
        image_feats = model.encode_image(image)

    # Cast to float32 *before* L2-normalising: under autocast the features may
    # come back in reduced precision, and normalising there would leave the
    # stored vectors only approximately unit-length.
    image_feats = image_feats.float()
    image_feats = image_feats / image_feats.norm(dim=-1, keepdim=True)
    image_feats = image_feats.cpu().numpy().flatten()

    # Save as <image name without extension>.npy
    frame_id = os.path.splitext(image_file)[0]
    np.save(os.path.join(save_dir, f"{frame_id}.npy"), image_feats)

print(f"Saved {len(image_files)} feature files to {save_dir}")

# Convert all features into a single FAISS index file (.bin)

In [None]:
!pip install faiss-cpu

In [6]:
import os
import glob
import faiss
import numpy as np
from tqdm import tqdm

In [None]:
feature_shape = 768  # dimensionality every stored feature vector must have
features_dir = '/kaggle/working/CLIPv2_features'
index_path = "faiss_clipv2_cosine.bin"
add_batch_size = 4096  # number of vectors handed to FAISS per add() call

# Flat inner-product index
index = faiss.IndexFlatIP(feature_shape)

# Vectors are added in sorted-filename order, so row i of the index corresponds
# to feature_files[i].
feature_files = sorted(glob.glob(os.path.join(features_dir, '*.npy')))
if not feature_files:
    # Fail loudly instead of silently writing an empty index.
    raise FileNotFoundError(f"No .npy feature files found in {features_dir}")

for start in tqdm(range(0, len(feature_files), add_batch_size), desc="Indexing features"):
    batch = []
    for feature_file in feature_files[start:start + add_batch_size]:
        feat = np.load(feature_file).astype(np.float32).reshape(-1)
        if feat.shape[0] != feature_shape:
            raise ValueError(
                f"{feature_file}: expected {feature_shape} values, got {feat.shape[0]}"
            )
        batch.append(feat)

    # One add() per batch instead of one per vector
    index.add(np.stack(batch))

# Write the index to disk
faiss.write_index(index, index_path)
print(f"FAISS index saved to {index_path} ({index.ntotal} vectors)")

In [9]:
# Delete the directory holding the intermediate per-image .npy feature files
!rm -rf /kaggle/working/CLIPv2_features