In [1]:
import torch
import clip
import numpy as np
import glob
import math
from tqdm import tqdm
import pandas as pd

from PIL import Image
import pyheif
import rawpy

# Prefer the GPU when torch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# clip.load returns the network together with its matching image transform.
model, preprocess = clip.load("ViT-B/32", device=device)

In [2]:
# Upper bound on how many images go through the model in one forward pass.
batch_size = 12

# The spreadsheet column holds file names; prefix them with the image folder.
df = pd.read_excel('dataset/razm.xlsx')
files = ('dataset/images/' + df['Названия']).values

# Split into the fewest near-equal chunks such that none exceeds batch_size.
n_batches = math.ceil(len(files) / batch_size)
batches = np.array_split(files, n_batches)

In [4]:
def load_image(path):
    """Open an image file, falling back through several decoders.

    The decoders are tried in order: PIL, then rawpy (camera RAW files),
    then pyheif (HEIF/HEIC files). The first one that accepts the file wins.

    Args:
        path: Location of the image file on disk.

    Returns:
        A ``PIL.Image.Image`` holding the decoded picture.

    Raises:
        PIL.UnidentifiedImageError: If none of the three decoders can read
            the file. The message contains ``path`` so the offending file
            can be found; previously the function silently returned ``None``.
    """
    # Try to read image using PIL
    try:
        return Image.open(path)
    except Image.UnidentifiedImageError:
        pass

    # Try to read image using rawpy
    try:
        # The context manager closes the underlying RAW handle once the
        # pixels have been converted into an independent array.
        with rawpy.imread(path) as raw:
            return Image.fromarray(raw.postprocess())
    except rawpy.LibRawFileUnsupportedError:
        pass

    # Try to read image using heif
    try:
        heif = pyheif.read_heif(path)
        # Decoded HEIF rows may be padded, so the stride has to be handed to
        # the raw decoder; without it the buffer size may not match the
        # image size and decoding fails or produces a skewed picture.
        return Image.frombytes(
            heif.mode,
            heif.size,
            heif.data,
            "raw",
            heif.mode,
            heif.stride,
        )
    except ValueError:
        pass

    # Fail loudly with the file name instead of implicitly returning None.
    raise Image.UnidentifiedImageError(
        f"cannot load image {path!r} with PIL, rawpy or pyheif"
    )

In [5]:
# Put the model in evaluation mode before extracting features.
model.eval()
total = []

# Gradients are not needed for feature extraction.
with torch.no_grad():
    for paths in tqdm(batches):
        # Load and preprocess every file of the batch, then stack the
        # results into one tensor on the target device.
        images = [preprocess(load_image(p)) for p in paths]
        pixels = torch.stack(images).to(device)

        embeddings = model.encode_image(pixels)
        # Scale each row to unit norm along the last dimension.
        embeddings /= embeddings.norm(dim=-1, keepdim=True)
        total.append(embeddings)

# Join the per-batch results into a single tensor.
total = torch.cat(total)

  2%|▏         | 16/906 [00:08<08:49,  1.68it/s]

In [6]:
# Persist the concatenated feature tensor to disk.
torch.save(obj=total, f='ViT-B-32-image-features.pth')