In [3]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision.io import read_image
from torchvision.transforms.functional import resize, center_crop, normalize
import pandas as pd
from tqdm import tqdm

# Prefer the GPU, but fall back to the CPU so the cell still runs (slowly)
# on machines without CUDA instead of failing at `model.to(device)`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ImageNet-pretrained ResNet-152 used as a fixed feature extractor.
model = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)
# Drop the last child module (the classification head) so the network outputs
# pooled features rather than class logits.
model = nn.Sequential(*list(model.children())[:-1]).to(device)
# Inference mode: freezes batch-norm statistics and disables dropout.
model.eval()

def load_image(path):
    """Load an image file and preprocess it for the ImageNet-pretrained ResNet.

    Args:
        path: Path to an image file readable by ``torchvision.io.read_image``.

    Returns:
        A float tensor with 3 channels, centre-cropped to 224x224 and
        normalized with the ImageNet mean/std.

    Notes:
        Assumes 8-bit images (pixel values are scaled by 1/255).
    """
    img = read_image(path).float() / 255.0

    # Force exactly three colour channels so normalization, batching and the
    # model never see a mismatched channel count.
    channels = img.shape[0]
    if channels == 1:        # grayscale
        img = img.repeat(3, 1, 1)
    elif channels == 2:      # grayscale + alpha: drop alpha, replicate gray
        img = img[:1].repeat(3, 1, 1)
    elif channels == 4:      # RGBA: drop alpha
        img = img[:3]

    # antialias=True is passed explicitly: the tensor default differs between
    # torchvision versions, and un-antialiased downscaling produces aliasing
    # artifacts.
    img = resize(img, 256, antialias=True)
    img = center_crop(img, 224)
    img = normalize(
        img,
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
    return img

class ImageDataset(Dataset):
    """Map-style dataset that serves preprocessed images from one directory."""

    def __init__(self, image_dir, image_names):
        """Remember the directory and the file names to serve from it."""
        self.image_dir, self.image_names = image_dir, image_names

    def __len__(self):
        """Return how many file names the dataset holds."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Return whatever ``load_image`` produces for the ``idx``-th file."""
        file_name = self.image_names[idx]
        full_path = os.path.join(self.image_dir, file_name)
        return load_image(full_path)

image_dir = "data/4866_three_bogatyrs_2/dataset/"
# Keep regular files only: os.listdir also returns sub-directories (for
# example a Jupyter `.ipynb_checkpoints` folder), which cannot be decoded.
image_names = sorted(
    name
    for name in os.listdir(image_dir)
    if os.path.isfile(os.path.join(image_dir, name))
)
if not image_names:
    # Fail early with a clear message instead of an opaque torch.cat error.
    raise FileNotFoundError(f"No image files found in {image_dir!r}")

dataset = ImageDataset(image_dir, image_names)
loader = DataLoader(
    dataset,
    batch_size=32,
    num_workers=0,
    # Pinned host memory only helps host-to-GPU copies.
    pin_memory=(device.type == "cuda")
)

# --- Feature extraction ------------------------------------------------------
features = []

with torch.no_grad():
    for batch in tqdm(loader, desc="Extracting features"):
        batch = batch.to(device)
        # Flatten everything after the batch dimension into one feature vector.
        feats = model(batch).flatten(1)
        features.append(feats)

image_features = torch.cat(features, dim=0)
# Unit-length rows, so a dot product between rows is the cosine similarity.
image_features = torch.nn.functional.normalize(image_features, dim=1)

# --- Ranking -----------------------------------------------------------------
K = 6
num_images = image_features.size(0)
# An image can have at most num_images - 1 neighbours other than itself.
num_neighbours = max(0, min(K, num_images - 1))
recommendations = []

for i in tqdm(range(num_images), desc="Ranking"):
    sims = image_features @ image_features[i]
    # Exclude the query explicitly. Dropping "the first hit" is wrong when
    # duplicate images tie with the query: the query may not be ranked first,
    # and it would then recommend itself while a real neighbour is discarded.
    sims[i] = float("-inf")
    topk = torch.topk(sims, num_neighbours).indices
    recommendations.append(topk.cpu().tolist())

# --- Submission file -----------------------------------------------------------
rows = [
    [name, " ".join(image_names[j] for j in recommendations[i])]
    for i, name in enumerate(image_names)
]

df = pd.DataFrame(rows, columns=["filename", "ranking"])
df.to_csv(
    "data/4866_three_bogatyrs_2/submission.csv",
    index=False
)

print("Done")



Extracting features: 100%|███████████████████████████████████████████████████████████| 301/301 [00:25<00:00, 11.62it/s]
Ranking: 100%|███████████████████████████████████████████████████████████████████| 9605/9605 [00:01<00:00, 4889.65it/s]

Done





In [9]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from torchvision.transforms.functional import resize, center_crop
import open_clip
import faiss
import pandas as pd
from tqdm import tqdm

# Inference for this cell runs on the CUDA device.
device = torch.device("cuda")

# OpenAI-pretrained CLIP ViT-L/14 from open_clip. The call returns three
# values; the second is discarded and the third is kept as `preprocess`.
model, _, preprocess = open_clip.create_model_and_transforms("ViT-L-14", pretrained="openai")

# Move to the device, cast the weights to fp16 and switch to inference mode
# (each of these module methods returns the module itself).
model = model.to(device).half().eval()

# Per-channel statistics used by the OpenAI CLIP preprocessing pipeline.
_CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073)
_CLIP_STD = (0.26862954, 0.26130258, 0.27577711)


def load_image(path):
    """Load an image file and preprocess it for the CLIP image encoder.

    Args:
        path: Path to an image file readable by ``torchvision.io.read_image``.

    Returns:
        A float32 tensor with 3 channels, centre-cropped to 224x224 and
        normalized with the CLIP mean/std.

    Notes:
        Assumes 8-bit images (pixel values are scaled by 1/255).
        NOTE(review): the `preprocess` transform returned by open_clip is the
        reference pipeline; compare against it if exact parity matters.
    """
    img = read_image(path).float() / 255.0

    # Force exactly three colour channels so normalization, batching and the
    # model never see a mismatched channel count.
    channels = img.shape[0]
    if channels == 1:        # grayscale
        img = img.repeat(3, 1, 1)
    elif channels == 2:      # grayscale + alpha: drop alpha, replicate gray
        img = img[:1].repeat(3, 1, 1)
    elif channels == 4:      # RGBA: drop alpha
        img = img[:3]

    # antialias=True is passed explicitly: the tensor default differs between
    # torchvision versions, and un-antialiased downscaling produces aliasing
    # artifacts.
    img = resize(img, 224, antialias=True)
    img = center_crop(img, 224)

    # CLIP was trained on mean/std-normalized pixels; feeding raw [0, 1]
    # values shifts the input distribution and degrades the embeddings.
    mean = torch.tensor(_CLIP_MEAN, dtype=img.dtype).view(3, 1, 1)
    std = torch.tensor(_CLIP_STD, dtype=img.dtype).view(3, 1, 1)
    return (img - mean) / std

class ImageDataset(Dataset):
    """Directory-backed dataset whose items come from ``load_image``."""

    def __init__(self, image_dir, image_names):
        """Store the source directory and the list of file names as given."""
        self.image_names = image_names
        self.image_dir = image_dir

    def __len__(self):
        """Dataset size, i.e. the number of stored file names."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Join the directory with the ``idx``-th name and load that file."""
        target = os.path.join(self.image_dir, self.image_names[idx])
        return load_image(target)

image_dir = "data/4866_three_bogatyrs_2/dataset/"
# Keep regular files only: os.listdir also returns sub-directories (for
# example a Jupyter `.ipynb_checkpoints` folder), which cannot be decoded.
image_names = sorted(
    name
    for name in os.listdir(image_dir)
    if os.path.isfile(os.path.join(image_dir, name))
)
if not image_names:
    # Fail early with a clear message instead of an opaque torch.cat error.
    raise FileNotFoundError(f"No image files found in {image_dir!r}")

dataset = ImageDataset(image_dir, image_names)
loader = DataLoader(
    dataset,
    batch_size=64,
    num_workers=0,
    pin_memory=True
)

# --- Feature extraction ------------------------------------------------------
features = []

with torch.no_grad():
    for batch in tqdm(loader, desc="Extracting CLIP features"):
        # The model weights are fp16, so the input batch must be fp16 as well.
        batch = batch.to(device).half()
        feats = model.encode_image(batch)
        # L2-normalize in float32: computing the norm in fp16 loses precision.
        feats = torch.nn.functional.normalize(feats.float(), dim=1)
        # Move each batch to host memory; FAISS consumes a NumPy array anyway.
        features.append(feats.cpu())

features = torch.cat(features, dim=0).numpy().astype("float32")

# --- Nearest-neighbour search --------------------------------------------------
# Inner product on unit-length vectors equals cosine similarity.
index = faiss.IndexFlatIP(features.shape[1])
index.add(features)

K = 6
# Ask for one extra hit because the query image is part of the index.
_, indices = index.search(features, K + 1)

# --- Submission file -----------------------------------------------------------
rows = []
for i, name in enumerate(image_names):
    # Remove the query by index rather than by position: with duplicate images
    # the query is not guaranteed to be the first hit. Also skip the -1 padding
    # FAISS returns when fewer than K + 1 vectors exist, since
    # image_names[-1] would silently resolve to the last file.
    neighbours = [int(j) for j in indices[i] if j >= 0 and j != i][:K]
    rows.append([
        name,
        " ".join(image_names[j] for j in neighbours)
    ])

df = pd.DataFrame(rows, columns=["filename", "ranking"])
df.to_csv("data/4866_three_bogatyrs_2/submission_clip.csv", index=False)

print("CLIP + FAISS done")

open_clip_model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

Extracting CLIP features: 100%|██████████████████████████████████████████████████████| 151/151 [00:18<00:00,  7.99it/s]


CLIP + FAISS done
