In [43]:
import pandas as pd
import numpy as np

In [44]:
from pathlib import Path
from pprint import pprint
from typing import Tuple

import hydra
import pytorch_lightning as pl
from omegaconf import DictConfig
from oml.const import TCfg
from oml.datasets.images import get_retrieval_images_datasets
from oml.lightning.callbacks.metric import MetricValCallback
from oml.lightning.modules.extractor import ExtractorModule, ExtractorModuleDDP
from oml.lightning.pipelines.parser import (
    check_is_config_for_ddp,
    parse_logger_from_config,
    parse_ckpt_callback_from_config,
    parse_engine_params_from_config,
    parse_sampler_from_config,
    parse_scheduler_from_config,
)
from oml.metrics.embeddings import EmbeddingMetrics
from oml.registry.losses import get_criterion_by_cfg
from oml.registry.models import get_extractor_by_cfg
from oml.registry.optimizers import get_optimizer_by_cfg
from oml.registry.transforms import TRANSFORMS_REGISTRY, get_transforms_by_cfg
from oml.utils.misc import dictconfig_to_dict, set_global_seed
from torch.utils.data import DataLoader

import torch

import albumentations as albu
import cv2
from albumentations.pytorch import ToTensorV2
from oml.const import MEAN, PAD_COLOR, STD, TNormParam

In [45]:
from datetime import datetime

postfix = "metric_learning"

# Wall-clock time at the moment this cell runs, unpacked into its components.
current_dateTime = datetime.now()
y, month, d = current_dateTime.year, current_dateTime.month, current_dateTime.day
hour, minute, s = current_dateTime.hour, current_dateTime.minute, current_dateTime.second
ms = current_dateTime.microsecond

# Single configuration object for the prediction run. Key order is kept stable
# because the pipeline prints the whole config.
cfg: TCfg = dict(
    postfix=postfix,
    seed=42,
    image_size=224,  # side length handed to get_transforms
    accelerator="gpu",
    devices=1,
    num_workers=4,  # DataLoader workers
    cache_size=0,
    test_data_dir="../test/gallery/",  # directory scanned recursively for images
    bs_val=8,  # DataLoader batch size
    extractor=dict(
        name="vit",
        args=dict(
            arch="vitl14",
            # Optional args currently disabled: "gem_p": 1.0, "remove_fc": True
            normalise_features=False,
            weights="checkpoints/best-v4.ckpt",
        ),
    ),
)


In [46]:
def get_transforms(im_size: int, mean: TNormParam = MEAN, std: TNormParam = STD) -> albu.Compose:
    """
    Build the albumentations pipeline applied to each image before extraction.

    Follows the default oml albu augmentations, but without HorizontalFlip.

    :param im_size: Target size; used as ``max_size`` for ``LongestMaxSize`` and as
        both ``min_height`` and ``min_width`` for ``PadIfNeeded``.
    :param mean: Per-channel mean forwarded to ``albu.Normalize``.
    :param std: Per-channel std forwarded to ``albu.Normalize``.
    :return: ``albu.Compose`` chaining resize, pad, normalize and tensor conversion.
    """
    resize_step = albu.LongestMaxSize(max_size=im_size)
    # Padding uses a constant border filled with the library's PAD_COLOR.
    pad_step = albu.PadIfNeeded(
        min_height=im_size,
        min_width=im_size,
        border_mode=cv2.BORDER_CONSTANT,
        value=PAD_COLOR,
    )
    normalize_step = albu.Normalize(mean=mean, std=std)
    to_tensor_step = ToTensorV2()

    steps = [resize_step, pad_step, normalize_step, to_tensor_step]
    return albu.Compose(steps)

# Формирование предсказания

In [47]:
import itertools
import json
from pathlib import Path

import pytorch_lightning as pl
from torch.utils.data import DataLoader

from oml.const import IMAGE_EXTENSIONS
from oml.datasets.images import ImageBaseDataset
from oml.ddp.utils import get_world_size_safe, is_main_process, sync_dicts_ddp
from oml.transforms.images.utils import get_im_reader_for_transforms
from oml.utils.images.images import find_broken_images
from oml.utils.misc import dictconfig_to_dict


def extractor_prediction_pipeline(cfg: TCfg) -> dict[str, list[float]]:
    """
    Extract an embedding for every image found under ``cfg["test_data_dir"]``.

    Images are discovered recursively for each extension in ``IMAGE_EXTENSIONS``,
    checked for readability, and run through the extractor described by
    ``cfg["extractor"]`` via ``pl.Trainer.predict``. The result is returned in
    memory; nothing is written to disk.

    :param cfg: Run configuration. Keys read directly here: ``"image_size"``,
        ``"test_data_dir"``, ``"bs_val"``, ``"num_workers"``, ``"extractor"`` and the
        optional ``"precision"`` (defaults to ``"16-mixed"``). The whole config is also
        passed to ``parse_engine_params_from_config`` to build the trainer arguments.
    :return: Mapping from image path (as ``str``) to that image's embedding,
        one list of values per image.
    :raises RuntimeError: If no images are found in ``cfg["test_data_dir"]``.
    :raises ValueError: If ``find_broken_images`` reports unreadable images.
    """
    print(cfg)

    transforms = get_transforms(cfg['image_size'])
    filenames = [list(Path(cfg["test_data_dir"]).glob(f"**/*.{ext}")) for ext in IMAGE_EXTENSIONS]
    filenames = list(itertools.chain(*filenames))

    if len(filenames) == 0:
        raise RuntimeError(f"There are no images in the provided directory: {cfg['test_data_dir']}")

    f_imread = get_im_reader_for_transforms(transforms)

    # Fail early on unreadable files instead of crashing mid-prediction.
    print("Let's check if there are broken images:")
    broken_images = find_broken_images(filenames, f_imread=f_imread)
    if broken_images:
        raise ValueError(f"There are images that cannot be open:\n {broken_images}.")

    dataset = ImageBaseDataset(paths=filenames, transform=transforms, f_imread=f_imread)

    # No shuffling and no dropped tail: every image must get exactly one embedding.
    loader = DataLoader(
        dataset=dataset, batch_size=cfg["bs_val"], num_workers=cfg["num_workers"], shuffle=False, drop_last=False
    )

    extractor = get_extractor_by_cfg(cfg["extractor"])
    pl_model = ExtractorModule(extractor=extractor)

    trainer_engine_params = parse_engine_params_from_config(cfg)
    trainer_engine_params["use_distributed_sampler"] = True
    # "16-mixed" is the spelling Lightning recommends for what `precision=16` used to
    # mean (16-bit AMP); it can be overridden through an optional cfg["precision"].
    trainer = pl.Trainer(precision=cfg.get("precision", "16-mixed"), **trainer_engine_params)
    predictions = trainer.predict(model=pl_model, dataloaders=loader, return_predictions=True)

    paths, embeddings = [], []
    for prediction in predictions:
        # Each batch carries the dataset indices of its items; map them back to paths.
        paths.extend([filenames[i] for i in prediction[dataset.index_key].tolist()])
        embeddings.extend(prediction[pl_model.embeddings_key].tolist())

    # Gather results across processes (paths are stringified before syncing).
    paths = sync_dicts_ddp({"key": list(map(str, paths))}, get_world_size_safe())["key"]
    embeddings = sync_dicts_ddp({"key": embeddings}, get_world_size_safe())["key"]

    return dict(zip(paths, embeddings))

In [48]:
# List the files in the checkpoints directory (the extractor weights path in cfg points there).
!ls checkpoints

best-v1.ckpt  best-v2.ckpt  best-v3.ckpt  best-v4.ckpt	best.ckpt


In [49]:
# Run the extraction pipeline with the config above; the returned dict is keyed by image path
# and holds one embedding per image.
dict_results = extractor_prediction_pipeline(cfg)

{'postfix': 'metric_learning', 'seed': 42, 'image_size': 224, 'accelerator': 'gpu', 'devices': 1, 'num_workers': 4, 'cache_size': 0, 'test_data_dir': '../test/gallery/', 'bs_val': 8, 'extractor': {'name': 'vit', 'args': {'arch': 'vitl14', 'normalise_features': False, 'weights': 'checkpoints/best-v4.ckpt'}}}
Let's check if there are broken images:


  albu.PadIfNeeded(
100%|██████████| 4784/4784 [00:04<00:00, 1193.89it/s]


Prefix <model.model.> was removed from the state dict.


/venv/main/lib/python3.10/site-packages/lightning_fabric/connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
Using 16bit Automatic Mixed Precision (AMP)
You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

In [50]:
import faiss
import numpy as np
from tqdm import tqdm

# Split the {path: embedding} mapping into a list of paths and a float32 matrix
# with one row per image.
paths = list(dict_results.keys())
embeddings = np.array(list(dict_results.values()), dtype=np.float32)

# Only file names go into the result. Compute them once here instead of
# rebuilding a Path for every (query, neighbour) pair inside the loop.
# NOTE(review): files with the same name in different subdirectories would
# collide as keys of final_result; confirm names are unique in the gallery.
names = [Path(p).name for p in paths]

# L2-normalise the rows in place so that inner product equals cosine similarity.
faiss.normalize_L2(embeddings)

# Exact inner-product index over all embeddings.
index = faiss.IndexFlatIP(embeddings.shape[1])
index.add(embeddings)

# Every query ranks the whole gallery, so k is the number of images (loop-invariant).
k = embeddings.shape[0]

final_result = {}
for query_index in tqdm(range(len(paths))):
    query_embedding = embeddings[query_index].reshape(1, -1)
    query = names[query_index]
    # Neighbours come back ordered from most to least similar.
    # NOTE(review): the query image itself is part of the index, so it appears in
    # its own ranking; confirm the submission format expects that.
    distances, indices = index.search(query_embedding, k)

    sorted_results = [names[i] for i in indices[0]]
    final_result[query] = sorted_results


100%|██████████| 4784/4784 [01:13<00:00, 65.45it/s]


In [51]:
# One row per query image: its file name and the ranked list of recommended file names.
submission_rows = list(final_result.items())
submission_df = pd.DataFrame(submission_rows, columns=["image_name", "recommendation"])
# Store each ranking as its own plain list object.
submission_df["recommendation"] = submission_df["recommendation"].apply(list)

In [52]:
# Preview the first rows to sanity-check the submission layout.
submission_df.head()

Unnamed: 0,image_name,recommendation
0,000248c80d39c0b6f24acd036015f10b.jpg,"[000248c80d39c0b6f24acd036015f10b.jpg, 3b3bb4e..."
1,000b87d3a508b8713b983b9c438664ed.jpg,"[000b87d3a508b8713b983b9c438664ed.jpg, 3247e74..."
2,0017f47aaacaf983263fcd13d43d0ad1.jpg,"[0017f47aaacaf983263fcd13d43d0ad1.jpg, d1d6333..."
3,0019608b3f00de8fe18aff80fd940a42.jpg,"[0019608b3f00de8fe18aff80fd940a42.jpg, e11cfab..."
4,001cac7590f6173c35e11f6ba28fe6f4.jpg,"[001cac7590f6173c35e11f6ba28fe6f4.jpg, e3c83eb..."


In [53]:
# Write the submission file, omitting the DataFrame index column.
submission_df.to_csv("submission.csv", index=False)