# Setup OoD pipeline

---

In this notebook, the user can set up parameters for the embedding-based OoD methods:
* OoD Entropy method (https://arxiv.org/abs/2002.03103)
* KNN Dist (https://arxiv.org/pdf/2207.03061.pdf)
* RMD (https://arxiv.org/abs/2106.09022)

Confident Learning methods depend only on classification outputs and accept any file with the `.clf.pkl` extension.


## Generate embeddings files

---


In [None]:
import os
import numpy as np
import pandas as pd
import timm

from tool.core.data_types import types
import czebra as cz
from tool.core.czebra_adapter import CZebraAdapter

In [None]:
# Setup data
# NOTE(review): both locations are absolute paths on one machine; adjust them before running elsewhere.

data_dir = '/home/vlasova/datasets/ood_datasets/MNIST_FashionMNIST'
ood_session_dir = '/home/vlasova/datasets/ood_datasets/MNIST_FashionMNIST/oodsession_0'
# The session directory doubles as the output directory.
output_dir = ood_session_dir

# The dataset description is stored as a pickle inside the session directory.
# NOTE(review): pd.read_pickle can execute arbitrary code; only load files you trust.
metadata_file_path = os.path.join(ood_session_dir, 'DatasetDescription.meta.pkl')
metadata_df = pd.read_pickle(metadata_file_path)

### Supported models

---
#### Pretrained on ImageNet

##### Swin Based

In [None]:
# Swin and SwinV2 model names (timm identifiers), one per line for readability.
timm_swin_model_ids = [
    'swin_base_patch4_window7_224',
    'swin_base_patch4_window7_224_in22k',
    'swin_base_patch4_window12_384',
    'swin_base_patch4_window12_384_in22k',
    'swin_large_patch4_window7_224',
    'swin_large_patch4_window7_224_in22k',
    'swin_large_patch4_window12_384',
    'swin_large_patch4_window12_384_in22k',
    'swin_s3_base_224',
    'swin_s3_small_224',
    'swin_s3_tiny_224',
    'swin_small_patch4_window7_224',
    'swin_tiny_patch4_window7_224',
    'swinv2_base_window8_256',
    'swinv2_base_window12_192_22k',
    'swinv2_base_window12to16_192to256_22kft1k',
    'swinv2_base_window12to24_192to384_22kft1k',
    'swinv2_base_window16_256',
    'swinv2_cr_small_224',
    'swinv2_cr_small_ns_224',
    'swinv2_cr_tiny_ns_224',
    'swinv2_large_window12_192_22k',
    'swinv2_large_window12to16_192to256_22kft1k',
    'swinv2_large_window12to24_192to384_22kft1k',
    'swinv2_small_window8_256',
    'swinv2_small_window16_256',
    'swinv2_tiny_window8_256',
    'swinv2_tiny_window16_256',
]
# Embedder ids are the timm names prefixed with "timm_"; the resulting list is the cell output.
[f'timm_{swin_id}' for swin_id in timm_swin_model_ids]

##### ResNet Based

In [None]:
# ResNet model names (timm identifiers), held in a named list in the same way as timm_swin_model_ids.
timm_resnet_model_ids = ['resnet50']
# Embedder ids are the timm names prefixed with "timm_"; the resulting list is the cell output.
[f'timm_{resnet_id}' for resnet_id in timm_resnet_model_ids]

#### Pretrained on TrafficLights Dataset

In [None]:
# Query czebra for torch models with the shared-regnet architecture and the trafficlights use case.
shared_regnet_tl_embedders_id = cz.search_model(
    framework="torch",
    arch="shared-regnet",
    usecase="trafficlights",
)
# Show only the model_id of each search result as the cell output.
[found_model.model_id for found_model in shared_regnet_tl_embedders_id]

In [None]:
# Select embedder id
emb_id = 'timm_swin_small_patch4_window7_224'

# The adapter receives the metadata, the dataset root and the output directory.
embedder_wrapper = CZebraAdapter(
    metadata_df,
    data_dir,
    output_dir=output_dir,
)
# predict() returns a pair; only its first element (the output file) is used here.
output_file, _ = embedder_wrapper.predict(model_id=emb_id)
print(output_file)

In [None]:
## List of emb files in ood_session_dir

# os.listdir returns entries in arbitrary order, so sort them to keep the printed
# list and the order in which the files are processed later stable between runs.
embeddings_files = sorted(
    entry for entry in os.listdir(ood_session_dir) if entry.endswith(".emb.pkl")
)

for emb_file_name in embeddings_files:
    print(emb_file_name)

## OoD KNN Dist 

---

In [None]:
from tool.core.ood_score import ood_confident_learning

In [None]:
# To score a single file only, iterate over a one-element list instead,
# e.g. ['timm_swin_small_patch4_window7_224.emb.pkl'].
# NOTE(review): this section is titled "KNN Dist" and the outputs are tagged '_knn_dist',
# but the scores come from ood_confident_learning.score_embeddings - confirm this is the intended scorer.
for embeddings_file in embeddings_files:
    embeddings_path = os.path.join(ood_session_dir, embeddings_file)
    ood_score = ood_confident_learning.score_embeddings(
        embedding_file=embeddings_path,
        metadata_df=metadata_df,
    )

    # Table with the relative paths taken from the metadata next to the computed scores.
    path_column = types.RelativePathType.name()
    score_column = types.OoDScoreType.name()
    ood_knn_dist_df = pd.DataFrame()
    ood_knn_dist_df[path_column] = metadata_df[path_column]
    ood_knn_dist_df[score_column] = ood_score

    # The output name is the embeddings file name with '_knn_dist.ood.pkl' appended.
    ood_file_name = f'{embeddings_file}_knn_dist.ood.pkl'
    ood_knn_dist_df.to_pickle(os.path.join(ood_session_dir, ood_file_name))

## OoD Entropy

---

In [None]:
from tool.core.ood_score import ood_entropy

In [None]:
# Options forwarded to ood_entropy.score_embeddings.
# NOTE(review): this is a set literal, so it carries no ordering; switch to a list if
# score_embeddings depends on the order of the coefficients - confirm.
regularization_coefficients = {1e-5, 1.0, 1e5}
classifier_type = 'saga'
reduce_dim = False
# Required only if reduce_dim set to True
# NOTE(review): presumably one entry per embeddings file - TODO confirm.
n_components = [768, 612]

# Embeddings used for the entropy score. They get their own names so that the
# `embeddings_files` listing of the session directory (iterated by the KNN Dist loop)
# is not overwritten when this cell runs.
entropy_embeddings_names = ['torch_shared-regnet_trafficlights_v12.emb.pkl', 'timm_resnet50.emb.pkl']
entropy_embeddings_files = [
    os.path.join(ood_session_dir, emb_name) for emb_name in entropy_embeddings_names
]

ood_score = ood_entropy.score_embeddings(
    embeddings_files=entropy_embeddings_files,
    metadata_df=metadata_df,
    regularization_coefficients=regularization_coefficients,
    classifier_type=classifier_type,
    logs_callback=print,
    reduce_dim=reduce_dim,
    n_components=n_components,
)

# Table with the relative paths taken from the metadata next to the computed scores.
ood_entropy_df = pd.DataFrame()
ood_entropy_df[types.RelativePathType.name()] = metadata_df[types.RelativePathType.name()]
ood_entropy_df[types.OoDScoreType.name()] = ood_score

ood_file_name = ''.join(('ood_entropy_score', '.ood.pkl'))
print(ood_file_name)
ood_entropy_df.to_pickle(os.path.join(ood_session_dir, ood_file_name))

## OoD RMD

---

In [None]:
from tool.core.ood_score import ood_relative_mahalanobis_distance

In [None]:
# Embeddings file to score and the scoring options.
embeddings_file = 'timm_swin_base_patch4_window7_224.emb.pkl'
bayes = False
# NOTE(review): `relative` is not passed to score() here; in this notebook it only
# affects the output file name built in the next cell - confirm that is intended.
relative = True

rmd_embeddings_path = os.path.join(ood_session_dir, embeddings_file)
ood_score = ood_relative_mahalanobis_distance.score(
    rmd_embeddings_path,
    metadata_df,
    bayes=bayes,
)

# Table with the relative paths taken from the metadata next to the computed scores.
path_column = types.RelativePathType.name()
score_column = types.OoDScoreType.name()
ood_rmd_df = pd.DataFrame()
ood_rmd_df[path_column] = metadata_df[path_column]
ood_rmd_df[score_column] = ood_score

In [None]:
# Output name: "<embeddings file><method suffix>.ood.pkl", where the suffix
# reflects the `bayes` and `relative` flags.
if bayes:
    suffix = '_bayes_rmd'
else:
    suffix = '_rmd'
if relative:
    suffix += '_relative'

ood_file_name = f'{embeddings_file}{suffix}.ood.pkl'
ood_rmd_df.to_pickle(os.path.join(ood_session_dir, ood_file_name))

## Confident Learning

---

In [None]:
from tool.core.ood_score import ood_confident_learning

# Classification output (.clf.pkl) stored in the session directory.
probabilities_file = os.path.join(
    ood_session_dir,
    'torch_shared-regnet_trafficlights_v12.clf.pkl',
)

# NOTE(review): head_idx=0 presumably selects the first classifier head - TODO confirm.
ood_score = ood_confident_learning.score_predicted_probabilities(
    probabilities_file=probabilities_file,
    metadata_df=metadata_df,
    head_idx=0,
)

In [None]:
# Table with the relative paths taken from the metadata next to the confident-learning scores.
path_column = types.RelativePathType.name()
score_column = types.OoDScoreType.name()
ood_cl = pd.DataFrame()
ood_cl[path_column] = metadata_df[path_column]
ood_cl[score_column] = ood_score

# Stored in the session directory under a fixed file name.
ood_file_name = 'confidence_learning_ood' + '.ood.pkl'
ood_cl.to_pickle(os.path.join(ood_session_dir, ood_file_name))

## Run metrics

---

In [None]:
from tool.core.ood_score.metrics import run_metrics

In [None]:
# Peek at ten randomly chosen metadata rows; no random_state is given, so the rows differ between runs.
metadata_df.sample(n=10)

In [None]:
## List of ood files in ood_session_dir

for session_entry in os.listdir(ood_session_dir):
    # Skip everything that is not an OoD score file.
    if not session_entry.endswith(".ood.pkl"):
        continue
    print(session_entry)

In [None]:
# OoD score file to evaluate; it is read from the session directory.
selected_file = 'timm_swin_small_patch4_window7_224.emb.pkl_knn_dist.ood.pkl'
ood_df = pd.read_pickle(os.path.join(ood_session_dir, selected_file))

# Relative paths
# NOTE(review): these folder names and the trafficlights classifier file below do not obviously
# match the MNIST_FashionMNIST session path configured at the top - confirm they belong to this dataset.
ood_folders = [
    'main/val/trash',
    'main/val/pedestrian_tl_10_forward',
    'main/val/pedestrian_tl_blinked',
    'main/val/pedestrian_tl_01_stop',
]

# If there is no probability score, then set it to None
probabilities_file = os.path.join(
    ood_session_dir,
    'torch_shared-regnet_trafficlights_v12.clf.pkl',
)

In [None]:
# Evaluate the selected scores; log lines are sent to print.
# NOTE(review): k=300 is hard-coded and its meaning is defined by run_metrics - consider naming it in a config cell.
run_metrics(
    ood_df,
    metadata_df,
    ood_folders,
    logs_callback=print,
    k=300,
    probabilities_file=probabilities_file,
)