# Model performance

In [13]:
import numpy as np
import shap
import pickle
from datasets import load_dataset
from src.utils import legacy_get_dataset_info
from transformers import pipeline, AutoTokenizer
import pandas as pd
from datasets import load_dataset, Dataset
import os
from tqdm import tqdm
from src.utils import token_segments, text_ft_index_ends, ConfigLoader
# from src.models import Model
import lightgbm as lgb
from src.models import WeightedEnsemble, StackModel, AllAsTextModel
from src.joint_masker import JointMasker
import argparse
import scipy as sp
from sklearn.metrics import roc_auc_score

In [23]:
def run_shap(
    config_type,
    max_samples=100,
    test_set_size=100,
):
    """Build the model described by a SHAP config and predict on a test sample.

    Despite the name, no SHAP values are computed here. The function loads the
    run config and its dataset config, builds the requested model
    (all-as-text, weighted ensemble or stack) and returns that model's
    predictions on a fixed-seed sample of the test split together with the
    true labels.

    Args:
        config_type: Key passed to ``ConfigLoader`` for
            ``../configs/shap_configs.yaml``. The loaded config must provide
            ``dataset``, ``model_type``, ``text_model_base`` and
            ``my_text_model``.
        max_samples: Not used by this function; kept so existing callers that
            pass it keep working.
        test_set_size: Number of test rows to sample (``random_state=55``).
            Clamped to the size of the test split so small splits do not make
            ``DataFrame.sample`` raise.

    Returns:
        A 4-tuple ``("nan", "nan", preds, actual)``. The first two entries are
        string placeholders for the disabled per-sample outputs, ``preds`` is
        whatever ``model.predict`` returns for the sampled test rows, and
        ``actual`` is the label column of those rows as an array.

    Raises:
        NotImplementedError: For the reordered all-as-text model types.
        ValueError: If ``model_type`` is not an all-as-text, ensemble or stack
            variant.
    """
    # Shap args
    args = ConfigLoader(config_type, "../configs/shap_configs.yaml")
    # Dataset info
    di = ConfigLoader(args.dataset, "../configs/dataset_configs.yaml")

    # Data: all-as-text models read the text-only dataset, everything else
    # reads the ordinal-encoded one.
    all_text_versions = [
        "all_as_text",
        "all_as_text_base_reorder",
        "all_as_text_tnt_reorder",
    ]
    is_all_text = args.model_type in all_text_versions
    ds_name = di.all_text_dataset if is_all_text else di.ordinal_dataset

    train_df = load_dataset(
        ds_name,
        split="train",  # download_mode="force_redownload"
    ).to_pandas()
    y_train = train_df[di.label_col]

    test_df = load_dataset(
        ds_name,
        split="test",  # download_mode="force_redownload"
    ).to_pandas()
    # Never ask for more rows than the split holds (sampling without
    # replacement would raise otherwise).
    n_test_rows = test_set_size
    if n_test_rows is not None:
        n_test_rows = min(n_test_rows, len(test_df))
    test_df = test_df.sample(n_test_rows, random_state=55)

    # Models: every model type uses the same fine-tuned text classifier, so
    # the pipeline is built once up front.
    tokenizer = AutoTokenizer.from_pretrained(
        args.text_model_base, model_max_length=512
    )
    text_pipeline = pipeline(
        "text-classification",
        model=args.my_text_model,
        tokenizer=tokenizer,
        device="cuda:0",  # hard-coded: this function requires a CUDA device
        truncation=True,
        padding=True,
        top_k=None,
    )

    if is_all_text:
        if args.model_type != "all_as_text":
            # # Reorder based on the new index order in di
            # cols_to_str_fn = lambda array: " | ".join(
            #     [
            #         f"{col}: {val}"
            #         for _, col, val in sorted(
            #             zip(di.new_idx_order, di.tab_cols + di.text_cols, array)
            #         )
            #     ]
            # )
            raise NotImplementedError(
                "Shouldn't need much as the column ordering is in dataset info, "
                "just need to update the cols_to_str_fn"
            )

        # Define how to convert all columns to a single string
        all_cols = di.tab_cols + di.text_cols

        def cols_to_str_fn(array):
            return " | ".join(
                f"{col}: {val}" for col, val in zip(all_cols, array)
            )

        model = AllAsTextModel(
            text_pipeline=text_pipeline,
            cols_to_str_fn=cols_to_str_fn,
        )
    else:
        # Define how to convert the text columns to a single string
        if len(di.text_cols) == 1:

            def cols_to_str_fn(array):
                return array[0]

        else:

            def cols_to_str_fn(array):
                return " | ".join(
                    [f"{col}: {val}" for col, val in zip(di.text_cols, array)]
                )

        # LightGBM requires explicitly marking categorical features
        train_df[di.categorical_cols] = train_df[di.categorical_cols].astype(
            "category")
        test_df[di.categorical_cols] = test_df[di.categorical_cols].astype(
            "category")

        tab_model = lgb.LGBMClassifier(random_state=42)
        tab_model.fit(train_df[di.tab_cols], y_train)

        if args.model_type in ["ensemble_25", "ensemble_50", "ensemble_75"]:
            # The numeric suffix is the text model's weight as a percentage.
            text_weight = float(args.model_type.split("_")[-1]) / 100
            model = WeightedEnsemble(
                tab_model=tab_model,
                text_pipeline=text_pipeline,
                text_weight=text_weight,
                cols_to_str_fn=cols_to_str_fn,
            )
        elif args.model_type == "stack":
            # For the stack model, we make predictions on the validation set.
            # These predictions are then used as features for the stack model
            # (another LightGBM model) along with the other tabular features.
            # In doing so the stack model learns, depending on the tabular
            # features, when to trust the tabular model and when to trust the
            # text model.
            val_df = load_dataset(
                ds_name,
                split="validation",  # download_mode="force_redownload"
            ).to_pandas()
            val_df[di.categorical_cols] = val_df[di.categorical_cols].astype(
                "category")
            y_val = val_df[di.label_col]
            val_text = list(map(cols_to_str_fn, val_df[di.text_cols].values))

            # Training set is the predictions from the tabular and text models
            # on the validation set plus the tabular features from the
            # validation set
            text_val_preds = text_pipeline(val_text)
            # NOTE(review): some transformers releases return the per-label
            # dicts sorted by score when top_k=None; if so, column i below is
            # the i-th highest score rather than the score of label i. Confirm
            # against the installed version and against how StackModel builds
            # the same features at predict time.
            # text_val_preds = np.array(
            #     [format_text_pred(pred) for pred in text_val_preds]
            # )
            text_val_preds = np.array(
                [[lab["score"] for lab in pred] for pred in text_val_preds]
            )

            # Add text and tabular predictions to an explicit copy of the
            # tabular features, so the column assignments below do not write
            # into a slice of val_df.
            stack_val_df = val_df[di.tab_cols].copy()
            tab_val_preds = tab_model.predict_proba(stack_val_df)
            for i in range(text_val_preds.shape[1]):
                stack_val_df[f"text_pred_{i}"] = text_val_preds[:, i]
            for i in range(tab_val_preds.shape[1]):
                stack_val_df[f"tab_pred_{i}"] = tab_val_preds[:, i]

            stack_model = lgb.LGBMClassifier(random_state=42)
            stack_model.fit(stack_val_df, y_val)

            model = StackModel(
                tab_model=tab_model,
                text_pipeline=text_pipeline,
                stack_model=stack_model,
                cols_to_str_fn=cols_to_str_fn,
            )
        else:
            raise ValueError(f"Invalid model type of {args.model_type}")

    np.random.seed(1)
    # test_sample_vals = test_df_sample[di.tab_cols + di.text_cols].values
    test_vals = test_df[di.tab_cols + di.text_cols].values

    preds = model.predict(test_vals)
    actual = test_df[di.label_col].values
    return (
        # model.predict(test_sample_vals),
        # test_df_sample[di.label_col].values,
        "nan",
        "nan",
        preds,
        actual,
    )

## Neat results

In [24]:
# Short codes per text model / model type. Not used in this cell.
# NOTE(review): these look like the suffix pieces of the fine-tuned model
# names (e.g. "channel_10") - confirm against the config files.
txt_mod2code = dict(disbert="", bert="1", drob="2", deberta="3")
mod2code = dict(all_text="0")

# Maps (text_model_code, ds_type, model_type) -> accuracy string or "Failed".
results_dict = dict()

for text_model_code in ("disbert", "bert", "drob", "deberta"):
    sf = 1
    # Other datasets tried previously: prod_sent, wine, salary, airbnb
    for ds_type in ("channel",):
        # Other model types tried previously: ensemble_25, ensemble_50,
        # ensemble_75, stack, stack2, all_as_text_tnt_reorder,
        # all_as_text_base_reorder
        for model_type in ("all_text",):
            run_key = (text_model_code, ds_type, model_type)
            config = "_".join((ds_type, text_model_code, model_type))
            try:
                sample_preds, sample_y, preds, y = run_shap(config_type=config)
                # Accuracy: fraction of rows whose highest-scoring column
                # index equals the label.
                predicted = np.argmax(preds, axis=1)
                outcome = f"Test acc: {np.mean(predicted == y)}"
            except Exception as e:
                # Record the failure and keep sweeping the remaining configs.
                outcome = "Failed"
                print(e)
            results_dict[run_key] = outcome
    # with open(f"results_dict_{text_model_code}.txt", "w") as f:
    #     f.write(str(results_dict))


{'config': 'channel_disbert_all_text', 'dataset': 'channel', 'text_model_base': 'distilbert-base-uncased', 'model_type': 'all_as_text', 'my_text_model': 'james-burton/channel_0'}


{'config': 'channel', 'tab_cols': [' n_tokens_content', ' n_unique_tokens', ' n_non_stop_words', ' n_non_stop_unique_tokens', ' num_hrefs', ' num_self_hrefs', ' num_imgs', ' num_videos', ' average_token_length', ' num_keywords', ' global_subjectivity', ' global_sentiment_polarity', ' global_rate_positive_words', ' global_rate_negative_words', ' rate_positive_words', ' rate_negative_words'], 'categorical_cols': [], 'text_cols': ['article_title'], 'label_names': [' data_channel_is_tech', ' data_channel_is_entertainment', ' data_channel_is_lifestyle', ' data_channel_is_bus', ' data_channel_is_world', ' data_channel_is_socmed'], 'label_col': 'channel', 'num_labels': 6, 'prob_type': 'single_label_classification', 'wandb_proj_name': 'News Channel', 'all_text_dataset': 'james-burton/news_channel_all_text', 'ordina

Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--news_channel_all_text-ed95b90d43478033/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--news_channel_all_text-ed95b90d43478033/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)



{'config': 'channel_bert_all_text', 'dataset': 'channel', 'text_model_base': 'bert-base-uncased', 'model_type': 'all_as_text', 'my_text_model': 'james-burton/channel_10'}


{'config': 'channel', 'tab_cols': [' n_tokens_content', ' n_unique_tokens', ' n_non_stop_words', ' n_non_stop_unique_tokens', ' num_hrefs', ' num_self_hrefs', ' num_imgs', ' num_videos', ' average_token_length', ' num_keywords', ' global_subjectivity', ' global_sentiment_polarity', ' global_rate_positive_words', ' global_rate_negative_words', ' rate_positive_words', ' rate_negative_words'], 'categorical_cols': [], 'text_cols': ['article_title'], 'label_names': [' data_channel_is_tech', ' data_channel_is_entertainment', ' data_channel_is_lifestyle', ' data_channel_is_bus', ' data_channel_is_world', ' data_channel_is_socmed'], 'label_col': 'channel', 'num_labels': 6, 'prob_type': 'single_label_classification', 'wandb_proj_name': 'News Channel', 'all_text_dataset': 'james-burton/news_channel_all_text', 'ordinal_datase

Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--news_channel_all_text-ed95b90d43478033/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--news_channel_all_text-ed95b90d43478033/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)



{'config': 'channel_drob_all_text', 'dataset': 'channel', 'text_model_base': 'distilroberta-base', 'model_type': 'all_as_text', 'my_text_model': 'james-burton/channel_20'}


{'config': 'channel', 'tab_cols': [' n_tokens_content', ' n_unique_tokens', ' n_non_stop_words', ' n_non_stop_unique_tokens', ' num_hrefs', ' num_self_hrefs', ' num_imgs', ' num_videos', ' average_token_length', ' num_keywords', ' global_subjectivity', ' global_sentiment_polarity', ' global_rate_positive_words', ' global_rate_negative_words', ' rate_positive_words', ' rate_negative_words'], 'categorical_cols': [], 'text_cols': ['article_title'], 'label_names': [' data_channel_is_tech', ' data_channel_is_entertainment', ' data_channel_is_lifestyle', ' data_channel_is_bus', ' data_channel_is_world', ' data_channel_is_socmed'], 'label_col': 'channel', 'num_labels': 6, 'prob_type': 'single_label_classification', 'wandb_proj_name': 'News Channel', 'all_text_dataset': 'james-burton/news_channel_all_text', 'ordinal_datas

Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--news_channel_all_text-ed95b90d43478033/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--news_channel_all_text-ed95b90d43478033/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)



{'config': 'channel_deberta_all_text', 'dataset': 'channel', 'text_model_base': 'microsoft/deberta-v3-small', 'model_type': 'all_as_text', 'my_text_model': 'james-burton/channel_30'}


{'config': 'channel', 'tab_cols': [' n_tokens_content', ' n_unique_tokens', ' n_non_stop_words', ' n_non_stop_unique_tokens', ' num_hrefs', ' num_self_hrefs', ' num_imgs', ' num_videos', ' average_token_length', ' num_keywords', ' global_subjectivity', ' global_sentiment_polarity', ' global_rate_positive_words', ' global_rate_negative_words', ' rate_positive_words', ' rate_negative_words'], 'categorical_cols': [], 'text_cols': ['article_title'], 'label_names': [' data_channel_is_tech', ' data_channel_is_entertainment', ' data_channel_is_lifestyle', ' data_channel_is_bus', ' data_channel_is_world', ' data_channel_is_socmed'], 'label_col': 'channel', 'num_labels': 6, 'prob_type': 'single_label_classification', 'wandb_proj_name': 'News Channel', 'all_text_dataset': 'james-burton/news_channel_all_text', 'or

Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--news_channel_all_text-ed95b90d43478033/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--news_channel_all_text-ed95b90d43478033/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
Special tokens have been added in the vocabulary, make sure th

In [25]:
results_dict

{('disbert', 'channel', 'all_text'): 'Test acc: 0.3',
 ('bert', 'channel', 'all_text'): 'Test acc: 0.2',
 ('drob', 'channel', 'all_text'): 'Test acc: 0.32',
 ('deberta', 'channel', 'all_text'): 'Test acc: 0.32'}

In [None]:
# AUC-ROC sweep over the binary datasets. Results are added to the existing
# results_dict (uncomment the next line to start from an empty dict instead).
# results_dict = {}
for text_model_code in ["disbert", "bert", "drob", "deberta"]:  #
    # The dataset list was left unterminated with every entry commented out;
    # it is restored to the four datasets that appear in this cell's recorded
    # output.
    for ds_type in [
        "imdb_genre",
        "fake",
        "kick",
        "jigsaw",
    ]:
        for model_type in [
            # "ensemble_25",
            # "ensemble_50",
            # "ensemble_75",
            # "stack",
            "all_text",
            # "all_as_text_tnt_reorder",
            # "all_as_text_base_reorder",
        ]:
            result_key = (text_model_code, ds_type, model_type)
            # run_shap takes a single config name, built the same way as in
            # the accuracy sweep.
            config = f"{ds_type}_{text_model_code}_{model_type}"
            try:
                sample_preds, sample_y, preds, y = run_shap(
                    config_type=config,
                )
                # Column 1 of preds is used as the score for the positive class.
                results_dict[
                    result_key
                ] = f"Test AUC-ROC: {roc_auc_score(y, preds[:, 1])}"
            except Exception as e:
                # Record the failure but say which config failed and why,
                # without swallowing KeyboardInterrupt/SystemExit.
                results_dict[result_key] = "Failed"
                print(f"{config} failed: {e}")
    # Snapshot of all results so far, written once per text model.
    with open(f"results_dict_{text_model_code}_roc.txt", "w") as f:
        f.write(str(results_dict))


Using dataset imdb_genre, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)"pytorch_model.bin";: 100%|██████████| 268M/268M [00:05<00:00, 46.1MB/s] 


Using dataset imdb_genre, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)"pytorch_model.bin";: 100%|██████████| 268M/268M [00:05<00:00, 45.9MB/s] 


Using dataset fake, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 615/615 [00:00<00:00, 992kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 268M/268M [00:06<00:00, 43.0MB/s] 


Using dataset fake, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


Using dataset kick, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 615/615 [00:00<00:00, 1.04MB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 268M/268M [00:06<00:00, 43.2MB/s] 


Using dataset kick, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 615/615 [00:00<00:00, 818kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 268M/268M [00:05<00:00, 46.0MB/s] 


Using dataset jigsaw, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 615/615 [00:00<00:00, 912kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 268M/268M [00:07<00:00, 38.0MB/s] 


Using dataset jigsaw, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 615/615 [00:00<00:00, 922kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 268M/268M [00:05<00:00, 46.8MB/s] 


Using dataset imdb_genre, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 727/727 [00:00<00:00, 823kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 438M/438M [00:10<00:00, 41.6MB/s] 


Using dataset imdb_genre, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 727/727 [00:00<00:00, 1.04MB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 438M/438M [00:09<00:00, 47.5MB/s] 


Using dataset fake, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 727/727 [00:00<00:00, 782kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 438M/438M [00:12<00:00, 33.9MB/s] 


Using dataset fake, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 727/727 [00:00<00:00, 295kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 438M/438M [00:09<00:00, 47.3MB/s] 


Using dataset kick, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 727/727 [00:00<00:00, 309kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 438M/438M [00:10<00:00, 43.4MB/s] 


Using dataset kick, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 727/727 [00:00<00:00, 314kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 438M/438M [00:10<00:00, 43.7MB/s] 


Using dataset jigsaw, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 727/727 [00:00<00:00, 291kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 438M/438M [00:12<00:00, 34.8MB/s] 


Using dataset jigsaw, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 727/727 [00:00<00:00, 236kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 438M/438M [00:12<00:00, 35.2MB/s] 


Using dataset imdb_genre, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 740/740 [00:00<00:00, 270kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 329M/329M [00:06<00:00, 47.0MB/s] 


Using dataset imdb_genre, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 740/740 [00:00<00:00, 304kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 329M/329M [00:07<00:00, 44.5MB/s] 


Using dataset fake, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 740/740 [00:00<00:00, 286kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 329M/329M [00:07<00:00, 46.3MB/s] 


Using dataset fake, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 740/740 [00:00<00:00, 302kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 329M/329M [00:06<00:00, 48.8MB/s] 


Using dataset kick, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 740/740 [00:00<00:00, 276kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 329M/329M [00:07<00:00, 45.8MB/s] 


Using dataset kick, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 740/740 [00:00<00:00, 305kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 329M/329M [00:08<00:00, 39.6MB/s] 


Using dataset jigsaw, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 740/740 [00:00<00:00, 229kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 329M/329M [00:06<00:00, 47.5MB/s] 


Using dataset jigsaw, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Downloading (…)lve/main/config.json: 100%|██████████| 740/740 [00:00<00:00, 302kB/s]
Downloading (…)"pytorch_model.bin";: 100%|██████████| 329M/329M [00:07<00:00, 45.6MB/s] 


Using dataset imdb_genre, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
Special tokens have been added in the vocabula

Using dataset imdb_genre, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--imdb_genre_prediction_ordinal-95c476e18d2d7064/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
Special tokens have been added in the vocabula

Using dataset fake, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
Special tokens have been added in the vocabulary, ma

Using dataset fake, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--fake_job_postings2_ordinal-d873cc356e36f3d4/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
Special tokens have been added in the vocabulary, ma

Using dataset kick, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
Special tokens have been added in the vocabulary

Using dataset kick, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--kick_starter_funding_ordinal-bc45dae77b1c676d/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
Special tokens have been added in the vocabulary

Using dataset jigsaw, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
Special tokens have been added in th

Using dataset jigsaw, ordinal version


Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Found cached dataset parquet (/home/james/.cache/huggingface/datasets/james-burton___parquet/james-burton--jigsaw_unintended_bias100K_ordinal-8e97391c4f489562/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The sentencepiece tokenizer that you are converting to a fast tokenizer uses the byte fallback option which is not implemented in the fast tokenizers. In practice this means that the fast version of the tokenizer can produce unknown tokens whereas the sentencepiece version would have converted these unknown tokens into a sequence of byte tokens matching the original piece of text.
Special tokens have been added in th

In [15]:
# Display results_dict. In the output below, each key is a 3-tuple such as
# ('disbert', 'channel', 'all_as_text_tnt_reorder') and each value is a result
# string ('Test acc: ...', 'Test AUC-ROC: ...') or 'Failed'.
results_dict

{('disbert',
  'channel',
  'all_as_text_tnt_reorder'): 'Test acc: 0.3174916190100572',
 ('disbert', 'channel', 'all_as_text_base_reorder'): 'Failed',
 ('disbert', 'prod_sent', 'all_as_text_tnt_reorder'): 'Failed',
 ('disbert', 'prod_sent', 'all_as_text_base_reorder'): 'Failed',
 ('disbert',
  'wine',
  'all_as_text_tnt_reorder'): 'Test acc: 0.8162712186771908',
 ('disbert', 'wine', 'all_as_text_base_reorder'): 'Failed',
 ('disbert',
  'salary',
  'all_as_text_tnt_reorder'): 'Test acc: 0.47058823529411764',
 ('disbert',
  'salary',
  'all_as_text_base_reorder'): 'Test acc: 0.4627619288058571',
 ('disbert',
  'airbnb',
  'all_as_text_tnt_reorder'): 'Test acc: 0.3771565844070758',
 ('disbert',
  'airbnb',
  'all_as_text_base_reorder'): 'Test acc: 0.35837519108975757',
 ('disbert',
  'imdb_genre',
  'all_as_text_tnt_reorder'): 'Test AUC-ROC: 0.813432088879992',
 ('disbert',
  'imdb_genre',
  'all_as_text_base_reorder'): 'Test AUC-ROC: 0.8150335301771595',
 ('disbert',
  'fake',
  'all_as_

In [16]:
# Display results_dict again. Keys are 3-tuples such as
# ('disbert', 'wine', 'all_as_text_tnt_reorder'); values are result strings
# ('Test acc: ...', 'Test AUC-ROC: ...') or 'Failed'.
results_dict

{('disbert',
  'channel',
  'all_as_text_tnt_reorder'): 'Test acc: 0.3174916190100572',
 ('disbert', 'channel', 'all_as_text_base_reorder'): 'Failed',
 ('disbert', 'prod_sent', 'all_as_text_tnt_reorder'): 'Failed',
 ('disbert', 'prod_sent', 'all_as_text_base_reorder'): 'Failed',
 ('disbert',
  'wine',
  'all_as_text_tnt_reorder'): 'Test acc: 0.8162712186771908',
 ('disbert', 'wine', 'all_as_text_base_reorder'): 'Failed',
 ('disbert',
  'salary',
  'all_as_text_tnt_reorder'): 'Test acc: 0.47058823529411764',
 ('disbert',
  'salary',
  'all_as_text_base_reorder'): 'Test acc: 0.4627619288058571',
 ('disbert',
  'airbnb',
  'all_as_text_tnt_reorder'): 'Test acc: 0.3771565844070758',
 ('disbert',
  'airbnb',
  'all_as_text_base_reorder'): 'Test acc: 0.35837519108975757',
 ('disbert',
  'imdb_genre',
  'all_as_text_tnt_reorder'): 'Test AUC-ROC: 0.813432088879992',
 ('disbert',
  'imdb_genre',
  'all_as_text_base_reorder'): 'Test AUC-ROC: 0.8150335301771595',
 ('disbert',
  'fake',
  'all_as_

In [19]:
# Consolidated test results, keyed by (text model, dataset, model type).
# Every value is the raw result string recorded for that run:
#   "Test acc: <float>"      for wine / salary / airbnb / channel / prod_sent
#   "Test AUC-ROC: <float>"  for imdb_genre / fake / kick / jigsaw
#   "Failed"                 where no score was recorded
#
# Each key is listed exactly once. A dict literal silently keeps only the last
# value for a repeated key, so repeated rows would be dead code that hides
# which score is actually in effect. For imdb_genre the effective score is the
# AUC-ROC one (listed in the AUC-ROC section below).
full_dict = {
    # --- Accuracy: ensemble / stack / all_text -------------------------------
    ("disbert", "wine", "ensemble_25"): "Test acc: 0.7932100232989396",
    ("disbert", "wine", "ensemble_50"): "Test acc: 0.8218819837382911",
    ("disbert", "wine", "ensemble_75"): "Test acc: 0.8222623745898912",
    ("disbert", "wine", "stack"): "Test acc: 0.022062669392801104",
    ("disbert", "wine", "all_text"): "Test acc: 0.818838856925491",
    ("disbert", "salary", "ensemble_25"): "Test acc: 0.3940924009088614",
    ("disbert", "salary", "ensemble_50"): "Test acc: 0.45013885382479174",
    ("disbert", "salary", "ensemble_75"): "Test acc: 0.4564503913153244",
    ("disbert", "salary", "stack"): "Test acc: 0.17116889674324665",
    ("disbert", "salary", "all_text"): "Test acc: 0.45796516031305223",
    ("disbert", "airbnb", "ensemble_25"): "Test acc: 0.41996069010701026",
    ("disbert", "airbnb", "ensemble_50"): "Test acc: 0.419305525223848",
    ("disbert", "airbnb", "ensemble_75"): "Test acc: 0.38938632889277136",
    ("disbert", "airbnb", "stack"): "Test acc: 0.37235204193055255",
    ("disbert", "airbnb", "all_text"): "Test acc: 0.37999563223411226",
    ("disbert", "channel", "ensemble_25"): "Test acc: 0.5452573456911852",
    ("disbert", "channel", "ensemble_50"): "Test acc: 0.545848944981266",
    ("disbert", "channel", "ensemble_75"): "Test acc: 0.4811674225990929",
    ("disbert", "channel", "stack"): "Test acc: 0.44922106093472686",
    ("disbert", "channel", "all_text"): "Test acc: 0.3190692171169395",
    ("bert", "channel", "ensemble_25"): "Test acc: 0.5436797475843029",
    ("bert", "channel", "ensemble_50"): "Test acc: 0.543088148294222",
    ("bert", "channel", "ensemble_75"): "Test acc: 0.5020705975152829",
    ("bert", "channel", "stack"): "Test acc: 0.4661802405837113",
    ("bert", "channel", "all_text"): "Test acc: 0.25379609544468545",
    ("bert", "prod_sent", "ensemble_25"): "Test acc: 0.8908091123330715",
    ("bert", "prod_sent", "ensemble_50"): "Test acc: 0.8813825608798115",
    ("bert", "prod_sent", "ensemble_75"): "Test acc: 0.7847604084838963",
    ("bert", "prod_sent", "stack"): "Test acc: 0.7297721916732128",
    ("bert", "prod_sent", "all_text"): "Test acc: 0.9049489395129615",
    ("bert", "wine", "ensemble_25"): "Test acc: 0.7963957966810898",
    ("bert", "wine", "ensemble_50"): "Test acc: 0.8259236365365413",
    ("bert", "wine", "ensemble_75"): "Test acc: 0.8254481479720412",
    ("bert", "wine", "stack"): "Test acc: 0.07170367552660359",
    ("bert", "wine", "all_text"): "Test acc: 0.8256858922542912",
    ("bert", "salary", "ensemble_25"): "Test acc: 0.43120424135319363",
    ("bert", "salary", "ensemble_50"): "Test acc: 0.46705377429941936",
    ("bert", "salary", "ensemble_75"): "Test acc: 0.4786670032819995",
    ("bert", "salary", "stack"): "Test acc: 0.23882857864175713",
    ("bert", "salary", "all_text"): "Test acc: 0.4809391567785913",
    ("bert", "airbnb", "ensemble_25"): "Test acc: 0.4162480891024241",
    ("bert", "airbnb", "ensemble_50"): "Test acc: 0.42148940816772223",
    ("bert", "airbnb", "ensemble_75"): "Test acc: 0.4035815680279537",
    ("bert", "airbnb", "stack"): "Test acc: 0.36427167503821795",
    ("bert", "airbnb", "all_text"): "Test acc: 0.3869840576545097",
    ("drob", "channel", "ensemble_25"): "Test acc: 0.5436797475843029",
    ("drob", "channel", "ensemble_50"): "Test acc: 0.5466377440347071",
    ("drob", "channel", "ensemble_75"): "Test acc: 0.4878722145533425",
    ("drob", "channel", "stack"): "Test acc: 0.45927824886610136",
    ("drob", "channel", "all_text"): "Test acc: 0.31275882468941035",
    ("drob", "prod_sent", "ensemble_25"): "Test acc: 0.8908091123330715",
    ("drob", "prod_sent", "ensemble_50"): "Test acc: 0.8845247446975648",
    ("drob", "prod_sent", "ensemble_75"): "Test acc: 0.795758051846033",
    ("drob", "prod_sent", "stack"): "Test acc: 0.8853102906520032",
    ("drob", "prod_sent", "all_text"): "Test acc: 0.9041633935585232",
    ("drob", "wine", "ensemble_25"): "Test acc: 0.7975369692358899",
    ("drob", "wine", "ensemble_50"): "Test acc: 0.8248300128381912",
    ("drob", "wine", "ensemble_75"): "Test acc: 0.8244971708430412",
    ("drob", "wine", "stack"): "Test acc: 0.036897912605201845",
    ("drob", "wine", "all_text"): "Test acc: 0.821026104322191",
    ("drob", "salary", "ensemble_25"): "Test acc: 0.43524362534713457",
    ("drob", "salary", "ensemble_50"): "Test acc: 0.46755869729866195",
    ("drob", "salary", "ensemble_75"): "Test acc: 0.47109315829336024",
    ("drob", "salary", "stack"): "Test acc: 0.3292097955061853",
    ("drob", "salary", "all_text"): "Test acc: 0.47134561979298156",
    ("drob", "airbnb", "ensemble_25"): "Test acc: 0.4188687486350732",
    ("drob", "airbnb", "ensemble_50"): "Test acc: 0.41362742956977505",
    ("drob", "airbnb", "ensemble_75"): "Test acc: 0.3865472810657349",
    ("drob", "airbnb", "stack"): "Test acc: 0.3795588556453374",
    ("drob", "airbnb", "all_text"): "Test acc: 0.3896047171871588",
    ("deberta", "channel", "ensemble_25"): "Test acc: 0.5438769473476632",
    ("deberta", "channel", "ensemble_50"): "Test acc: 0.5401301518438177",
    ("deberta", "channel", "ensemble_75"): "Test acc: 0.4454742654308815",
    ("deberta", "channel", "stack"): "Test acc: 0.44685466377440347",
    ("deberta", "channel", "all_text"): "Test acc: 0.3174916190100572",
    ("deberta", "prod_sent", "ensemble_25"): "Test acc: 0.8908091123330715",
    ("deberta", "prod_sent", "ensemble_50"): "Test acc: 0.8837391987431265",
    ("deberta", "prod_sent", "ensemble_75"): "Test acc: 0.8114689709347996",
    ("deberta", "prod_sent", "stack"): "Test acc: 0.8735271013354281",
    ("deberta", "prod_sent", "all_text"): "Test acc: 0.8884524744697565",
    ("deberta", "wine", "ensemble_25"): "Test acc: 0.7807997717654891",
    ("deberta", "wine", "ensemble_50"): "Test acc: 0.8107555513289906",
    ("deberta", "wine", "ensemble_75"): "Test acc: 0.8100898673386905",
    ("deberta", "wine", "stack"): "Test acc: 0.07793257572155389",
    ("deberta", "wine", "all_text"): "Test acc: 0.8166516095287908",
    ("deberta", "salary", "ensemble_25"): "Test acc: 0.43776824034334766",
    ("deberta", "salary", "ensemble_50"): "Test acc: 0.4683160817975259",
    ("deberta", "salary", "ensemble_75"): "Test acc: 0.4758899267861651",
    ("deberta", "salary", "stack"): "Test acc: 0.2774551880838172",
    ("deberta", "salary", "all_text"): "Test acc: 0.45821762181267356",
    ("deberta", "airbnb", "ensemble_25"): "Test acc: 0.4182135837519109",
    ("deberta", "airbnb", "ensemble_50"): "Test acc: 0.41799519545752345",
    ("deberta", "airbnb", "ensemble_75"): "Test acc: 0.4003057436121424",
    ("deberta", "airbnb", "stack"): "Test acc: 0.3509499890805853",
    ("deberta", "airbnb", "all_text"): "Test acc: 0.3771565844070758",
    # --- AUC-ROC: ensemble / stack / all_text --------------------------------
    ("disbert", "imdb_genre", "ensemble_25"): "Test AUC-ROC: 0.8530677609848865",
    ("disbert", "imdb_genre", "ensemble_50"): "Test AUC-ROC: 0.8646782103893504",
    ("disbert", "imdb_genre", "ensemble_75"): "Test AUC-ROC: 0.8520668601741568",
    ("disbert", "imdb_genre", "stack"): "Test AUC-ROC: 0.8106295666099489",
    ("disbert", "imdb_genre", "all_text"): "Test AUC-ROC: 0.8146331698528675",
    ("disbert", "fake", "ensemble_25"): "Test AUC-ROC: 0.8744072444723762",
    ("disbert", "fake", "ensemble_50"): "Test AUC-ROC: 0.9189305642842179",
    ("disbert", "fake", "ensemble_75"): "Test AUC-ROC: 0.9337994439048545",
    ("disbert", "fake", "stack"): "Test AUC-ROC: 0.9087501666380999",
    ("disbert", "fake", "all_text"): "Test AUC-ROC: 0.9606853110895275",
    ("disbert", "kick", "ensemble_25"): "Test AUC-ROC: 0.7410626481173135",
    ("disbert", "kick", "ensemble_50"): "Test AUC-ROC: 0.7735201424600657",
    ("disbert", "kick", "ensemble_75"): "Test AUC-ROC: 0.7679046143380706",
    ("disbert", "kick", "stack"): "Test AUC-ROC: 0.7491010899483486",
    ("disbert", "kick", "all_text"): "Test AUC-ROC: 0.7875153407217366",
    ("disbert", "jigsaw", "ensemble_25"): "Test AUC-ROC: 0.9321638495642937",
    ("disbert", "jigsaw", "ensemble_50"): "Test AUC-ROC: 0.9456286008527085",
    ("disbert", "jigsaw", "ensemble_75"): "Test AUC-ROC: 0.9511750950554884",
    ("disbert", "jigsaw", "stack"): "Test AUC-ROC: 0.9158202495728693",
    ("disbert", "jigsaw", "all_text"): "Test AUC-ROC: 0.9624062142594692",
    ("bert", "imdb_genre", "ensemble_25"): "Test AUC-ROC: 0.8507656891202082",
    ("bert", "imdb_genre", "ensemble_50"): "Test AUC-ROC: 0.8609748773896507",
    ("bert", "imdb_genre", "ensemble_75"): "Test AUC-ROC: 0.8573716344710239",
    ("bert", "imdb_genre", "stack"): "Test AUC-ROC: 0.8216394755279752",
    ("bert", "imdb_genre", "all_text"): "Test AUC-ROC: 0.8282454208787909",
    ("bert", "fake", "ensemble_25"): "Test AUC-ROC: 0.8867003751737798",
    ("bert", "fake", "ensemble_50"): "Test AUC-ROC: 0.9280956597916548",
    ("bert", "fake", "ensemble_75"): "Test AUC-ROC: 0.939410386790836",
    ("bert", "fake", "stack"): "Test AUC-ROC: 0.9047996533927518",
    ("bert", "fake", "all_text"): "Test AUC-ROC: 0.961777980917557",
    ("bert", "kick", "ensemble_25"): "Test AUC-ROC: 0.7479672806259093",
    ("bert", "kick", "ensemble_50"): "Test AUC-ROC: 0.7824814562315308",
    ("bert", "kick", "ensemble_75"): "Test AUC-ROC: 0.7755384670023259",
    ("bert", "kick", "stack"): "Test AUC-ROC: 0.755045998173823",
    ("bert", "kick", "all_text"): "Test AUC-ROC: 0.7805312613270808",
    ("bert", "jigsaw", "ensemble_25"): "Test AUC-ROC: 0.9270898863030642",
    ("bert", "jigsaw", "ensemble_50"): "Test AUC-ROC: 0.9410218751455497",
    ("bert", "jigsaw", "ensemble_75"): "Test AUC-ROC: 0.9486580449509142",
    ("bert", "jigsaw", "stack"): "Test AUC-ROC: 0.9313145418077804",
    ("bert", "jigsaw", "all_text"): "Test AUC-ROC: 0.9605167115052071",
    ("drob", "imdb_genre", "ensemble_25"): "Test AUC-ROC: 0.8459613652287059",
    ("drob", "imdb_genre", "ensemble_50"): "Test AUC-ROC: 0.8531678510659593",
    ("drob", "imdb_genre", "ensemble_75"): "Test AUC-ROC: 0.831148033229907",
    ("drob", "imdb_genre", "stack"): "Test AUC-ROC: 0.8307977179461515",
    ("drob", "imdb_genre", "all_text"): "Test AUC-ROC: 0.77219497547793",
    ("drob", "fake", "ensemble_25"): "Test AUC-ROC: 0.8650659886876535",
    ("drob", "fake", "ensemble_50"): "Test AUC-ROC: 0.9080157687253614",
    ("drob", "fake", "ensemble_75"): "Test AUC-ROC: 0.9266102001561637",
    ("drob", "fake", "stack"): "Test AUC-ROC: 0.9188996172084785",
    ("drob", "fake", "all_text"): "Test AUC-ROC: 0.95783103848864",
    ("drob", "kick", "ensemble_25"): "Test AUC-ROC: 0.741089815412633",
    ("drob", "kick", "ensemble_50"): "Test AUC-ROC: 0.7837713373884283",
    ("drob", "kick", "ensemble_75"): "Test AUC-ROC: 0.7826797489730115",
    ("drob", "kick", "stack"): "Test AUC-ROC: 0.7702470339960303",
    ("drob", "kick", "all_text"): "Test AUC-ROC: 0.7951050305532272",
    ("drob", "jigsaw", "ensemble_25"): "Test AUC-ROC: 0.9411059187612884",
    ("drob", "jigsaw", "ensemble_50"): "Test AUC-ROC: 0.9550027464992299",
    ("drob", "jigsaw", "ensemble_75"): "Test AUC-ROC: 0.9606921917359577",
    ("drob", "jigsaw", "stack"): "Test AUC-ROC: 0.903355105716193",
    ("drob", "jigsaw", "all_text"): "Test AUC-ROC: 0.9638602397845385",
    ("deberta", "imdb_genre", "ensemble_25"): "Test AUC-ROC: 0.8565709138224402",
    ("deberta", "imdb_genre", "ensemble_50"): "Test AUC-ROC: 0.8721849664698227",
    ("deberta", "imdb_genre", "ensemble_75"): "Test AUC-ROC: 0.8587728956060454",
    ("deberta", "imdb_genre", "stack"): "Test AUC-ROC: 0.8202382143929536",
    ("deberta", "imdb_genre", "all_text"): "Test AUC-ROC: 0.7971174056650985",
    ("deberta", "fake", "ensemble_25"): "Test AUC-ROC: 0.8713077758098612",
    ("deberta", "fake", "ensemble_50"): "Test AUC-ROC: 0.9077253423222685",
    ("deberta", "fake", "ensemble_75"): "Test AUC-ROC: 0.9214063303433697",
    ("deberta", "fake", "stack"): "Test AUC-ROC: 0.9052674303452741",
    ("deberta", "fake", "all_text"): "Test AUC-ROC: 0.9593783922756098",
    ("deberta", "kick", "ensemble_25"): "Test AUC-ROC: 0.7472761910425445",
    ("deberta", "kick", "ensemble_50"): "Test AUC-ROC: 0.7790206299178409",
    ("deberta", "kick", "ensemble_75"): "Test AUC-ROC: 0.7694822249784785",
    ("deberta", "kick", "stack"): "Test AUC-ROC: 0.7381109209892067",
    ("deberta", "kick", "all_text"): "Test AUC-ROC: 0.7759173440890258",
    ("deberta", "jigsaw", "ensemble_25"): "Test AUC-ROC: 0.9265948954297217",
    ("deberta", "jigsaw", "ensemble_50"): "Test AUC-ROC: 0.9361112232382668",
    ("deberta", "jigsaw", "ensemble_75"): "Test AUC-ROC: 0.9439736483933905",
    ("deberta", "jigsaw", "stack"): "Test AUC-ROC: 0.9279894664546353",
    ("deberta", "jigsaw", "all_text"): "Test AUC-ROC: 0.9554290711953223",
    # --- Reordered all-as-text variants: disbert (all datasets) --------------
    ("disbert", "channel", "all_as_text_tnt_reorder"): "Test acc: 0.3174916190100572",
    ("disbert", "channel", "all_as_text_base_reorder"): "Failed",
    ("disbert", "prod_sent", "all_as_text_tnt_reorder"): "Failed",
    ("disbert", "prod_sent", "all_as_text_base_reorder"): "Failed",
    ("disbert", "wine", "all_as_text_tnt_reorder"): "Test acc: 0.8162712186771908",
    ("disbert", "wine", "all_as_text_base_reorder"): "Failed",
    ("disbert", "salary", "all_as_text_tnt_reorder"): "Test acc: 0.47058823529411764",
    ("disbert", "salary", "all_as_text_base_reorder"): "Test acc: 0.4627619288058571",
    ("disbert", "airbnb", "all_as_text_tnt_reorder"): "Test acc: 0.3771565844070758",
    ("disbert", "airbnb", "all_as_text_base_reorder"): "Test acc: 0.35837519108975757",
    (
        "disbert",
        "imdb_genre",
        "all_as_text_tnt_reorder",
    ): "Test AUC-ROC: 0.813432088879992",
    (
        "disbert",
        "imdb_genre",
        "all_as_text_base_reorder",
    ): "Test AUC-ROC: 0.8150335301771595",
    ("disbert", "fake", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.9577096307299702",
    ("disbert", "fake", "all_as_text_base_reorder"): "Failed",
    ("disbert", "kick", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.7864773150429295",
    ("disbert", "kick", "all_as_text_base_reorder"): "Test AUC-ROC: 0.7838657149794981",
    (
        "disbert",
        "jigsaw",
        "all_as_text_tnt_reorder",
    ): "Test AUC-ROC: 0.9630512386600633",
    (
        "disbert",
        "jigsaw",
        "all_as_text_base_reorder",
    ): "Test AUC-ROC: 0.9641984695012904",
    # --- Reordered all-as-text variants: AUC-ROC datasets --------------------
    (
        "bert",
        "imdb_genre",
        "all_as_text_tnt_reorder",
    ): "Test AUC-ROC: 0.8370533480132119",
    (
        "bert",
        "imdb_genre",
        "all_as_text_base_reorder",
    ): "Test AUC-ROC: 0.8040236212591332",
    ("bert", "fake", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.9409815460206821",
    ("bert", "fake", "all_as_text_base_reorder"): "Test AUC-ROC: 0.9554647774667201",
    ("bert", "kick", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.7878662786590875",
    ("bert", "kick", "all_as_text_base_reorder"): "Test AUC-ROC: 0.5642941525675974",
    ("bert", "jigsaw", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.9569814383967247",
    ("bert", "jigsaw", "all_as_text_base_reorder"): "Test AUC-ROC: 0.962315309940405",
    (
        "drob",
        "imdb_genre",
        "all_as_text_tnt_reorder",
    ): "Test AUC-ROC: 0.7601841657491742",
    (
        "drob",
        "imdb_genre",
        "all_as_text_base_reorder",
    ): "Test AUC-ROC: 0.7603843459113202",
    ("drob", "fake", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.9621921956236075",
    ("drob", "fake", "all_as_text_base_reorder"): "Test AUC-ROC: 0.9580000571330629",
    ("drob", "kick", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.7887800850018754",
    ("drob", "kick", "all_as_text_base_reorder"): "Test AUC-ROC: 0.7903838108563148",
    ("drob", "jigsaw", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.9594309460599381",
    ("drob", "jigsaw", "all_as_text_base_reorder"): "Test AUC-ROC: 0.9577867134495661",
    (
        "deberta",
        "imdb_genre",
        "all_as_text_tnt_reorder",
    ): "Test AUC-ROC: 0.7773996596937245",
    (
        "deberta",
        "imdb_genre",
        "all_as_text_base_reorder",
    ): "Test AUC-ROC: 0.767090381343209",
    ("deberta", "fake", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.9562908263345332",
    ("deberta", "fake", "all_as_text_base_reorder"): "Test AUC-ROC: 0.954738711458988",
    ("deberta", "kick", "all_as_text_tnt_reorder"): "Test AUC-ROC: 0.7915710206785299",
    ("deberta", "kick", "all_as_text_base_reorder"): "Test AUC-ROC: 0.789079392295823",
    (
        "deberta",
        "jigsaw",
        "all_as_text_tnt_reorder",
    ): "Test AUC-ROC: 0.9569549714593263",
    (
        "deberta",
        "jigsaw",
        "all_as_text_base_reorder",
    ): "Test AUC-ROC: 0.9619605346912794",
    # --- Reordered all-as-text variants: accuracy datasets -------------------
    ("bert", "channel", "all_as_text_tnt_reorder"): "Test acc: 0.25379609544468545",
    ("bert", "channel", "all_as_text_base_reorder"): "Test acc: 0.25379609544468545",
    ("bert", "prod_sent", "all_as_text_tnt_reorder"): "Failed",
    ("bert", "prod_sent", "all_as_text_base_reorder"): "Failed",
    ("bert", "wine", "all_as_text_tnt_reorder"): "Test acc: 0.8285388236412914",
    ("bert", "wine", "all_as_text_base_reorder"): "Failed",
    ("bert", "salary", "all_as_text_tnt_reorder"): "Test acc: 0.47311285029033073",
    ("bert", "salary", "all_as_text_base_reorder"): "Test acc: 0.4690734662963898",
    ("bert", "airbnb", "all_as_text_tnt_reorder"): "Test acc: 0.37366237169687705",
    ("bert", "airbnb", "all_as_text_base_reorder"): "Test acc: 0.37824852587901286",
    ("drob", "channel", "all_as_text_tnt_reorder"): "Test acc: 0.2991520410175508",
    ("drob", "channel", "all_as_text_base_reorder"): "Test acc: 0.3206468152238217",
    ("drob", "prod_sent", "all_as_text_tnt_reorder"): "Failed",
    ("drob", "prod_sent", "all_as_text_base_reorder"): "Test acc: 0.9073055773762765",
    ("drob", "wine", "all_as_text_tnt_reorder"): "Test acc: 0.8256383433978413",
    ("drob", "wine", "all_as_text_base_reorder"): "Failed",
    ("drob", "salary", "all_as_text_tnt_reorder"): "Test acc: 0.4698308507952537",
    ("drob", "salary", "all_as_text_base_reorder"): "Failed",
    ("drob", "airbnb", "all_as_text_tnt_reorder"): "Test acc: 0.3712601004586154",
    ("drob", "airbnb", "all_as_text_base_reorder"): "Test acc: 0.3655820048045425",
    ("deberta", "channel", "all_as_text_tnt_reorder"): "Test acc: 0.3320844014987182",
    ("deberta", "channel", "all_as_text_base_reorder"): "Test acc: 0.25379609544468545",
    ("deberta", "prod_sent", "all_as_text_tnt_reorder"): "Failed",
    ("deberta", "prod_sent", "all_as_text_base_reorder"): "Failed",
    ("deberta", "wine", "all_as_text_tnt_reorder"): "Test acc: 0.8185535637867909",
    ("deberta", "wine", "all_as_text_base_reorder"): "Test acc: 0.821073653178641",
    ("deberta", "salary", "all_as_text_tnt_reorder"): "Test acc: 0.45947992931078013",
    ("deberta", "salary", "all_as_text_base_reorder"): "Test acc: 0.45114869982327693",
    ("deberta", "airbnb", "all_as_text_tnt_reorder"): "Test acc: 0.3808691854116619",
    ("deberta", "airbnb", "all_as_text_base_reorder"): "Test acc: 0.3775933609958506",
}

In [20]:
# List every (model, dataset, method) key of full_dict in sorted order.
# The stray leading "[" was never closed and made this cell a syntax error.
sorted(full_dict)

[('bert', 'airbnb', 'all_as_text_base_reorder'),
 ('bert', 'airbnb', 'all_as_text_tnt_reorder'),
 ('bert', 'airbnb', 'all_text'),
 ('bert', 'airbnb', 'ensemble_25'),
 ('bert', 'airbnb', 'ensemble_50'),
 ('bert', 'airbnb', 'ensemble_75'),
 ('bert', 'airbnb', 'stack'),
 ('bert', 'channel', 'all_as_text_base_reorder'),
 ('bert', 'channel', 'all_as_text_tnt_reorder'),
 ('bert', 'channel', 'all_text'),
 ('bert', 'channel', 'ensemble_25'),
 ('bert', 'channel', 'ensemble_50'),
 ('bert', 'channel', 'ensemble_75'),
 ('bert', 'channel', 'stack'),
 ('bert', 'fake', 'all_as_text_base_reorder'),
 ('bert', 'fake', 'all_as_text_tnt_reorder'),
 ('bert', 'fake', 'all_text'),
 ('bert', 'fake', 'ensemble_25'),
 ('bert', 'fake', 'ensemble_50'),
 ('bert', 'fake', 'ensemble_75'),
 ('bert', 'fake', 'stack'),
 ('bert', 'imdb_genre', 'all_as_text_base_reorder'),
 ('bert', 'imdb_genre', 'all_as_text_tnt_reorder'),
 ('bert', 'imdb_genre', 'all_text'),
 ('bert', 'imdb_genre', 'ensemble_25'),
 ('bert', 'imdb_genre

In [22]:
def _result_sort_key(entry):
    """Build the ordering key (dataset, model, numeric score) for one result.

    `entry` is a `(key, outcome)` pair from `full_dict.items()`, where `key`
    starts with (model, dataset, ...) and `outcome` is either a
    "<metric>: <value>" string or a string containing "Failed".
    Failed runs are given -inf so they rank below every real score.
    """
    key, outcome = entry
    if "Failed" in outcome:
        score = float("-inf")
    else:
        # The numeric value is whatever follows the last ": " separator.
        score = float(outcome.split(": ")[-1])
    return (key[1], key[0], score)


# Descending by dataset, then model, then score.
sorted(full_dict.items(), key=_result_sort_key, reverse=True)

[(('drob', 'wine', 'all_as_text_tnt_reorder'), 'Test acc: 0.8256383433978413'),
 (('drob', 'wine', 'ensemble_50'), 'Test acc: 0.8248300128381912'),
 (('drob', 'wine', 'ensemble_75'), 'Test acc: 0.8244971708430412'),
 (('drob', 'wine', 'all_text'), 'Test acc: 0.821026104322191'),
 (('drob', 'wine', 'ensemble_25'), 'Test acc: 0.7975369692358899'),
 (('drob', 'wine', 'stack'), 'Test acc: 0.036897912605201845'),
 (('drob', 'wine', 'all_as_text_base_reorder'), 'Failed'),
 (('disbert', 'wine', 'ensemble_75'), 'Test acc: 0.8222623745898912'),
 (('disbert', 'wine', 'ensemble_50'), 'Test acc: 0.8218819837382911'),
 (('disbert', 'wine', 'all_text'), 'Test acc: 0.818838856925491'),
 (('disbert', 'wine', 'all_as_text_tnt_reorder'),
  'Test acc: 0.8162712186771908'),
 (('disbert', 'wine', 'ensemble_25'), 'Test acc: 0.7932100232989396'),
 (('disbert', 'wine', 'stack'), 'Test acc: 0.022062669392801104'),
 (('disbert', 'wine', 'all_as_text_base_reorder'), 'Failed'),
 (('deberta', 'wine', 'all_as_text_

In [23]:
import csv

In [31]:
# Axes of the per-model tables, taken from the (model, dataset, method) keys.
models = sorted({key[0] for key in full_dict})
ds_list = sorted({key[1] for key in full_dict})
# Row order of the exported tables (hand-ordered rather than alphabetical).
methods = [
    "ensemble_25",
    "ensemble_50",
    "ensemble_75",
    "stack",
    "all_text",
    "all_as_text_base_reorder",
    "all_as_text_tnt_reorder",
]

for model in models:
    # Keep only the results that belong to the current model.
    model_data = {key: value for key, value in full_dict.items() if key[0] == model}

    # Methods are rows and datasets are columns; the header row starts with
    # an empty corner cell.
    matrix = [[""] + ds_list]
    for method in methods:
        row = [method]  # First cell in each row is the method name
        for ds in ds_list:
            score = model_data.get((model, ds, method))
            if score is None or "Failed" in score:
                # Always emit a cell, even when the combination was never
                # run. Skipping it would shorten the row and shift every
                # later score under the wrong dataset header.
                row.append("n/a")
            else:
                # Scores look like "<metric>: <value>"; keep only the value.
                row.append(score.split(": ")[-1])
        matrix.append(row)

    # Write one CSV per model into the current working directory.
    filename = model + ".csv"
    with open(filename, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(matrix)

    print(f"CSV file '{filename}' generated for model '{model}'.")

CSV file 'bert.csv' generated for model 'bert'.
CSV file 'deberta.csv' generated for model 'deberta'.
CSV file 'disbert.csv' generated for model 'disbert'.
CSV file 'drob.csv' generated for model 'drob'.


In [30]:
# Display the current value of `methods`.
# NOTE(review): this cell's execution count (30) is lower than that of the cell
# assigning the hand-ordered list (31), so the saved output below shows an
# earlier, alphabetically sorted value. Re-run top to bottom to refresh it.
methods

['all_as_text_base_reorder',
 'all_as_text_tnt_reorder',
 'all_text',
 'ensemble_25',
 'ensemble_50',
 'ensemble_75',
 'stack']

In [None]:
# Scratch cell (never executed): a bare list literal with the same hand-ordered
# method names that the CSV-export cell assigns to `methods`. It binds no name.
[
    "ensemble_25",
    "ensemble_50",
    "ensemble_75",
    "stack",
    "all_text",
    "all_as_text_base_reorder",
    "all_as_text_tnt_reorder",
]