#### Imports

In [None]:
from __future__ import annotations

import codecs
import itertools
import random
import typing as t
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as torch_f
from sklearn.model_selection import StratifiedGroupKFold
from text_unidecode import unidecode
from torch.utils.data import Dataset as TorchDataset, default_collate as default_collate_fn, DataLoader
from tqdm.notebook import tqdm
from transformers.data.data_collator import DataCollatorWithPadding
from transformers.models.auto.configuration_auto import AutoConfig
from transformers.models.auto.modeling_auto import AutoModel
from transformers.models.auto.tokenization_auto import AutoTokenizer
from transformers.models.deberta_v2.modeling_deberta_v2 import StableDropout
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.trainer_utils import set_seed as set_huggingface_seed
from transformers.utils.generic import PaddingStrategy

#### Seed

In [None]:
def seed_everything(seed: int):
    """Seed NumPy, ``random``, PyTorch (CPU and CUDA) and Hugging Face with *seed*.

    Also sets cuDNN to deterministic mode and disables its benchmark mode.
    """
    # NumPy is seeded with the value folded by the modulus below.
    numpy_seed = seed % (2 ** 32 - 1)
    np.random.seed(numpy_seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True  # type: ignore
    cudnn.benchmark = False  # type: ignore
    set_huggingface_seed(seed)

#### Type utilities

In [None]:
def dataframe(x: t.Any) -> pd.DataFrame:
    """Type-checker helper: return *x* unchanged, typed as ``pd.DataFrame``."""
    as_frame: pd.DataFrame = t.cast(pd.DataFrame, x)
    return as_frame


def series(x: t.Any) -> pd.Series:
    """Type-checker helper: return *x* unchanged, typed as ``pd.Series``."""
    as_series: pd.Series = t.cast(pd.Series, x)
    return as_series


# Generic placeholder for the wrapped value's type.
_T = t.TypeVar('_T')


def unwrap_opt(x: t.Optional[_T]) -> _T:
    """Return *x* unchanged after asserting that it is not ``None``.

    Raises:
        AssertionError: if *x* is ``None``.
    """
    assert x is not None
    value: _T = x
    return value


def read_csv(path: Path) -> pd.DataFrame:
    """Read the CSV file at *path* with ``pd.read_csv`` and return it typed as a DataFrame."""
    loaded = pd.read_csv(path)
    return dataframe(loaded)

#### Validation strategy

In [None]:
class ValStrategy:
    """Base class for strategies that assign a validation fold to every row of a frame."""

    def _copy_with_fold(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of *df* with a ``fold`` column set to ``-1`` on every row."""
        with_fold = df.copy()
        with_fold['fold'] = -1
        return with_fold

    def assign_folds(self, df: pd.DataFrame, num_folds: int) -> pd.DataFrame:
        """Return *df* with fold numbers assigned; must be implemented by subclasses."""
        raise NotImplementedError()


class StratifyAndGroupByColumnValStrategy(ValStrategy):
    """Assign folds with ``StratifiedGroupKFold`` using two columns of the frame.

    Args:
        stratify_by: column whose values are passed as ``y`` (the stratification target).
        group_by: column whose values are passed as ``groups``.
        seed: ``random_state`` for the shuffled split.
    """

    def __init__(self, stratify_by: str, group_by: str, seed: int):
        self._stratify_by = stratify_by
        self._group_by = group_by
        self._seed = seed

    def assign_folds(self, df: pd.DataFrame, num_folds: int) -> pd.DataFrame:
        """Return a copy of *df* whose ``fold`` column holds each row's validation fold.

        Args:
            df: input frame; it is not modified.
            num_folds: number of splits.
        """
        df = self._copy_with_fold(df)
        stratify_series = series(df[self._stratify_by])
        group_series = series(df[self._group_by])
        kf = StratifiedGroupKFold(n_splits=num_folds, shuffle=True, random_state=self._seed)
        fold_col = df.columns.get_loc('fold')
        splits = kf.split(X=df, y=stratify_series, groups=group_series.values)
        for fold, (_, val_positions) in enumerate(splits):
            # split() yields row *positions*, so write positionally; label-based
            # ``.loc`` would hit the wrong rows whenever the index is not 0..n-1.
            df.iloc[val_positions, fold_col] = fold
        return df

#### Tokenization

In [None]:
# Discourse type name -> short marker token.
DISC_TYPE_TO_TOK = {
    'Lead': 'LD',
    'Position': 'PS',
    'Claim': 'CL',
    'Evidence': 'EV',
    'Counterclaim': 'CCL',
    'Rebuttal': 'RB',
    'Concluding Statement': 'CS',
}
# Not included in the list returned by parse_special_token_list.
TOK_PHR = 'PH'


def parse_special_token_list(df: pd.DataFrame) -> t.List[str]:
    """Return the marker tokens of ``DISC_TYPE_TO_TOK`` in definition order.

    *df* is part of the signature but is not inspected.
    """
    return list(DISC_TYPE_TO_TOK.values())


def join_special_token_lists(*special_token_lists: t.List[str]) -> t.List[str]:
    """Concatenate token lists, dropping repeats while keeping first-seen order.

    Args:
        *special_token_lists: any number of token lists.

    Returns:
        A new list with every distinct token exactly once.
    """
    # dicts preserve insertion order, so fromkeys() de-duplicates in linear
    # time instead of scanning the result list for every token.
    return list(dict.fromkeys(tok for stl in special_token_lists for tok in stl))


# Type of a value passed to a model as a keyword argument.
ModelInputValue = t.Union[torch.Tensor, bool]


@dataclass
class TokenizerResult:
    """Tensors produced by tokenization, for one sample or a collated batch."""

    input_ids: torch.Tensor
    attention_mask: torch.Tensor

    def get_model_input(self, device) -> t.Dict[str, ModelInputValue]:
        """Return the tensors moved to *device*, keyed by model argument name."""
        on_device: t.Dict[str, ModelInputValue] = {}
        on_device['input_ids'] = self.input_ids.to(device)
        on_device['attention_mask'] = self.attention_mask.to(device)
        return on_device

    def to_collatable_dict(self) -> t.Dict[str, torch.Tensor]:
        """Return the tensors (not moved or copied) as a plain dict for a collate function."""
        return dict(
            input_ids=self.input_ids,
            attention_mask=self.attention_mask,
        )

    @classmethod
    def from_collateable_dict(cls, val: t.Dict[str, torch.Tensor]) -> TokenizerResult:
        """Build an instance from a mapping whose keys match the dataclass fields."""
        return cls(**val)

    def __len__(self) -> int:
        """Length of ``input_ids`` along its first dimension."""
        return len(self.input_ids)


class Tokenizer:
    """Wrapper around a Hugging Face tokenizer that returns ``TokenizerResult`` objects.

    Subclasses supply the wrapped tokenizer through :attr:`tokenizer` and
    implement :meth:`_build_result`.
    """
    result_type: t.Type[TokenizerResult]

    def __init__(self, padding_strategy: PaddingStrategy = PaddingStrategy.MAX_LENGTH):
        # Every token handed to add_special_token_list(), in call order.
        self._special_token_list = []
        self._padding_strategy = padding_strategy

    @property
    def tokenizer(self) -> PreTrainedTokenizerBase:
        """The wrapped Hugging Face tokenizer; must be provided by subclasses."""
        raise NotImplementedError()

    def __len__(self) -> int:
        """``len()`` of the wrapped tokenizer."""
        return len(self.tokenizer)

    @property
    def cls_token(self) -> str:
        """The wrapped tokenizer's ``cls_token``."""
        return self.tokenizer.cls_token

    @property
    def sep_token(self) -> str:
        """The wrapped tokenizer's ``sep_token``."""
        return self.tokenizer.sep_token

    @property
    def num_special_tokens(self) -> int:
        """How many tokens have been passed to :meth:`add_special_token_list` so far."""
        return len(self._special_token_list)

    def add_special_token_list(self, tok_list: t.List[str]):
        """Record *tok_list* and register it as additional special tokens on the tokenizer."""
        self._special_token_list.extend(tok_list)
        self.tokenizer.add_special_tokens({'additional_special_tokens': tok_list})

    def _build_result(
            self,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor,
            token_type_ids: torch.Tensor,) -> TokenizerResult:
        """Wrap the encoded tensors in a result object; must be implemented by subclasses."""
        raise NotImplementedError()

    def tokenize(
            self,
            *texts: str,
            max_len: int) -> TokenizerResult:
        """Encode *texts* with truncation to *max_len* and the configured padding strategy.

        Returns:
            The subclass-specific result holding ``input_ids``,
            ``attention_mask`` and ``token_type_ids`` as ``torch.long`` tensors.
        """
        encoding = self.tokenizer(
            *texts,
            truncation=True,
            max_length=max_len,
            padding=self._padding_strategy,
            return_attention_mask=True,
            return_token_type_ids=True)  # type: ignore

        def as_long_tensor(key: str) -> torch.Tensor:
            return torch.tensor(encoding[key], dtype=torch.long)

        return self._build_result(
            input_ids=as_long_tensor('input_ids'),
            attention_mask=as_long_tensor('attention_mask'),
            token_type_ids=as_long_tensor('token_type_ids'))


##### Deberta

In [None]:
@dataclass
class DebertaTokenizerResult(TokenizerResult):
    """``TokenizerResult`` that additionally carries ``token_type_ids``."""

    token_type_ids: torch.Tensor

    def get_model_input(self, device: str) -> t.Dict[str, ModelInputValue]:
        """Return the base model inputs plus ``token_type_ids``, all moved to *device*."""
        model_input = super().get_model_input(device=device)
        model_input['token_type_ids'] = self.token_type_ids.to(device=device)
        return model_input

    def to_collatable_dict(self) -> t.Dict[str, torch.Tensor]:
        """Return the base collatable dict extended with ``token_type_ids``."""
        collatable = super().to_collatable_dict()
        collatable['token_type_ids'] = self.token_type_ids
        return collatable


class DebertaTokenizer(Tokenizer):
    """Tokenizer wrapper whose underlying tokenizer is loaded with ``AutoTokenizer``.

    Results are returned as ``DebertaTokenizerResult`` (including ``token_type_ids``).
    """
    result_type = DebertaTokenizerResult

    def __init__(self, checkpoint: str, padding_strategy: PaddingStrategy = PaddingStrategy.MAX_LENGTH):
        """Load the tokenizer for *checkpoint*.

        Args:
            checkpoint: name or path passed to ``AutoTokenizer.from_pretrained``.
            padding_strategy: padding mode used by :meth:`tokenize`.
        """
        super().__init__(padding_strategy=padding_strategy)
        # Requires the module-level ``AutoTokenizer`` import from transformers.
        self._tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    @property
    def tokenizer(self) -> PreTrainedTokenizerBase:
        """The tokenizer loaded in ``__init__``."""
        return self._tokenizer

    def _build_result(
            self,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor,
            token_type_ids: torch.Tensor) -> TokenizerResult:
        """Bundle the three encoded tensors into a ``DebertaTokenizerResult``."""
        return DebertaTokenizerResult(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids)

##### Registry

In [None]:
# Supported backbone name -> tokenizer wrapper class.
_BACKBONE_TO_TOKENIZER_TYPE = {
    'microsoft/deberta-v3-small': DebertaTokenizer,
    'microsoft/deberta-v3-base': DebertaTokenizer,
    'microsoft/deberta-v3-large': DebertaTokenizer,
}


def get_tokenizer_for_backbone(
        backbone: str,
        checkpoint: str | None = None,
        padding_strategy: PaddingStrategy = PaddingStrategy.MAX_LENGTH) -> Tokenizer:
    """Create the tokenizer wrapper registered for *backbone*.

    Args:
        backbone: key into the backbone registry.
        checkpoint: checkpoint to load the tokenizer from; falls back to *backbone* when ``None``.
        padding_strategy: forwarded to the tokenizer wrapper.

    Raises:
        ValueError: if *backbone* is not in the registry.
    """
    if backbone not in _BACKBONE_TO_TOKENIZER_TYPE:
        raise ValueError(f'Backbone "{backbone}" is not supported.')
    tokenizer_cls = _BACKBONE_TO_TOKENIZER_TYPE[backbone]
    if checkpoint is None:
        checkpoint = backbone
    return tokenizer_cls(checkpoint, padding_strategy=padding_strategy)

#### Feature engineering

In [None]:
def replace_encoding_with_utf8(error: UnicodeError) -> t.Tuple[bytes, int]:
    """Codec error handler: emit the failing slice as UTF-8 bytes and resume after it."""
    failing_text = error.object[error.start : error.end]
    return failing_text.encode("utf-8"), error.end


def replace_decoding_with_cp1252(error: UnicodeError) -> t.Tuple[str, int]:
    """Codec error handler: decode the failing slice as cp1252 and resume after it."""
    failing_bytes = error.object[error.start : error.end]
    return failing_bytes.decode("cp1252"), error.end

# Make both handlers selectable by name via the ``errors=`` argument of encode()/decode().
codecs.register_error("replace_encoding_with_utf8", replace_encoding_with_utf8)
codecs.register_error("replace_decoding_with_cp1252", replace_decoding_with_cp1252)

def preprocess_text(text: str) -> str:
    """Repair mixed UTF-8/cp1252 encoding damage, then transliterate with ``unidecode``.

    The text goes through encode/decode round trips that use the custom
    error handlers registered under ``replace_decoding_with_cp1252`` and
    ``replace_encoding_with_utf8``.
    """
    raw_bytes = text.encode("raw_unicode_escape")
    first_pass = raw_bytes.decode("utf-8", errors="replace_decoding_with_cp1252")
    cp1252_bytes = first_pass.encode("cp1252", errors="replace_encoding_with_utf8")
    repaired = cp1252_bytes.decode("utf-8", errors="replace_decoding_with_cp1252")
    return unidecode(repaired)

#### Datasets

In [None]:
@dataclass
class DatasetItem:
    """A dataset sample, or several samples merged into one batch by ``Collator``."""
    # Sample identifiers; ``Dataset.__getitem__`` stores a single-element list here.
    id: t.List[str]
    # Tokenized input belonging to the sample(s).
    tokenizer_result: TokenizerResult


# Signature of a function that merges per-sample tensor dicts into one batched dict.
TorchCollator = t.Callable[[t.List[t.Dict[str, torch.Tensor]]], t.Dict[str, torch.Tensor]]


class Collator:
    """Merge a list of ``DatasetItem`` objects into one batched ``DatasetItem``."""

    def __init__(self, tokenizer_result_collator: TorchCollator = default_collate_fn):
        # Function used to batch the per-sample tensor dicts.
        self._tokenizer_result_collator = tokenizer_result_collator

    def __call__(
            self,
            item_list: t.List[DatasetItem]) -> DatasetItem:
        """Concatenate the ids and collate the tokenizer results of *item_list*.

        The batched result has the same tokenizer-result type as the first item.
        """
        assert len(item_list) > 0
        result_cls = type(item_list[0].tokenizer_result)
        merged_ids = [sample_id for item in item_list for sample_id in item.id]
        tensor_dicts = [item.tokenizer_result.to_collatable_dict() for item in item_list]
        batched = self._tokenizer_result_collator(tensor_dicts)
        return DatasetItem(
            id=merged_ids,
            tokenizer_result=result_cls.from_collateable_dict(batched))


class Dataset(TorchDataset):
    """Torch dataset yielding one tokenized ``DatasetItem`` per row of a discourse frame.

    The frame must provide the columns ``discourse_id``, ``discourse_type``,
    ``discourse_text``, ``essay_id``, ``essay_text`` and ``pos``.
    """
    _CLS_TO_INT_DICT = {
        'Ineffective': 0,
        'Adequate': 1,
        'Effective': 2,
    }

    def __init__(
            self,
            df: pd.DataFrame,
            tokenizer: Tokenizer,
            max_len: int,):
        """Store a private copy of *df* re-indexed to 0..n-1.

        Args:
            df: source rows; not modified.
            tokenizer: wrapper used to build and tokenize the inputs.
            max_len: truncation length passed to the tokenizer.
        """
        self._df = df.copy().reset_index(drop=True)
        self._tokenizer = tokenizer
        self._max_len = max_len
        # Lazily built lookup: essay id (as str) -> largest ``pos`` in that essay.
        self._max_pos_by_essay: t.Optional[t.Dict[str, int]] = None

    def _get_max_pos(self, essay_id: str) -> int:
        """Return the largest ``pos`` among the rows of *essay_id*."""
        if self._max_pos_by_essay is None:
            # One grouped pass over the frame instead of re-filtering it for every row.
            essay_keys = self._df['essay_id'].astype(str)
            max_pos = self._df['pos'].astype(int).groupby(essay_keys).max()
            self._max_pos_by_essay = {str(key): int(value) for key, value in max_pos.items()}
        return self._max_pos_by_essay[essay_id]

    def _get_tokenizer_input(self, row: t.Dict[str, t.Any]) -> str:
        """Build the text fed to the tokenizer for *row*.

        Layout: ``<type> <pos> / <max pos in essay> <sep> <discourse text> <sep> <essay text>``.
        """
        disc_type = str(row['discourse_type'])
        text = str(row['discourse_text'])
        essay_id = str(row['essay_id'])
        essay_text = str(row['essay_text'])
        pos = int(row['pos'])

        sep = self._tokenizer.sep_token
        max_pos = self._get_max_pos(essay_id)

        return f'{disc_type} {pos} / {max_pos} {sep} {text} {sep} {essay_text}'

    def sort_by_tokenizer_input_len(self):
        """Reorder the rows by ascending character length of their tokenizer input.

        The length is stored in the ``_tok_input_len`` column. The index is
        not reset, so it keeps recording each row's position before sorting.
        """
        if self._df.empty:
            return
        # Sort on the *length* of the input text; sorting the text itself
        # would order the rows alphabetically.
        self._df['_tok_input_len'] = [
            len(self._get_tokenizer_input(row)) for _, row in self._df.iterrows()
        ]
        # Stable sort keeps equally long inputs in their previous relative order.
        self._df = self._df.sort_values('_tok_input_len', kind='stable')

    def __len__(self) -> int:
        """Number of rows."""
        return len(self._df)

    def __getitem__(self, idx: int) -> DatasetItem:
        """Tokenize the row at position *idx* (in the current row order)."""
        row = self._df.iloc[idx]

        tokenizer_input = self._get_tokenizer_input(row)
        discourse_id = str(row['discourse_id'])

        tokenizer_result = self._tokenizer.tokenize(
            tokenizer_input, max_len=self._max_len)

        return DatasetItem(
            id=[discourse_id],
            tokenizer_result=tokenizer_result)

#### Reporting

In [None]:
def report_k_fold_distribution(df: pd.DataFrame, stratify_by: str) -> pd.DataFrame:
    """Summarize every fold of *df* as one row of a report frame.

    Each row holds the fold number, its sample count, the mean and standard
    deviation of ``discourse_text_len`` and ``essay_text_len``, and one
    ``num_<class>`` count per distinct value of the *stratify_by* column.
    """
    class_values = sorted(df[stratify_by].unique())
    fold_values = sorted(series(df['fold']).unique())

    def summarize(fold) -> t.Dict[str, t.Any]:
        fold_df = dataframe(df[df['fold'] == fold])
        summary: t.Dict[str, t.Any] = {
            'fold': fold,
            'num_samples': len(fold_df),
            'mean_discourse_text_len': fold_df['discourse_text_len'].mean(),
            'std_discourse_text_len': fold_df['discourse_text_len'].std(),
            'mean_essay_text_len': fold_df['essay_text_len'].mean(),
            'std_essay_text_len': fold_df['essay_text_len'].std(),
        }
        for label in class_values:
            summary[f'num_{label.lower()}'] = len(fold_df[fold_df[stratify_by] == label])
        return summary

    return pd.DataFrame([summarize(fold) for fold in fold_values])

#### Model

In [None]:
class Model(torch.nn.Module):
    """Abstract model consisting of a backbone and a head.

    Subclasses implement :attr:`backbone`, :attr:`head_named_parameters`,
    :meth:`resize_token_embeddings` and :meth:`forward`.
    """

    @property
    def backbone(self) -> torch.nn.Module:
        """The backbone module; must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def backbone_named_parameters(self) -> t.Iterator[t.Tuple[str, torch.nn.parameter.Parameter]]:
        """``named_parameters()`` of the backbone."""
        return self.backbone.named_parameters()

    @property
    def head_named_parameters(self) -> t.Iterator[t.Tuple[str, torch.nn.parameter.Parameter]]:
        """Named parameters of the head; must be provided by subclasses."""
        raise NotImplementedError()

    def resize_token_embeddings(self, num_tokens: int):
        """Resize the token embeddings to *num_tokens*; must be implemented by subclasses."""
        raise NotImplementedError()

    def forward(self, **inputs: ModelInputValue) -> torch.Tensor:
        """Run the model on keyword inputs; must be implemented by subclasses."""
        raise NotImplementedError()

    def load_backbone(self, from_checkpoint: str):
        """Load the state dict stored at *from_checkpoint* into the backbone.

        The file is mapped onto the backbone's ``device`` while loading.
        """
        target_device = self.backbone.device
        state_dict = torch.load(from_checkpoint, map_location=target_device)
        self.backbone.load_state_dict(state_dict)


class ModelBuilder:
    """Base class holding the settings needed to construct a ``Model``."""

    def __init__(
            self,
            backbone_checkpoint: str,
            num_classes: int,
            enable_gradient_checkpointing: bool = False):
        """Remember the construction settings; nothing is built yet."""
        self._enable_gradient_checkpointing = enable_gradient_checkpointing
        self._num_classes = num_classes
        self._backbone_checkpoint = backbone_checkpoint

    def build(self) -> Model:
        """Create the model; must be implemented by subclasses."""
        raise NotImplementedError()


class ClsTokenPooler(torch.nn.Module):
    """Pool by taking the features at index 0 of the second dimension."""

    def forward(self, features: torch.Tensor, mask: torch.Tensor | None) -> torch.Tensor:
        """Return ``features[:, 0, :]``; *mask* is accepted but ignored."""
        first_position = features[:, 0, :]
        return first_position


class AttentionHeadPooler(torch.nn.Module):
    """Pool a sequence as a learned, softmax-weighted sum over dimension 1."""

    def __init__(self, h_size: int, hidden_dim: int | None = None):
        """Build the scoring network ``Linear -> LayerNorm -> GELU -> Linear(1)``.

        Args:
            h_size: size of the last feature dimension.
            hidden_dim: width of the scoring network; defaults to *h_size*.
        """
        super().__init__()
        if hidden_dim is None:
            hidden_dim = h_size
        self._attention = torch.nn.Sequential(
            torch.nn.Linear(h_size, hidden_dim),
            torch.nn.LayerNorm(hidden_dim),
            torch.nn.GELU(),
            torch.nn.Linear(hidden_dim, 1))

    def forward(self, features: torch.Tensor, mask: torch.Tensor | None) -> torch.Tensor:
        """Return the attention-weighted sum of *features* over dimension 1.

        Positions where *mask* equals 0 get a score of ``-inf`` and thus zero weight.
        """
        score = self._attention(features)
        if mask is not None:
            masked_out = mask == 0
            score[masked_out] = float('-inf')
        weights = torch.softmax(score, dim=1)
        return torch.sum(weights * features, dim=1)


class MultiStagedDropout(torch.nn.Module):
    """Average a classifier's output over several dropout layers of increasing rate.

    Stage ``i`` applies dropout with probability ``start_prob + increment * i``
    to the same input and runs the shared classifier on it; the per-stage
    outputs are averaged.
    """

    def __init__(
            self,
            classifier: torch.nn.Module,
            num_stages: int,
            start_prob: float,
            increment: float,
            dropout_cls: t.Type[torch.nn.Module] = StableDropout):
        """Create the dropout stages.

        Args:
            classifier: module applied after every dropout stage.
            num_stages: number of dropout stages (at least 1).
            start_prob: dropout probability of the first stage.
            increment: amount added to the probability for each further stage.
            dropout_cls: dropout module class, instantiated with the probability.

        Raises:
            ValueError: if *num_stages* is smaller than 1.
        """
        super().__init__()
        if num_stages < 1:
            # forward() stacks one output per stage and cannot average zero of them.
            raise ValueError(f'num_stages must be at least 1, got {num_stages}.')
        self._classifier = classifier
        self._dropout_list = torch.nn.ModuleList([
            dropout_cls(start_prob + (increment * i)) for i in range(num_stages)
        ])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the mean classifier output across all dropout stages."""
        stage_outputs = [self._classifier(drop(x)) for drop in self._dropout_list]
        return torch.stack(stage_outputs, dim=0).mean(dim=0)


class _AutoModel(Model):
    """``AutoModel`` backbone followed by attention pooling and a multi-stage-dropout classifier."""

    def __init__(self, backbone_checkpoint: str, num_classes: int):
        """Load config and backbone from *backbone_checkpoint* and create the head.

        The head is an ``AttentionHeadPooler`` followed by a linear classifier
        wrapped in ``MultiStagedDropout`` (5 stages, probabilities 0.1 to 0.5).
        """
        super().__init__()
        config = AutoConfig.from_pretrained(backbone_checkpoint)
        # forward() reads the backbone's ``hidden_states`` output.
        config.output_hidden_states = True
        print(f'Original hidden dropout: {config.hidden_dropout_prob}')
        self._transformer = AutoModel.from_pretrained(backbone_checkpoint, config=config)
        self._pooler = AttentionHeadPooler(h_size=config.hidden_size)
        self._classifier = torch.nn.Sequential(
            MultiStagedDropout(
                classifier=torch.nn.Linear(in_features=config.hidden_size, out_features=num_classes),
                num_stages=5,
                start_prob=0.1,
                increment=0.1),
        )

    @property
    def backbone(self) -> torch.nn.Module:
        """The transformer loaded in ``__init__``."""
        return self._transformer

    @property
    def head_named_parameters(self) -> t.Iterator[t.Tuple[str, torch.nn.parameter.Parameter]]:
        """Named parameters of the pooler followed by those of the classifier."""
        pooler_params = self._pooler.named_parameters()
        classifier_params = self._classifier.named_parameters()
        return itertools.chain(pooler_params, classifier_params)

    def resize_token_embeddings(self, num_tokens: int):
        """Forward the resize request to the transformer."""
        self._transformer.resize_token_embeddings(num_tokens)

    def forward(self, **inputs: ModelInputValue) -> torch.Tensor:
        """Run the transformer, pool its last hidden state and classify the pooled vector."""
        hidden_states = self._transformer(**inputs).hidden_states
        pooled = self._pooler(hidden_states[-1], mask=inputs['attention_mask'])
        return self._classifier(pooled)


class AutoModelBuilder(ModelBuilder):
    """Builder for ``_AutoModel`` that can also load saved backbone weights."""

    def __init__(
            self,
            backbone_checkpoint: str,
            num_classes: int,
            enable_gradient_checkpointing: bool = False,
            pretrained_backbone_checkpoint: str | None = None):
        """Store the settings.

        Args:
            backbone_checkpoint: checkpoint the backbone architecture is created from.
            num_classes: output size of the classifier.
            enable_gradient_checkpointing: enable gradient checkpointing on the backbone.
            pretrained_backbone_checkpoint: optional state-dict file loaded into the backbone.
        """
        super().__init__(
            backbone_checkpoint=backbone_checkpoint,
            num_classes=num_classes,
            enable_gradient_checkpointing=enable_gradient_checkpointing)
        self._pretrained_backbone_checkpoint = pretrained_backbone_checkpoint

    def build(self) -> Model:
        """Create the model, then apply gradient checkpointing and saved weights if configured."""
        model = _AutoModel(self._backbone_checkpoint, num_classes=self._num_classes)
        if self._enable_gradient_checkpointing:
            model.backbone.gradient_checkpointing_enable()  # type: ignore
        saved_weights = self._pretrained_backbone_checkpoint
        if saved_weights is not None:
            model.load_backbone(saved_weights)
        return model


#### Ensembling

In [None]:
_T = t.TypeVar('_T')

Pred1D = t.List[np.ndarray]
Pred2D = t.List[t.List[np.ndarray]]
Ensemble1DStrategy = t.Callable[[Pred1D], np.ndarray]
Ensemble2DStrategy = t.Callable[[Pred2D], np.ndarray]


def _np_mean(x_list: t.List[np.ndarray]) -> np.ndarray:
    return np.stack(x_list, axis=0).mean(axis=0)


def _softmax(x: np.ndarray, axis: int = -1) -> np.ndarray:
    return np.exp(x) / np.exp(x).sum(axis=axis, keepdims=True)


def mean_1d_ensemble_strategy(pred_1d: Pred1D) -> np.ndarray:
    """Average the arrays in *pred_1d* element-wise, then apply softmax over the last axis."""
    averaged = _np_mean(pred_1d)
    return _softmax(averaged, axis=-1)


def mean_2d_ensemble_strategy(pred_2d: Pred2D) -> np.ndarray:
    """Average each inner list, average those means, then apply softmax over the last axis."""
    group_means = [_np_mean(group) for group in pred_2d]
    overall_mean = _np_mean(group_means)
    return _softmax(overall_mean, axis=-1)


#### Iteration function

In [None]:
@torch.no_grad()
def do_pred_iteration(
        model: Model,
        data_loader: DataLoader,
        device: str,) -> np.ndarray:
    """Run *model* in eval mode over *data_loader* and collect softmax scores.

    Args:
        model: model to evaluate; switched to eval mode.
        data_loader: yields ``DatasetItem`` batches.
        device: device the model inputs are moved to.

    Returns:
        An array with one row of scores per sample, in loader order.
    """
    model.eval()
    score_list: t.List[t.List[float]] = []

    batch: DatasetItem
    for batch in tqdm(data_loader, desc='Validating.', total=len(data_loader)):
        model_input = batch.tokenizer_result.get_model_input(device=device)

        logit = model(**model_input).squeeze(-1)
        pred = torch_f.softmax(logit, dim=-1)

        # extend (not append): the result needs one row per sample rather than
        # one nested list per batch, which is ragged when the last batch is short.
        score_list.extend(pred.cpu().tolist())

    return np.array(score_list)

#### Main function

In [None]:
def _is_gpu_device(device: t.Union[str, torch.device]) -> bool:
    return str(device).startswith('cuda')

def _predict_by_model(
        df: pd.DataFrame,
        model_builder: ModelBuilder,
        tokenizer: Tokenizer,
        device: str,
        batch_size: int,
        max_len: int,
        num_workers: int,
        collator: Collator | None = None,
        add_new_special_tokens: bool = False,
        ) -> np.ndarray:
    """Build one model and predict scores for every row of *df*.

    Rows are processed sorted by tokenizer-input length, but the returned
    scores are put back into the row order of *df*.

    Args:
        df: rows to predict.
        model_builder: builds the model to evaluate.
        tokenizer: tokenizer wrapper shared with the dataset.
        device: device to run on.
        batch_size: loader batch size.
        max_len: truncation length for tokenization.
        num_workers: loader worker count.
        collator: batch collator; defaults to ``Collator(default_collate_fn)``.
        add_new_special_tokens: register the discourse marker tokens and
            resize the model's embeddings before predicting.

    Returns:
        Score array whose first axis follows the row order of *df*.
    """
    dataset = Dataset(
        df=df,
        tokenizer=tokenizer,
        max_len=max_len)
    dataset.sort_by_tokenizer_input_len()

    collator = collator if collator is not None else Collator(default_collate_fn)
    data_loader = DataLoader(
        dataset,
        collate_fn=collator,  # type: ignore
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=_is_gpu_device(device))
    model = model_builder.build().to(device)

    if add_new_special_tokens:
        # NOTE(review): the embeddings are resized after build(); if build()
        # loads saved weights, confirm they match the un-resized vocabulary.
        special_token_list = parse_special_token_list(df)
        print(f'Adding new special tokens: {special_token_list}')
        tokenizer.add_special_token_list(special_token_list)
        if tokenizer.num_special_tokens > 0:
            model.resize_token_embeddings(len(tokenizer))

    sorted_scores = np.asarray(do_pred_iteration(
        model=model,
        data_loader=data_loader,
        device=device))

    num_samples = len(dataset)
    if sorted_scores.shape[:1] != (num_samples,):
        # Scores may come back grouped per batch; flatten to one row per sample.
        sorted_scores = sorted_scores.reshape(num_samples, -1)

    # Dataset re-indexes its frame to 0..n-1 before sorting, so the index of the
    # sorted frame is each row's position in ``df``. Scatter the scores back so
    # callers can pair them with ``df`` row by row.
    original_position = dataset._df.index.to_numpy()
    scores = np.empty_like(sorted_scores)
    scores[original_position] = sorted_scores
    return scores


def _arr_to_score_dict(arr: np.ndarray) -> t.Dict[str, float]:
    return {
        'score_ineffective': arr[0],
        'score_adequate': arr[1],
        'score_effective': arr[2],
    }


def predict_oof_by_k_fold_model(
        all_df: pd.DataFrame,
        fold_list: t.List[int],
        model_builder_list: t.List[ModelBuilder],
        tokenizer: Tokenizer,
        device: str,
        batch_size: int,
        max_len: int,
        num_workers: int,
        collator: Collator | None = None,
        add_new_special_tokens: bool = False) -> pd.DataFrame:
    """Predict every fold's rows with the model builder paired with that fold.

    Args:
        all_df: frame with a ``fold`` column and an ``id`` column.
            NOTE(review): ``Dataset`` reads ``discourse_id``; confirm the
            frame really has a separate ``id`` column.
        fold_list: fold numbers to predict.
        model_builder_list: one builder per entry of *fold_list*, same order.
        tokenizer, device, batch_size, max_len, num_workers, collator,
        add_new_special_tokens: forwarded to ``_predict_by_model``.

    Returns:
        One row per predicted sample with ``id`` and the three
        ``score_*`` columns produced by ``_arr_to_score_dict``.

    Raises:
        ValueError: if *fold_list* and *model_builder_list* differ in length.
    """
    if len(fold_list) != len(model_builder_list):
        # zip() would silently drop the surplus folds or builders.
        raise ValueError(
            f'Got {len(fold_list)} folds but {len(model_builder_list)} model builders.')

    row_list: t.List[t.Dict[str, t.Any]] = []
    for fold, model_builder in zip(fold_list, model_builder_list):
        df = dataframe(all_df[all_df['fold'] == fold])
        score_arr = _predict_by_model(
            df=df,
            model_builder=model_builder,
            tokenizer=tokenizer,
            device=device,
            batch_size=batch_size,
            max_len=max_len,
            num_workers=num_workers,
            collator=collator,
            add_new_special_tokens=add_new_special_tokens)
        # A 2-D score array cannot be stored as a single DataFrame column, so
        # expand each sample's scores into named columns. This expects one
        # score row per sample, in the row order of ``df``.
        row_list.extend(
            {'id': sample_id, **_arr_to_score_dict(arr)}
            for sample_id, arr in zip(df['id'].to_list(), score_arr))
    return pd.DataFrame(row_list)


def predict_by_k_fold_model_list(
        df: pd.DataFrame,
        model_builder_2dlist: t.List[t.List[ModelBuilder]],
        ensemble_strategy: Ensemble2DStrategy,
        tokenizer: Tokenizer,
        device: str,
        batch_size: int,
        max_len: int,
        num_workers: int,
        collator: Collator | None = None,
        add_new_special_tokens: bool = False) -> pd.DataFrame:
    """Predict *df* with every model of every group and ensemble the results.

    Args:
        df: rows to predict; must provide an ``id`` column.
        model_builder_2dlist: groups of model builders; each inner list
            yields one list of predictions.
        ensemble_strategy: reduces the nested predictions to one score array.
        tokenizer, device, batch_size, max_len, num_workers, collator,
        add_new_special_tokens: forwarded to ``_predict_by_model``.

    Returns:
        One row per sample with ``id`` and the three ``score_*`` columns.

    NOTE(review): ``do_pred_iteration`` already applies softmax; a strategy
    such as ``mean_2d_ensemble_strategy`` applies softmax again. Confirm
    that this is intended.
    """
    id_list = df['id'].to_list()
    pred_2d: t.List[t.List[np.ndarray]] = []
    for model_builder_list in model_builder_2dlist:
        pred_1d: t.List[np.ndarray] = []
        for model_builder in model_builder_list:
            pred_1d.append(_predict_by_model(
                df=df,
                model_builder=model_builder,
                tokenizer=tokenizer,
                device=device,
                batch_size=batch_size,
                max_len=max_len,
                num_workers=num_workers,
                collator=collator,
                add_new_special_tokens=add_new_special_tokens))
        pred_2d.append(pred_1d)
    # The strategy must receive the collected predictions, not the ids.
    score_arr = ensemble_strategy(pred_2d)
    return pd.DataFrame([
        {
            'id': sample_id,
            **_arr_to_score_dict(arr)
        }
        for sample_id, arr in zip(id_list, score_arr)])

#### Parameter definitions

In [None]:
# Run-wide settings.
SEED = 42  # passed to seed_everything() and to the fold assignment
DEVICE = 'cuda'
NUM_FOLDS = 5

CONTEST_NAME = 'feedback-prize-effectiveness'

# Directory layout below the Google Drive root (presumably a Colab mount; confirm).
GDRIVE_DIR = Path('/content/drive/MyDrive')
GDRIVE_DATA_DIR = GDRIVE_DIR / 'Data'
DATASET_GDRIVE_DIR = GDRIVE_DATA_DIR / CONTEST_NAME

# Saved model checkpoints are read from MODELS_DIR; predictions are written to OUTPUT_DIR.
MODELS_DIR = GDRIVE_DIR / f'models/{CONTEST_NAME}'
OUTPUT_DIR = GDRIVE_DIR / f'oof/{CONTEST_NAME}/seq-cls'

# Inference settings.
BATCH_SIZE = 8
MAX_LEN = 512  # truncation length passed to the tokenizer
NUM_WORKERS = 2  # DataLoader worker count
BACKBONE = 'microsoft/deberta-v3-large'

#### Data loading

In [None]:
# Seed everything, load the extended training set and assign folds:
# stratified by effectiveness label, grouped by essay.
seed_everything(SEED)
all_df = read_csv(DATASET_GDRIVE_DIR / 'train_ext.csv')
_fold_strategy = StratifyAndGroupByColumnValStrategy(
    stratify_by='discourse_effectiveness',
    group_by='essay_id',
    seed=SEED)
all_df = _fold_strategy.assign_folds(all_df, num_folds=NUM_FOLDS)
all_df

#### Entrypoint

In [None]:
# No padding at tokenization time: the collator pads each batch instead.
_tokenizer = get_tokenizer_for_backbone(BACKBONE, padding_strategy=PaddingStrategy.DO_NOT_PAD)

# One builder per fold, each pointing at that fold's saved backbone weights.
_model_builder_list = [
    AutoModelBuilder(
        backbone_checkpoint=BACKBONE,
        num_classes=3,
        pretrained_backbone_checkpoint=str(MODELS_DIR / f'microsoft-deberta-v3-large-v2-1-inf-fold_{fold}-seed_42-gpu_teslap100-pcie-16gb.pt'))
    for fold in range(NUM_FOLDS)
]
_fold_list = list(range(NUM_FOLDS))
_collator = Collator(DataCollatorWithPadding(_tokenizer.tokenizer))

oof_df = predict_oof_by_k_fold_model(
    all_df=all_df,
    fold_list=_fold_list,
    model_builder_list=_model_builder_list,
    tokenizer=_tokenizer,
    device=DEVICE,
    batch_size=BATCH_SIZE,
    max_len=MAX_LEN,
    num_workers=NUM_WORKERS,
    collator=_collator,
    add_new_special_tokens=True)
oof_df

In [None]:
# to_csv() does not create missing directories, so make sure the target folder exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
oof_df.to_csv(OUTPUT_DIR / 'microsoft-deberta-v3-large-v2.csv', index=False)