In [1]:
import logging
import os
import sys
import time
from copy import deepcopy

import datasets
import pandas as pd
import torch
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import get_cosine_schedule_with_warmup

# Make the project's helper modules under ../scripts importable.
# NOTE: the path is relative to the current working directory, so the notebook
# has to be started from its own folder.
sys.path.append("../scripts")

try:
    from ai_dataset import AiDataset, DatasetType
    from ai_loader import AiCollator, AiCollatorTrain
    from ai_model import AiModel, ModelType
    from ai_optimizer import get_optimizer
    from metric_utils import compute_metrics
    from train_utils import AverageMeter, as_minutes, get_lr, save_checkpoint

except Exception as e:
    # Still surfaces as ImportError (the type this cell has always raised), but
    # the root cause is now carried in the message and in the exception chain
    # instead of being printed separately and then discarded.
    raise ImportError(
        f"could not import the project modules from ../scripts: {e}"
    ) from e

logger = get_logger(__name__)


# Widen pandas' column display so long `text` values are not truncated early.
pd.options.display.max_colwidth = 1000

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run identifiers: NAME is used for checkpoint file names, MODEL_NAME is the
# pretrained model id handed to the dataset and model builders.
NAME = "deberta_base"
MODEL_NAME = "microsoft/deberta-v3-base"
# Enum members from the project modules (ai_model / ai_dataset); presumably they
# select the DeBERTa-specific code paths - TODO confirm in ../scripts.
MODEL_TYPE = ModelType.DEBERTA
DATASET_TYPE = DatasetType.DEBERTA

In [3]:
# Load the generated dataset; the path is relative to the working directory.
df = pd.read_csv("../../data/generated/dataset.csv")

# Cell output: the raw column names of the loaded frame.
df.columns

Index(['task', 'code', 'generated'], dtype='object')

In [4]:
# Rename by name rather than by position: assigning a 3-item list to
# `df.columns` fails with a length mismatch once the `id` column exists, so the
# cell could not be re-run. `rename` is a no-op when `code` is already `text`.
df = df.rename(columns={"code": "text"})

# Fail early with a readable message if the CSV schema is not the expected one.
missing_columns = {"task", "text", "generated"} - set(df.columns)
if missing_columns:
    raise ValueError(f"dataset is missing expected columns: {sorted(missing_columns)}")

# Give every row a stable 1-based identifier on a clean 0..n-1 index.
df = df.reset_index(drop=True)
df["id"] = df.index + 1

In [5]:
# Two-stage stratified split on the `generated` label: 80% goes to training,
# then the held-out 20% is divided 75/25 into validation and test.
train_df, val_prep = train_test_split(
    df,
    test_size=0.2,
    random_state=420,
    stratify=df["generated"],
)
valid_df, test_df = train_test_split(
    val_prep,
    test_size=0.25,
    random_state=420,
    stratify=val_prep["generated"],
)

# Drop the shuffled source indices so that every split is indexed 0..n-1.
train_df, valid_df, test_df = (
    part.reset_index(drop=True) for part in (train_df, valid_df, test_df)
)

# Cell output: number of rows in each split.
tuple(map(len, (train_df, valid_df, test_df)))

(9942, 1864, 622)

In [6]:
accelerator = Accelerator(gradient_accumulation_steps=1)

# Configure root logging, then emit the accelerator state once from every
# process (main_process_only=False) so each worker's setup is visible in the log.
log_settings = {
    "format": "%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    "datefmt": "%m/%d/%Y %H:%M:%S",
    "level": logging.INFO,
}
logging.basicConfig(**log_settings)
logger.info(accelerator.state, main_process_only=False)


def print_line():
    """Print a horizontal rule ('#', fifty '~~' pairs, '#') via ``accelerator.print``."""
    ruler = "".join(("#", "~~" * 50, "#"))
    accelerator.print(ruler)


# Library log verbosity: every process other than the local main one is limited
# to errors; the local main process keeps the more informative levels.
if not accelerator.is_local_main_process:
    datasets.utils.logging.set_verbosity_error()
    transformers.utils.logging.set_verbosity_error()
else:
    datasets.utils.logging.set_verbosity_warning()
    transformers.utils.logging.set_verbosity_info()

# ------- Runtime Configs -----------------------------------------------------------#
print_line()
accelerator.print(f"setting seed: {42}")
set_seed(42)

# Only the main process creates the output directory for predictions.
if accelerator.is_main_process:
    os.makedirs("../models/r_ranking", exist_ok=True)
print_line()

05/06/2025 22:34:38 - INFO - __main__ - Distributed environment: DistributedType.NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda

Mixed precision type: no



#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
setting seed: 42
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


In [7]:
# Distinct task ids in the training split, plus a task -> [row ids] lookup.
prompt_ids = train_df["task"].unique().tolist()
gdf = train_df.groupby("task")["id"].apply(list).reset_index()
prompt2ids = {task: row_ids for task, row_ids in zip(gdf["task"], gdf["id"])}

# Quick overview of the three splits.
accelerator.print(f"shape of train data: {train_df.shape}")
accelerator.print(f"{train_df.head()}")
accelerator.print(f"shape of validation data: {valid_df.shape}")
accelerator.print(f"shape of test data: {test_df.shape}")
accelerator.print(f"Prompts: {prompt_ids}")

# Build the datasets inside accelerate's main_process_first() context.
with accelerator.main_process_first():
    dataset_creator = AiDataset(MODEL_NAME, DATASET_TYPE)

    train_ds, valid_ds, test_ds = (
        dataset_creator.get_dataset(frame) for frame in (train_df, valid_df, test_df)
    )

tokenizer = dataset_creator.tokenizer

# ------- data loaders ----------------------------------------------------------------#
# Only these columns are exposed to the collators.
keep_columns = ["id", "input_ids", "attention_mask", "generated"]

train_ds.set_format(type=None, columns=keep_columns)

# sort valid dataset for faster evaluation
valid_ds = valid_ds.sort("input_length")

valid_ds.set_format(type=None, columns=keep_columns)
# Read the ids from the *sorted* dataset (as is done for the test split below).
# Taking them from `valid_df` kept the unsorted row order, so the ids written to
# result_df / oof_df did not line up with the predictions.
valid_ids = valid_ds["id"]

test_ds = test_ds.sort("input_length")

test_ds.set_format(type=None, columns=keep_columns)
test_ids = test_ds["id"]

# ---
# Extra context handed to the training collator: the training dataset itself
# and the task lookups built earlier.
kwargs = {
    "train_ds": train_ds,
    "prompt_ids": prompt_ids,
    "prompt2ids": prompt2ids,
}

data_collector_train = AiCollatorTrain(
    tokenizer=tokenizer, pad_to_multiple_of=64, kwargs=kwargs
)
data_collector = AiCollator(tokenizer=tokenizer, pad_to_multiple_of=64)

# Training batches are shuffled and use the training collator.
train_dl = DataLoader(
    train_ds, batch_size=4, shuffle=True, collate_fn=data_collector_train
)

# Validation and test loaders share the same settings and keep dataset order.
eval_loader_options = {"batch_size": 4, "shuffle": False, "collate_fn": data_collector}
valid_dl = DataLoader(valid_ds, **eval_loader_options)
test_dl = DataLoader(test_ds, **eval_loader_options)

accelerator.print("data preparation done...")
print_line()

shape of train data: (9942, 4)
                                   task  \
0  80f0773c-d8da-4f89-8ff8-f91226bfbbba   
1  4ecec1ec-211d-4ce0-bd4b-ee2498ae1afa   
2  5986f14f-59c5-428e-84aa-553175c8bdb9   
3  94a4304c-1628-4b48-a1bb-62970467c416   
4  901dd977-9e8b-4b54-9730-6aeaa9d4a9e7   

                                                                                                                                                                             text  \
0                                                                                                                       \r\na = int(input())\r\nvol = a * a * a\r\nprint(vol)\r\n   
1                                \r\nN = 12345\r\nres = []\r\nwhile N > 0:\r\n    digit = N % 10\r\n    res.append(digit)\r\n    N = N // 10\r\nfor i in res:\r\n    print(i)\r\n   
2  asam,fof=map(int, input().split())\r\nw=(asam*fof)\r\neue=int(input())\r\n\r\nif w//9>=eue:\r\n\tprint('NO')\r\n\tprint(0)\r\nelse:\r\n\tprint('YES')\r\n\tprint(eue

loading file spm.model from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-base\snapshots\8ccc9b6f36199bec6961081d44eb72fb3f7353f3\spm.model
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-base\snapshots\8ccc9b6f36199bec6961081d44eb72fb3f7353f3\tokenizer_config.json
loading file tokenizer.json from cache at None
loading file chat_template.jinja from cache at None
loading configuration file config.json from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-base\snapshots\8ccc9b6f36199bec6961081d44eb72fb3f7353f3\config.json
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "leg

Column name ['__index_level_0__'] not in the dataset. Current columns in the dataset: ['task', 'text', 'generated', 'id', 'input_ids', 'attention_mask', 'input_length']


Map: 100%|██████████| 1864/1864 [00:00<00:00, 7253.06 examples/s]
Map: 100%|██████████| 1864/1864 [00:00<00:00, 37280.92 examples/s]


Column name ['__index_level_0__'] not in the dataset. Current columns in the dataset: ['task', 'text', 'generated', 'id', 'input_ids', 'attention_mask', 'input_length']


Map: 100%|██████████| 622/622 [00:00<00:00, 7404.88 examples/s]
Map: 100%|██████████| 622/622 [00:00<00:00, 34301.34 examples/s]

Column name ['__index_level_0__'] not in the dataset. Current columns in the dataset: ['task', 'text', 'generated', 'id', 'input_ids', 'attention_mask', 'input_length']
setting random seed in data collator as: 1746560086504
data preparation done...
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#





In [8]:
# -------- Evaluation -------------------------------------------------------------#
def run_evaluation(accelerator, model, valid_dl, valid_ids):
    """Score every batch of ``valid_dl`` and bundle metrics with per-row predictions.

    Args:
        accelerator: provides ``is_local_main_process`` and ``gather_for_metrics``.
        model: called as ``model(**batch)``; must return a pair whose first
            element holds the logits.
        valid_dl: sized iterable of batch dicts, each with a ``"labels"`` tensor.
        valid_ids: one id per evaluated row, in the order the loader yields rows.

    Returns:
        dict with ``"scores"`` (result of ``compute_metrics``), ``"result_df"``
        (columns id / predictions / truths) and ``"oof_df"`` (columns id / generated).
    """
    model.eval()

    probabilities, labels = [], []

    progress_bar = tqdm(
        range(len(valid_dl)), disable=not accelerator.is_local_main_process
    )

    for batch in valid_dl:
        with torch.no_grad():
            raw_logits, _ = model(**batch)
            raw_logits = raw_logits.reshape(-1)

        # Logits -> probabilities; labels are flattened integer class ids.
        batch_probs = torch.sigmoid(raw_logits)
        batch_labels = batch["labels"].to(torch.long).reshape(-1)
        batch_probs, batch_labels = accelerator.gather_for_metrics(
            (batch_probs, batch_labels)
        )

        probabilities += batch_probs.cpu().numpy().tolist()
        labels += batch_labels.cpu().numpy().tolist()

        progress_bar.update(1)
    progress_bar.close()

    # compute metric
    eval_dict = compute_metrics(probabilities, labels)

    result_df = pd.DataFrame()
    result_df["id"] = valid_ids
    result_df["predictions"] = probabilities
    result_df["truths"] = labels

    # Independent two-column frame with the predictions exposed as `generated`.
    oof_df = result_df.rename(columns={"predictions": "generated"})[
        ["id", "generated"]
    ].copy()

    return {"scores": eval_dict, "result_df": result_df, "oof_df": oof_df}


# -------- Main Function ---------------------------------------------------------#
def _format_scores(scores_dict):
    """Render the metric dict produced by ``run_evaluation`` as one log fragment."""
    return (
        f"{round(scores_dict['lb'], 4)}, R2 = {scores_dict['r2']}, "
        f"F1 = {scores_dict['f1']}, accuracy = {scores_dict['accuracy']}, "
        f"precision = {scores_dict['precision']}, recall = {scores_dict['recall']}"
    )


def run_training(
    train_dl,
    valid_dl,
    test_dl,
    num_epochs=2,
    warmup_pct=0.1,
    eval_every=500,
    patience=10,
    output_dir="../models/r_ranking",
):
    """Fine-tune the detector, evaluate periodically, then report test metrics.

    Relies on notebook-level names: ``accelerator``, ``MODEL_NAME``,
    ``MODEL_TYPE``, ``NAME``, ``valid_ids``, ``test_ids``, ``print_line`` and
    ``run_evaluation``.

    Args:
        train_dl: training data loader.
        valid_dl: validation data loader, evaluated every ``eval_every`` updates.
        test_dl: test data loader, evaluated once after the last epoch.
        num_epochs: number of passes over ``train_dl``.
        warmup_pct: fraction of all optimizer updates used for LR warm-up.
        eval_every: number of optimizer updates between two validation runs.
        patience: consecutive non-improving validation runs tolerated before
            training stops early.
        output_dir: directory receiving the ``*_best.csv`` / ``*_last.csv`` files.

    Returns:
        None. Results are printed, and predictions / checkpoints are written to disk.
    """
    # ------- Config -------------------------------------------------------------------#
    accelerator.print("config for the current run:")
    print_line()

    # ------- Model --------------------------------------------------------------------#
    print_line()
    print("creating the LLM Detection model...")
    model = AiModel(accelerator.device, MODEL_NAME, MODEL_TYPE)
    print_line()

    # ------- Optimizer ----------------------------------------------------------------#
    print_line()
    print("creating the optimizer...")
    optimizer = get_optimizer(model)

    # ------- Prepare -------------------------------------------------------------------#
    model, optimizer, train_dl, valid_dl, test_dl = accelerator.prepare(
        model, optimizer, train_dl, valid_dl, test_dl
    )

    # ------- Scheduler -----------------------------------------------------------------#
    print_line()
    grad_accumulation_steps = 1

    num_update_steps_per_epoch = len(train_dl) // grad_accumulation_steps
    num_training_steps = num_epochs * num_update_steps_per_epoch
    num_warmup_steps = int(warmup_pct * num_training_steps)

    accelerator.print(f"# training updates per epoch: {num_update_steps_per_epoch}")
    accelerator.print(f"# training steps: {num_training_steps}")
    accelerator.print(f"# warmup steps: {num_warmup_steps}")

    scheduler = get_cosine_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
    )

    # ------- training setup --------------------------------------------------------------#
    best_lb = -1  # best validation "lb" score seen so far (logged as AUC)

    patience_tracker = 0
    current_iteration = 0  # optimizer updates performed across all epochs

    # The default directory is created earlier in the notebook; make sure a
    # caller-supplied one exists as well before anything is written to it.
    if accelerator.is_main_process:
        os.makedirs(output_dir, exist_ok=True)

    # ------- training  --------------------------------------------------------------------#
    start_time = time.time()
    accelerator.wait_for_everyone()

    for epoch in range(num_epochs):
        # One progress bar per epoch; it is closed at the end of the epoch
        # (or right before an early stop) so the last bar is not left open.
        progress_bar = tqdm(
            range(num_update_steps_per_epoch),
            disable=not accelerator.is_local_main_process,
        )
        loss_meter = AverageMeter()

        # Training ------
        model.train()
        for step, batch in enumerate(train_dl):
            with accelerator.accumulate(model):
                _, loss = model(**batch)
                accelerator.backward(loss)

                if accelerator.sync_gradients:
                    accelerator.clip_grad_norm_(model.parameters(), 1.0)

                    optimizer.step()  # gradient_state.sync_gradients check is performed inside optimizer.step
                    scheduler.step()
                    optimizer.zero_grad()

                loss_meter.update(loss.item())

            if accelerator.sync_gradients:
                # The counter is global, so it is shown against the total number
                # of updates (it used to be shown against the per-epoch count,
                # producing e.g. "3001/ 2486" in the second epoch).
                progress_bar.set_description(
                    f"STEP: {current_iteration + 1:5}/{num_training_steps:5}. "
                    f"LR: {get_lr(optimizer):.4f}. "
                    f"Loss: {loss_meter.avg:.4f}. "
                )

                progress_bar.update(1)
                current_iteration += 1

            # >--------------------------------------------------|
            # >-- evaluation ------------------------------------|
            # >--------------------------------------------------|

            if accelerator.sync_gradients and current_iteration % eval_every == 0:
                # set model in eval mode
                model.eval()
                eval_response = run_evaluation(accelerator, model, valid_dl, valid_ids)

                scores_dict = eval_response["scores"]
                result_df = eval_response["result_df"]
                oof_df = eval_response["oof_df"]
                lb = scores_dict["lb"]

                print_line()
                et = as_minutes(time.time() - start_time)
                accelerator.print(
                    f">>> Epoch {epoch + 1} | Step {step} | Total Step {current_iteration} | Time: {et}"
                )
                print_line()
                accelerator.print(
                    f">>> Current LB (AUC) = {_format_scores(scores_dict)}"
                )

                print_line()

                # Ties count as an improvement and reset the patience counter.
                is_best = lb >= best_lb
                if is_best:
                    best_lb = lb
                    patience_tracker = 0
                else:
                    patience_tracker += 1
                    accelerator.print(
                        f">>> patience reached {patience_tracker}/{patience}"
                    )
                    accelerator.print(f">>> current best score: {round(best_lb, 4)}")

                # Only the main process writes files, matching the checkpoint
                # save below, so parallel workers do not write the same paths.
                if accelerator.is_main_process:
                    if is_best:
                        oof_df.to_csv(
                            os.path.join(output_dir, "oof_df_best.csv"), index=False
                        )
                        result_df.to_csv(
                            os.path.join(output_dir, "result_df_best.csv"),
                            index=False,
                        )

                    oof_df.to_csv(
                        os.path.join(output_dir, "oof_df_last.csv"), index=False
                    )
                    result_df.to_csv(
                        os.path.join(output_dir, "result_df_last.csv"), index=False
                    )

                # saving -----
                accelerator.wait_for_everyone()
                unwrapped_model = accelerator.unwrap_model(model)
                model_state = {
                    "step": current_iteration,
                    "epoch": epoch + 1,
                    "state_dict": unwrapped_model.state_dict(),
                    "lb": lb,
                }

                if accelerator.is_main_process:
                    save_checkpoint(model_state, is_best=is_best, name=NAME)

                # -- post eval
                model.train()
                torch.cuda.empty_cache()
                print_line()

                # early stopping ----
                if patience_tracker >= patience:
                    print("stopping early")
                    progress_bar.close()
                    model.eval()
                    accelerator.end_training()
                    return

        progress_bar.close()

    # check on test (uses the weights as they are after the final update)
    print_line()
    model.eval()
    eval_response = run_evaluation(accelerator, model, test_dl, test_ids)

    print_line()
    accelerator.print(
        f">>> Final performance on test AUC = {_format_scores(eval_response['scores'])}"
    )
    print_line()


# Entry point: start training on the loaders built earlier. The guard is true in
# the notebook kernel (the logger created from __name__ reports "__main__").
if __name__ == "__main__":
    run_training(train_dl, valid_dl, test_dl)

config for the current run:
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
creating the LLM Detection model...
initializing the Rank Model...


loading configuration file config.json from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-base\snapshots\8ccc9b6f36199bec6961081d44eb72fb3f7353f3\config.json
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "legacy": true,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.51.3",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file pyto

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
creating the optimizer...
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# training updates per epoch: 2486
# training steps: 4972
# warmup steps: 497


STEP:     1/ 2486. LR: 0.0402. Loss: 0.7350. :   0%|          | 1/2486 [00:00<11:20,  3.65it/s]Safetensors PR exists
100%|██████████| 466/466 [00:10<00:00, 43.83it/s]20%|██        | 500/2486 [01:23<06:45,  4.89it/s]


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 1 | Step 499 | Total Step 500 | Time: 1m34s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.8228, R2 = 0.3034, F1 = 0.8439, accuracy = 0.8262, precision = 0.7935, recall = 0.9012
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:   501/ 2486. LR: 20.0000. Loss: 0.6380. :  20%|██        | 501/2486 [01:35<2:02:08,  3.69s/it]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 466/466 [00:09<00:00, 48.32it/s]40%|████      | 1000/2486 [02:58<04:03,  6.10it/s] 


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 1 | Step 999 | Total Step 1000 | Time: 3m8s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.835, R2 = 0.3529, F1 = 0.8556, accuracy = 0.8385, precision = 0.8014, recall = 0.9177
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:  1001/ 2486. LR: 19.3805. Loss: 0.6143. :  40%|████      | 1001/2486 [03:09<1:22:36,  3.34s/it]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 466/466 [00:09<00:00, 48.33it/s]60%|██████    | 1500/2486 [04:35<02:59,  5.50it/s]  


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 1 | Step 1499 | Total Step 1500 | Time: 4m44s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.8658, R2 = 0.4625, F1 = 0.8709, accuracy = 0.8659, precision = 0.8745, recall = 0.8673
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:  1501/ 2486. LR: 17.6172. Loss: 0.6001. :  60%|██████    | 1501/2486 [04:46<55:26,  3.38s/it]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 466/466 [00:10<00:00, 44.47it/s]80%|████████  | 2000/2486 [06:07<01:26,  5.60it/s]


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 1 | Step 1999 | Total Step 2000 | Time: 6m18s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.8349, R2 = 0.355, F1 = 0.858, accuracy = 0.8391, precision = 0.7947, recall = 0.9321
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> patience reached 1/10
>>> current best score: 0.8658


STEP:  2001/ 2486. LR: 14.9248. Loss: 0.5963. :  80%|████████  | 2001/2486 [06:19<28:34,  3.54s/it]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:  2486/ 2486. LR: 11.7357. Loss: 0.5928. : 100%|██████████| 2486/2486 [07:42<00:00,  5.37it/s]
100%|██████████| 466/466 [00:13<00:00, 35.66it/s] 1%|          | 14/2486 [00:02<07:47,  5.29it/s]


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 2 | Step 13 | Total Step 2500 | Time: 7m58s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.873, R2 = 0.4926, F1 = 0.8791, accuracy = 0.8734, precision = 0.8755, recall = 0.8827
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 466/466 [00:12<00:00, 36.08it/s]1%|██        | 514/2486 [01:39<05:12,  6.32it/s]  


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 2 | Step 513 | Total Step 3000 | Time: 9m35s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.8822, R2 = 0.5356, F1 = 0.8929, accuracy = 0.8841, precision = 0.8621, recall = 0.9259
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:  3001/ 2486. LR: 8.1400. Loss: 0.5791. :  21%|██        | 515/2486 [01:53<2:25:47,  4.44s/it]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 466/466 [00:12<00:00, 37.21it/s]1%|████      | 1014/2486 [03:16<04:09,  5.90it/s] 


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 2 | Step 1013 | Total Step 3500 | Time: 11m12s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.8858, R2 = 0.5464, F1 = 0.8934, accuracy = 0.8868, precision = 0.8779, recall = 0.9095
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 466/466 [00:09<00:00, 49.22it/s]1%|██████    | 1514/2486 [04:51<02:49,  5.75it/s]  


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 2 | Step 1513 | Total Step 4000 | Time: 12m43s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.8823, R2 = 0.5356, F1 = 0.8926, accuracy = 0.8841, precision = 0.8635, recall = 0.9239
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> patience reached 1/10
>>> current best score: 0.8858


STEP:  4001/ 2486. LR: 2.2348. Loss: 0.5692. :  61%|██████    | 1515/2486 [05:01<52:03,  3.22s/it]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 466/466 [00:06<00:00, 68.57it/s]1%|████████  | 2014/2486 [06:04<00:44, 10.67it/s]


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 2 | Step 2013 | Total Step 4500 | Time: 13m54s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.8887, R2 = 0.5571, F1 = 0.8954, accuracy = 0.8895, precision = 0.8838, recall = 0.9074
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:  4501/ 2486. LR: 0.5417. Loss: 0.5694. :  81%|████████  | 2014/2486 [06:12<00:44, 10.67it/s]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:  4972/ 2486. LR: 0.0000. Loss: 0.5682. : 100%|█████████▉| 2485/2486 [06:47<00:00, 14.39it/s]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 156/156 [00:01<00:00, 85.87it/s]
STEP:  4972/ 2486. LR: 0.0000. Loss: 0.5682. : 100%|██████████| 2486/2486 [06:49<00:00,  6.07it/s]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Final performance on test AUC = 0.8983, R2 = 0.5941, F1 = 0.9032, accuracy = 0.8987, precision = 0.8991, recall = 0.9074
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#





In [9]:
# Reload the best checkpoint (presumably the file written by save_checkpoint
# during training - confirm the directory in train_utils) and score it on test.
checkpoint_path = f"./checkpoints_path/{NAME}_best.pth.tar"
model_test = AiModel(accelerator.device, MODEL_NAME, MODEL_TYPE)

# map_location="cpu": deserialize the tensors on the CPU so that loading does not
# depend on the device the checkpoint was saved from; load_state_dict then
# copies the values into the model's own parameters.
# SECURITY: weights_only=False unpickles arbitrary objects - only load
# checkpoint files that were produced by this project.
ckpt = torch.load(checkpoint_path, map_location="cpu", weights_only=False)
model_test.load_state_dict(ckpt["state_dict"])

eval_response = run_evaluation(accelerator, model_test, test_dl, test_ids)
eval_response

initializing the Rank Model...


loading configuration file config.json from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-base\snapshots\8ccc9b6f36199bec6961081d44eb72fb3f7353f3\config.json
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "legacy": true,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.51.3",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file pyto

{'scores': {'lb': 0.8985,
  'r2': 0.5941,
  'f1': 0.9029,
  'accuracy': 0.8987,
  'precision': 0.9015,
  'recall': 0.9043},
 'result_df':         id  predictions  truths
 0     9686     0.026186       0
 1     9697     0.026186       0
 2     7977     0.005928       0
 3     7972     0.178590       0
 4      181     0.238762       1
 ..     ...          ...     ...
 617   4778     0.994476       1
 618   4232     0.997314       1
 619     25     0.956156       1
 620  12266     0.003488       0
 621   5054     0.993565       1
 
 [622 rows x 3 columns],
 'oof_df':         id  generated
 0     9686   0.026186
 1     9697   0.026186
 2     7977   0.005928
 3     7972   0.178590
 4      181   0.238762
 ..     ...        ...
 617   4778   0.994476
 618   4232   0.997314
 619     25   0.956156
 620  12266   0.003488
 621   5054   0.993565
 
 [622 rows x 2 columns]}

In [10]:
# Cell output: per-row test predictions next to the true labels.
eval_response["result_df"]

Unnamed: 0,id,predictions,truths
0,9686,0.026186,0
1,9697,0.026186,0
2,7977,0.005928,0
3,7972,0.178590,0
4,181,0.238762,1
...,...,...,...
617,4778,0.994476,1
618,4232,0.997314,1
619,25,0.956156,1
620,12266,0.003488,0
