In [1]:
import logging
import os
import sys
import time
from copy import deepcopy

import datasets
import pandas as pd
import torch
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import get_cosine_schedule_with_warmup

sys.path.append("../scripts")

try:
    from ai_dataset import AiDataset
    from ai_loader import AiCollator, AiCollatorTrain
    from ai_model import AiModel
    from ai_optimizer import get_optimizer
    from metric_utils import compute_metrics
    from train_utils import AverageMeter, as_minutes, get_lr, save_checkpoint

except Exception as e:
    print(e)
    raise ImportError

logger = get_logger(__name__)


pd.options.display.max_colwidth = 1000

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from sklearn.model_selection import train_test_split

df_gen = pd.read_csv("../../data/generated/gen_solutions.csv")
df_gen = df_gen[["spec", "solution"]]
df_gen.columns = ["task", "text"]
df_gen["generated"] = 1
df_real = pd.read_csv("../../data/db_attempts.csv")
df_real = df_real[["task", "programText"]]
df_real.columns = ["task", "text"]
df_real["generated"] = 0
df = pd.concat([df_gen, df_real], axis=0, ignore_index=True)
df = df.dropna(subset=["text"])
df["text"] = df["text"].astype(str)
df.drop_duplicates()
df["id"] = df.index + 1

df = df.reset_index(drop=True)
df

Unnamed: 0,task,text,generated,id
0,c0df7d49-26f5-451c-b44a-1e0bca60bca5,\r\nn = 1\r\nwhile n:\r\n n = int(input())\r\n if n:\r\n print(n+1)\r\n break\r\n,1,1
1,c0df7d49-26f5-451c-b44a-1e0bca60bca5,\r\ncount = 0\r\nnum = input()\r\nwhile num != '0':\r\n count += 1\r\n num = input()\r\nprint(count)\r\n,1,2
2,c0df7d49-26f5-451c-b44a-1e0bca60bca5,sequence = []\r\nwhile True:\r\n num = int(input())\r\n if num == 0:\r\n break\r\n sequence.append(num)\r\n\r\nprint(len(sequence)),1,3
3,4e5b21c0-e86f-4eac-82b6-1a0d00ae4199,a=[]\r\nwhile True:\r\n b=input()\r\n if b=='0':\r\n break\r\n a+=[int(b)]\r\nprint(a.count(max(a))),1,4
4,4e5b21c0-e86f-4eac-82b6-1a0d00ae4199,\r\nmax_val = float('-inf')\r\ncount = 0\r\n\r\nwhile True:\r\n num = int(input())\r\n if num == 0:\r\n break\r\n if num > max_val:\r\n max_val = num\r\n count = 1\r\n elif num == max_val:\r\n count += 1\r\n\r\nprint(count)\r\n,1,5
...,...,...,...,...
9246,49c03922-c57e-464a-8191-9ebef2615808,"a, b, c = map(int, input().split())\nif c > a and c > b:\n print(c //b + 1 + c// a + 1)\nelif a == b == c:\n print(4)\nelse:\n print(2)\n",0,9328
9247,2173e6e8-cca6-47c7-8ef6-ea93d3e736f4,"n,*a=map(int,open(0).read().split())\na=sorted(a)\nd=[0]*n\nd[0]=1e9\nd[1]=a[1]-a[0]\nfor i in range(2,n):\n d[i]=min(d[i-1]+a[i]-a[i-1],d[i-2]+a[i]-a[i-1])\nprint(int(d[-1]))\n",0,9329
9248,2a42d873-b1fe-4256-a488-91db4eaa8d9f,"n=list(input())\nd=sorted(list(input()))\nif max(d)>min(n):\n p=-1\n for i in range(len(n)):\n if n[i]<d[p]:\n n[i]=d[p]\n p-=1\nprint("""".join(n))",0,9330
9249,48f237db-e593-4be6-b3f9-10757a1d07ec,a=int(input())\nprint(a),0,9331


In [3]:
train_df, val_prep = train_test_split(df, test_size=0.2, stratify=df["generated"])
valid_df, test_df = train_test_split(
    val_prep, test_size=0.25, stratify=val_prep["generated"]
)
train_df = train_df.reset_index(drop=True)
valid_df = valid_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)
len(train_df), len(valid_df), len(test_df)

(7400, 1388, 463)

In [4]:
accelerator = Accelerator(
    gradient_accumulation_steps=1,
)

# Make one log on every process with the configuration for debugging.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)
logger.info(accelerator.state, main_process_only=False)


def print_line():
    prefix, unit, suffix = "#", "~~", "#"
    accelerator.print(prefix + unit * 50 + suffix)


if accelerator.is_local_main_process:
    datasets.utils.logging.set_verbosity_warning()
    transformers.utils.logging.set_verbosity_info()
else:
    datasets.utils.logging.set_verbosity_error()
    transformers.utils.logging.set_verbosity_error()

# ------- Runtime Configs -----------------------------------------------------------#
print_line()
accelerator.print(f"setting seed: {42}")
set_seed(42)

if accelerator.is_main_process:
    os.makedirs("../models/r_ranking", exist_ok=True)
print_line()

04/15/2025 22:07:49 - INFO - __main__ - Distributed environment: DistributedType.NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda

Mixed precision type: no



#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
setting seed: 42
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


In [5]:
prompt_ids = train_df["task"].unique().tolist()
gdf = train_df.groupby("task")["id"].apply(list).reset_index()
prompt2ids = dict(zip(gdf["task"], gdf["id"]))

accelerator.print(f"shape of train data: {train_df.shape}")
accelerator.print(f"{train_df.head()}")
accelerator.print(f"shape of validation data: {valid_df.shape}")
accelerator.print(f"shape of test data: {test_df.shape}")
accelerator.print(f"Prompts: {prompt_ids}")

with accelerator.main_process_first():
    dataset_creator = AiDataset()

    train_ds = dataset_creator.get_dataset(train_df)
    valid_ds = dataset_creator.get_dataset(valid_df)
    test_ds = dataset_creator.get_dataset(test_df)

tokenizer = dataset_creator.tokenizer

# ------- data loaders ----------------------------------------------------------------#
train_ds.set_format(type=None, columns=["id", "input_ids", "attention_mask", "generated"])

# sort valid dataset for faster evaluation
valid_ds = valid_ds.sort("input_length")

valid_ds.set_format(type=None, columns=["id", "input_ids", "attention_mask", "generated"])
valid_ids = valid_df["id"]

test_ds = test_ds.sort("input_length")

test_ds.set_format(type=None, columns=["id", "input_ids", "attention_mask", "generated"])
test_ids = test_ds["id"]

# ---
kwargs = dict(
    train_ds=train_ds,
    prompt_ids=prompt_ids,
    prompt2ids=prompt2ids,
)

data_collector_train = AiCollatorTrain(
    tokenizer=tokenizer,
    pad_to_multiple_of=64,
    kwargs=kwargs,
)

data_collector = AiCollator(tokenizer=tokenizer, pad_to_multiple_of=64)

train_dl = DataLoader(
    train_ds,
    batch_size=4,
    shuffle=True,
    collate_fn=data_collector_train,
)

valid_dl = DataLoader(
    valid_ds,
    batch_size=4,
    shuffle=False,
    collate_fn=data_collector,
)

test_dl = DataLoader(
    test_ds,
    batch_size=4,
    shuffle=False,
    collate_fn=data_collector,
)

accelerator.print("data preparation done...")
print_line()

shape of train data: (7400, 4)
                                   task  \
0  85b72f3a-191b-4d99-9207-6a82ed1e73c9   
1  008b5cbd-4257-4e11-a061-f19e550b6b3e   
2  c9e1ff81-5755-43cb-8439-49ef952f622a   
3  c270a9c3-67af-4113-8e7d-a7a94792013d   
4  901dd977-9e8b-4b54-9730-6aeaa9d4a9e7   

                                                                                                                                                                                                                                                                                                                                                             text  \
0                                                                                                                                                      k, n=map(int, input().split())\na=[int(p) for p in input().split()]\nS=0\na[0]-=k\nif a[0]<0:\n    a[0]=0\nfor i in range(1, n):\n    a[i]+=a[i-1]-k\n    if a[i]<0:\n        a[i]=0\n    S=a[i]\nprint(S)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
loading file spm.model from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-xsmall\snapshots\4b419818330868dff6a60ad3e6b1c730f8b8c0c6\spm.model
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at None
loading file tokenizer_config.json from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-xsmall\snapshots\4b419818330868dff6a60ad3e6b1c730f8b8c0c6\tokenizer_config.json
loading file tokenizer.json from cache at None
loading file chat_template.jinja from cache at None
loading configuration file config.json from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-xsmall\snapshots\4b419818330868dff6a60ad

Column name ['__index_level_0__'] not in the dataset. Current columns in the dataset: ['task', 'text', 'generated', 'id', 'input_ids', 'attention_mask', 'input_length']


Map: 100%|██████████| 1388/1388 [00:00<00:00, 6940.00 examples/s]
Map: 100%|██████████| 1388/1388 [00:00<00:00, 33047.95 examples/s]


Column name ['__index_level_0__'] not in the dataset. Current columns in the dataset: ['task', 'text', 'generated', 'id', 'input_ids', 'attention_mask', 'input_length']


Map: 100%|██████████| 463/463 [00:00<00:00, 6173.39 examples/s]
Map: 100%|██████████| 463/463 [00:00<00:00, 28941.32 examples/s]

Column name ['__index_level_0__'] not in the dataset. Current columns in the dataset: ['task', 'text', 'generated', 'id', 'input_ids', 'attention_mask', 'input_length']
setting random seed in data collator as: 1744744105067
data preparation done...
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#





In [6]:
# -------- Evaluation -------------------------------------------------------------#
def run_evaluation(accelerator, model, valid_dl, valid_ids):
    model.eval()

    all_predictions = []
    all_truths = []

    progress_bar = tqdm(
        range(len(valid_dl)), disable=not accelerator.is_local_main_process
    )

    for batch in valid_dl:
        with torch.no_grad():
            logits, _ = model(**batch)
            logits = logits.reshape(-1)
        predictions = torch.sigmoid(logits)
        predictions, references = accelerator.gather_for_metrics(
            (predictions, batch["labels"].to(torch.long).reshape(-1))
        )
        predictions, references = (
            predictions.cpu().numpy().tolist(),
            references.cpu().numpy().tolist(),
        )

        all_predictions.extend(predictions)
        all_truths.extend(references)

        progress_bar.update(1)
    progress_bar.close()

    # compute metric
    eval_dict = compute_metrics(all_predictions, all_truths)

    result_df = pd.DataFrame()
    result_df["id"] = valid_ids
    result_df["predictions"] = all_predictions
    result_df["truths"] = all_truths

    oof_df = deepcopy(result_df)
    oof_df = oof_df.rename(columns={"predictions": "generated"})
    oof_df = oof_df[["id", "generated"]].copy()

    to_return = {
        "scores": eval_dict,
        "result_df": result_df,
        "oof_df": oof_df,
    }

    return to_return


# -------- Main Function ---------------------------------------------------------#
def run_training(train_dl, valid_dl, test_dl):
    # --- show batch -------------------------------------------------------------------#
    # print_line()

    # for b in train_dl:
    #     break
    # show_batch(b, tokenizer, task="training", print_fn=print, n_examples=4)

    # print_line()

    # for b in valid_dl:
    #     break
    # show_batch(b, tokenizer, task="validation", print_fn=accelerator.print)

    # print_line()

    # ------- Config -------------------------------------------------------------------#
    accelerator.print("config for the current run:")
    print_line()

    # ------- Model --------------------------------------------------------------------#
    print_line()
    print("creating the LLM Detection model...")
    model = AiModel(accelerator.device)
    print_line()

    # ------- Optimizer ----------------------------------------------------------------#
    print_line()
    print("creating the optimizer...")
    optimizer = get_optimizer(model)
    # ------- Prepare -------------------------------------------------------------------#

    model, optimizer, train_dl, valid_dl, test_dl = accelerator.prepare(
        model, optimizer, train_dl, valid_dl, test_dl
    )

    # ------- Scheduler -----------------------------------------------------------------#
    print_line()
    num_epochs = 1
    grad_accumulation_steps = 1
    warmup_pct = 0.1

    num_update_steps_per_epoch = len(train_dl) // grad_accumulation_steps
    num_training_steps = num_epochs * num_update_steps_per_epoch
    num_warmup_steps = int(warmup_pct * num_training_steps)

    accelerator.print(f"# training updates per epoch: {num_update_steps_per_epoch}")
    accelerator.print(f"# training steps: {num_training_steps}")
    accelerator.print(f"# warmup steps: {num_warmup_steps}")

    scheduler = get_cosine_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps,
    )

    # ------- training setup --------------------------------------------------------------#
    best_lb = -1  # track recall@1000

    patience_tracker = 0
    current_iteration = 0

    # ------- training  --------------------------------------------------------------------#
    start_time = time.time()
    accelerator.wait_for_everyone()

    for epoch in range(num_epochs):
        # close and reset progress bar
        if epoch != 0:
            progress_bar.close()

        progress_bar = tqdm(
            range(num_update_steps_per_epoch),
            disable=not accelerator.is_local_main_process,
        )
        loss_meter = AverageMeter()

        # Training ------
        model.train()
        for step, batch in enumerate(train_dl):
            with accelerator.accumulate(model):
                _, loss = model(**batch)
                accelerator.backward(loss)

                if accelerator.sync_gradients:
                    accelerator.clip_grad_norm_(model.parameters(), 1.0)

                    optimizer.step()  # gradient_state.sync_gradients check is performed inside optimizer.step
                    scheduler.step()
                    optimizer.zero_grad()

                loss_meter.update(loss.item())

            if accelerator.sync_gradients:
                progress_bar.set_description(
                    f"STEP: {current_iteration + 1:5}/{num_update_steps_per_epoch:5}. "
                    f"LR: {get_lr(optimizer):.4f}. "
                    f"Loss: {loss_meter.avg:.4f}. "
                )

                progress_bar.update(1)
                current_iteration += 1

            # >--------------------------------------------------|
            # >-- evaluation ------------------------------------|
            # >--------------------------------------------------|

            if (accelerator.sync_gradients) & (current_iteration % 500 == 0):
                # set model in eval mode
                model.eval()
                eval_response = run_evaluation(accelerator, model, valid_dl, valid_ids)

                scores_dict = eval_response["scores"]
                result_df = eval_response["result_df"]
                oof_df = eval_response["oof_df"]
                lb = scores_dict["lb"]
                r2 = scores_dict["r2"]
                f1 = scores_dict["f1"]
                recall = scores_dict["recall"]

                print_line()
                et = as_minutes(time.time() - start_time)
                accelerator.print(
                    f">>> Epoch {epoch + 1} | Step {step} | Total Step {current_iteration} | Time: {et}"
                )
                print_line()
                accelerator.print(
                    f">>> Current LB (AUC) = {round(lb, 4)}, R2 = {r2}, F1 = {f1}, recall = {recall}"
                )

                print_line()

                is_best = False
                if lb >= best_lb:
                    best_lb = lb
                    is_best = True
                    patience_tracker = 0

                    # -----
                    best_dict = dict()
                    for k, v in scores_dict.items():
                        best_dict[f"{k}_at_best"] = v
                else:
                    patience_tracker += 1

                if is_best:
                    oof_df.to_csv(
                        os.path.join("../models/r_ranking", "oof_df_best.csv"),
                        index=False,
                    )
                    result_df.to_csv(
                        os.path.join("../models/r_ranking", "result_df_best.csv"),
                        index=False,
                    )
                else:
                    accelerator.print(f">>> patience reached {patience_tracker}/{10}")
                    accelerator.print(f">>> current best score: {round(best_lb, 4)}")

                oof_df.to_csv(
                    os.path.join("../models/r_ranking", "oof_df_last.csv"), index=False
                )
                result_df.to_csv(
                    os.path.join("../models/r_ranking", "result_df_last.csv"),
                    index=False,
                )

                # saving -----
                accelerator.wait_for_everyone()
                unwrapped_model = accelerator.unwrap_model(model)
                model_state = {
                    "step": current_iteration,
                    "epoch": epoch + 1,
                    "state_dict": unwrapped_model.state_dict(),
                    "lb": lb,
                }

                if accelerator.is_main_process:
                    save_checkpoint(model_state, is_best=is_best)

                # -- post eval
                model.train()
                torch.cuda.empty_cache()
                print_line()

                # early stopping ----
                if patience_tracker >= 10:
                    print("stopping early")
                    model.eval()
                    accelerator.end_training()
                    return

    # check on test
    print_line()
    model.eval()
    eval_response = run_evaluation(accelerator, model, test_dl, test_ids)

    scores_dict = eval_response["scores"]
    result_df = eval_response["result_df"]
    oof_df = eval_response["oof_df"]
    lb = scores_dict["lb"]
    r2 = scores_dict["r2"]
    f1 = scores_dict["f1"]
    recall = scores_dict["recall"]
    print_line()
    accelerator.print(
        f">>> Final performance on test AUC = {round(lb, 4)}, R2 = {r2}, F1 = {f1}, recall = {recall}"
    )
    print_line()


if __name__ == "__main__":
    run_training(train_dl, valid_dl, test_dl)

config for the current run:
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
creating the LLM Detection model...
initializing the Rank Model...


loading configuration file config.json from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-xsmall\snapshots\4b419818330868dff6a60ad3e6b1c730f8b8c0c6\config.json
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-07,
  "legacy": true,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 6,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 384,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.51.0",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file pyt

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
creating the optimizer...
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# training updates per epoch: 1850
# training steps: 1850
# warmup steps: 185


STEP:     1/ 1850. LR: 0.1081. Loss: 0.7417. :   0%|          | 1/1850 [00:00<17:15,  1.78it/s]Safetensors PR exists
100%|██████████| 347/347 [00:03<00:00, 90.74it/s]27%|██▋       | 500/1850 [00:31<01:18, 17.23it/s]


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 1 | Step 499 | Total Step 500 | Time: 0m35s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.9919, R2 = 0.9623, F1 = 0.988, recall = 0.9939
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:   502/ 1850. LR: 18.2639. Loss: 0.5816. :  27%|██▋       | 502/1850 [00:36<15:26,  1.45it/s]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 347/347 [00:03<00:00, 91.46it/s]54%|█████▍    | 1000/1850 [01:06<00:48, 17.64it/s]


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 1 | Step 999 | Total Step 1000 | Time: 1m10s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.9961, R2 = 0.978, F1 = 0.993, recall = 1.0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:  1002/ 1850. LR: 10.2924. Loss: 0.5587. :  54%|█████▍    | 1002/1850 [01:10<09:40,  1.46it/s]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 347/347 [00:04<00:00, 86.50it/s]1%|████████  | 1500/1850 [01:41<00:22, 15.88it/s] 


#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Epoch 1 | Step 1499 | Total Step 1500 | Time: 1m45s
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Current LB (AUC) = 0.9974, R2 = 0.9906, F1 = 0.997, recall = 0.996
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:  1502/ 1850. LR: 2.0794. Loss: 0.5554. :  81%|████████  | 1502/1850 [01:45<04:12,  1.38it/s]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


STEP:  1850/ 1850. LR: 0.0000. Loss: 0.5548. : 100%|██████████| 1850/1850 [02:07<00:00, 17.34it/s]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


100%|██████████| 116/116 [00:01<00:00, 86.50it/s]
STEP:  1850/ 1850. LR: 0.0000. Loss: 0.5548. : 100%|██████████| 1850/1850 [02:08<00:00, 14.38it/s]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
>>> Final performance on test AUC = 1.0, R2 = 1.0, F1 = 1.0, recall = 1.0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#





In [7]:
checkpoint_path = "./checkpoints_path/detect_ai_model_last.pth.tar"
model_test = AiModel(accelerator.device)
ckpt = torch.load(checkpoint_path, weights_only=False)
model_test.load_state_dict(ckpt["state_dict"])
eval_response = run_evaluation(accelerator, model_test, test_dl, test_ids)
eval_response

initializing the Rank Model...


loading configuration file config.json from cache at C:\Users\Kiaver\.cache\huggingface\hub\models--microsoft--deberta-v3-xsmall\snapshots\4b419818330868dff6a60ad3e6b1c730f8b8c0c6\config.json
Model config DebertaV2Config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-07,
  "legacy": true,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 6,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 384,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.51.0",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

loading weights file pyt

{'scores': {'lb': 1.0, 'r2': 1.0, 'f1': 1.0, 'recall': 1.0},
 'result_df':        id  predictions  truths
 0     358     0.954265       1
 1    1906     0.958759       1
 2     207     0.975657       1
 3    1579     0.820372       1
 4    3885     0.030616       0
 ..    ...          ...     ...
 458   332     0.981398       1
 459   462     0.980917       1
 460  6032     0.029438       0
 461  8870     0.027041       0
 462   783     0.980884       1
 
 [463 rows x 3 columns],
 'oof_df':        id  generated
 0     358   0.954265
 1    1906   0.958759
 2     207   0.975657
 3    1579   0.820372
 4    3885   0.030616
 ..    ...        ...
 458   332   0.981398
 459   462   0.980917
 460  6032   0.029438
 461  8870   0.027041
 462   783   0.980884
 
 [463 rows x 2 columns]}

In [8]:
eval_response["result_df"]

Unnamed: 0,id,predictions,truths
0,358,0.954265,1
1,1906,0.958759,1
2,207,0.975657,1
3,1579,0.820372,1
4,3885,0.030616,0
...,...,...,...
458,332,0.981398,1
459,462,0.980917,1
460,6032,0.029438,0
461,8870,0.027041,0
