In [1]:
# Export WANDB_PROJECT for this kernel; presumably picked up by the wandb logging
# that TrainingArguments enables with report_to="wandb" (confirm against wandb docs).
%env WANDB_PROJECT=PII

env: WANDB_PROJECT=PII


In [2]:
# Export WANDB_RUN_GROUP for this kernel; presumably groups the per-fold runs
# ("fold-0", "fold-1", ...) of this experiment together in wandb (confirm).
%env WANDB_RUN_GROUP=base-3072-filter+T-CE

env: WANDB_RUN_GROUP=base-3072-filter+T-CE


In [3]:
!pip install -q seqeval evaluate

[0m

In [4]:
import json
import copy
import gc
import os
import re
from collections import defaultdict
from pathlib import Path

import torch
from torch import Tensor, nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from spacy.lang.en import English
from transformers.tokenization_utils import PreTrainedTokenizerBase
from transformers.models.deberta_v2 import DebertaV2ForTokenClassification, DebertaV2TokenizerFast
from transformers.trainer import Trainer
from transformers.training_args import TrainingArguments
from transformers.trainer_utils import EvalPrediction, PredictionOutput
from transformers.data.data_collator import DataCollatorForTokenClassification
from sklearn.model_selection import KFold
from datasets import Dataset, DatasetDict, concatenate_datasets
from seqeval.metrics import recall_score, precision_score
import wandb

In [5]:
# Never commit API keys to a notebook: the key is taken from the WANDB_API_KEY
# environment variable. When the variable is unset, key=None is passed and wandb
# falls back to its own credential lookup.
# NOTE(review): the key that used to be hardcoded here is leaked and should be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

[34m[1mwandb[0m: Currently logged in as: [33memiz6413[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

## Config & Parameters

In [6]:
# Directory holding train.json and the optional external JSON datasets.
DATA_DIR = Path("../dataset/")
# Root for per-fold checkpoints and eval_result.json files (fold_<k> sub-directories).
OUTPUT_DIR = "output2"
# Create the output root up front; an already existing directory is fine.
Path(OUTPUT_DIR).mkdir(exist_ok=True)

In [7]:
# Checkpoint to fine-tune. A path containing "tiny-random" switches the settings
# below to a short smoke-test configuration.
# TRAINING_MODEL_PATH = "hf-internal-testing/tiny-random-deberta-v2" 
TRAINING_MODEL_PATH = "microsoft/deberta-v3-base"
# TRAINING_MODEL_PATH = "Gladiator/microsoft-deberta-v3-large_ner_conll2003"
# Truncation lengths handed to the train / eval CustomTokenizer instances.
TRAINING_MAX_LENGTH = 3072 if "tiny-random" not in TRAINING_MODEL_PATH else 512
EVAL_MAX_LENGTH = 3072 if "tiny-random" not in TRAINING_MODEL_PATH else 512
# Threshold on the "O" score for the evaluation metric
# (MetricsComputerV2's own default for conf_thresh is also 0.9).
CONF_THRESH = 0.9
# Optimiser / schedule settings forwarded to TrainingArguments.
LR = 2.5e-5  # 1.5e-5 ~ 3e-5 for base # 5e-6 ~ 1e-5 for large
LR_SCHEDULER_TYPE = "linear"
NUM_EPOCHS = 3 if "tiny-random" not in TRAINING_MODEL_PATH else 0.1
BATCH_SIZE = 2
EVAL_BATCH_SIZE = 8
# Keeps per-device batch size * accumulation steps at 16.
GRAD_ACCUMULATION_STEPS = 16 // BATCH_SIZE
WARMUP_RATIO = 0.1
WEIGHT_DECAY = 0.01
# Passed as fp16= to TrainingArguments.
AMP = True
# ModelInit: freeze the embedding parameters / the first FREEZE_LAYERS encoder layers.
FREEZE_EMBEDDING = False
FREEZE_LAYERS = 0
# Focal-loss exponent; with 0 the factor (1 - pt) ** GAMMA is 1, i.e. plain cross-entropy.
GAMMA = 0
# Per-token masking probability for RandomCutOut; 0 returns batches unchanged.
MASK_P = 0
# training data
# Number of folds; documents are assigned by int(document) % N_SPLITS.
N_SPLITS = 4
# When True, all-"O" original documents beyond the first third are excluded from training.
FILTER_ORIGINAL = True
# Switches selecting which external JSON datasets are appended to the training data.
MOTH = False
PJMATHMATICIAN = False
NICHOLAS = False
MPWARE = False
TONYAROBERTSON = True

In [8]:
# Trainer configuration shared by every fold; run_name and output_dir are
# overwritten per fold inside the CV loop.
args = TrainingArguments(
    # --- output / bookkeeping ---
    output_dir=OUTPUT_DIR,
    overwrite_output_dir=True,
    report_to="wandb",
    logging_steps=10,
    # --- optimisation ---
    learning_rate=LR,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    warmup_ratio=WARMUP_RATIO,
    weight_decay=WEIGHT_DECAY,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=EVAL_BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUMULATION_STEPS,
    fp16=AMP,
    # --- evaluation / checkpointing: same cadence so the best checkpoint can be reloaded ---
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,
    save_total_limit=1,
    # --- model selection on the "f5" metric (higher is better) ---
    metric_for_best_model="f5",
    greater_is_better=True,
    load_best_model_at_end=True,
)

## Dataset Preparation

In [9]:
# Competition training set.
with DATA_DIR.joinpath("train.json").open("r") as f:
    original_data = json.load(f)

# Optional external datasets: (enabled flag, file name, text printed before the count).
external_sources = [
    (MOTH, "pii_dataset_fixed.json", "Moth's datapoints: "),
    (PJMATHMATICIAN, "moredata_dataset_fixed.json", "PJMathmatician's datapoints: "),
    (NICHOLAS, "mixtral-8x7b-v1.json", "Nicholas' datapoints: "),
    (MPWARE, "mpware_mixtral8x7b_v1.1.json", "MPWARE's datapoints: "),
    (TONYAROBERTSON, "Fake_data_1850_218.json", "tonyarobertson's datapoints: "),
]

# Concatenation of every enabled external dataset, in the order listed above.
extra_data = []
for enabled, file_name, banner in external_sources:
    if not enabled:
        continue
    with DATA_DIR.joinpath(file_name).open("r") as f:
        external = json.load(f)
    print(banner, len(external))
    extra_data.extend(external)

print(f"len(extra_data): {len(extra_data)}")

tonyarobertson's datapoints:  1850
len(extra_data): 1850


In [10]:
# BIO label inventory; the position in this list is the class id used by the model.
all_labels = [
    'B-EMAIL',
    'B-ID_NUM',
    'B-NAME_STUDENT',
    'B-PHONE_NUM',
    'B-STREET_ADDRESS',
    'B-URL_PERSONAL',
    'B-USERNAME',
    'I-ID_NUM',
    'I-NAME_STUDENT',
    'I-PHONE_NUM',
    'I-STREET_ADDRESS',
    'I-URL_PERSONAL',
    'O',
]
# id -> label and its inverse.
id2label = dict(enumerate(all_labels))
label2id = {label: idx for idx, label in id2label.items()}
# Every label except the "outside" class.
target = [label for label in all_labels if label != "O"]

## Tokenization

In [11]:
class CustomTokenizer:
    """Convert one pre-tokenized example into model inputs with per-subword labels.

    The example's text is rebuilt from its ``tokens`` / ``trailing_whitespace``
    columns so that every character carries the label and index of the token it
    came from; the wrapped tokenizer's offset mapping is then used to give each
    sub-word token the label of its first non-whitespace character.
    """

    def __init__(self, tokenizer: PreTrainedTokenizerBase, label2id: dict, max_length: int) -> None:
        """Store the tokenizer, the label -> id mapping and the truncation length."""
        self.tokenizer = tokenizer
        self.label2id = label2id
        self.max_length = max_length

    def __call__(self, example: dict) -> dict:
        """Tokenize ``example`` and attach ``labels``, ``length`` and ``token_map``.

        Args:
            example: mapping with parallel ``tokens``, ``provided_labels`` and
                ``trailing_whitespace`` sequences.

        Returns:
            The tokenizer output extended with:
            ``labels`` - one label id per sub-word token,
            ``length`` - number of input ids,
            ``token_map`` - for every character of the rebuilt text, the index of
            the source token (-1 for the whitespace inserted between tokens).
        """
        # rebuild text from tokens, tracking a label and a token index per character
        pieces, char_labels, token_map = [], [], []

        for idx, (t, l, ws) in enumerate(
            zip(example["tokens"], example["provided_labels"], example["trailing_whitespace"])
        ):
            pieces.append(t)
            char_labels.extend([l] * len(t))
            token_map.extend([idx] * len(t))

            if ws:
                pieces.append(" ")
                char_labels.append("O")
                token_map.append(-1)

        text = "".join(pieces)

        # actual tokenization
        tokenized = self.tokenizer(
            text,
            return_offsets_mapping=True,
            truncation=True,
            max_length=self.max_length
        )

        outside_id = self.label2id["O"]
        token_labels = []

        for start_idx, end_idx in tokenized.offset_mapping:
            # special tokens are reported with the empty (0, 0) span
            if start_idx == 0 and end_idx == 0:
                token_labels.append(outside_id)
                continue

            # case when token starts with whitespace
            if start_idx < len(text) and text[start_idx].isspace():
                start_idx += 1

            # A token made only of trailing whitespace would now point past the
            # end of the text; it carries no entity, so label it "O" instead of
            # raising IndexError.
            if start_idx >= len(char_labels):
                token_labels.append(outside_id)
                continue

            token_labels.append(self.label2id[char_labels[start_idx]])

        length = len(tokenized.input_ids)

        return {**tokenized, "labels": token_labels, "length": length, "token_map": token_map}

## Augmentation

In [12]:
class RandomCutOut:
    """Batch transform that randomly replaces input ids with the mask token id."""

    def __init__(self, mask_p: float, mask_token_id: int) -> None:
        """Keep the per-position masking probability and the replacement id."""
        self.mask_p = mask_p
        self.mask_token_id = mask_token_id

    def __call__(self, batch: dict) -> dict:
        """Return ``batch`` with ``input_ids`` randomly masked (updated in place).

        Each position of each sequence is replaced independently with
        probability ``mask_p``. With ``mask_p == 0`` the batch is returned
        untouched and no random numbers are drawn.
        """
        if self.mask_p == 0:
            return batch

        def _mask_sequence(ids):
            # One Bernoulli draw per position; 1 means "replace with the mask id".
            drop = np.random.binomial(1, p=self.mask_p, size=(len(ids),))
            return np.where(drop, self.mask_token_id, ids).tolist()

        batch["input_ids"] = [_mask_sequence(ids) for ids in batch["input_ids"]]
        return batch

## Instantiate the dataset

In [13]:
# One tokenizer shared by the train / eval encoders and the augmentation.
tokenizer = DebertaV2TokenizerFast.from_pretrained(TRAINING_MODEL_PATH)
train_encoder = CustomTokenizer(tokenizer=tokenizer, label2id=label2id, max_length=TRAINING_MAX_LENGTH)
eval_encoder = CustomTokenizer(tokenizer=tokenizer, label2id=label2id, max_length=EVAL_MAX_LENGTH)
train_augmentation = RandomCutOut(mask_p=MASK_P, mask_token_id=tokenizer.mask_token_id)

# Raw (untokenized) datasets: "original" = competition data, "extra" = external data.
ds = DatasetDict()

for key, data in (("original", original_data), ("extra", extra_data)):
    columns = {
        "full_text": [],
        "document": [],
        "tokens": [],
        "trailing_whitespace": [],
        "provided_labels": [],
    }
    for record in data:
        columns["full_text"].append(record["full_text"])
        # document ids are stored as strings
        columns["document"].append(str(record["document"]))
        columns["tokens"].append(record["tokens"])
        columns["trailing_whitespace"].append(record["trailing_whitespace"])
        # the source "labels" field is kept under "provided_labels"
        columns["provided_labels"].append(record["labels"])
    ds[key] = Dataset.from_dict(columns)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Metrics

In [14]:
class MetricsComputer:
    """seqeval-based recall / precision / F-beta for token-classification predictions."""

    def __init__(self, all_labels: list[str], beta: float = 5.0) -> None:
        """Store the id -> label list and the beta used for the F-beta score."""
        self.all_labels = all_labels
        self.beta = beta

    def __call__(self, preds: EvalPrediction) -> dict[str, float]:
        """Compute the metrics for one evaluation pass.

        Args:
            preds: pair of (predictions, labels); the arg-max over axis 2 of
                ``predictions`` is taken as the predicted class id.

        Returns:
            Dict with ``recall``, ``precision`` and ``f5``. ``f5`` is 0.0 when
            both precision and recall are 0 (instead of dividing by zero).
        """
        predictions, labels = preds
        predictions = np.argmax(predictions, axis=2)

        # Remove ignored index (special tokens)
        true_predictions = [
            [self.all_labels[p] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]
        true_labels = [
            [self.all_labels[l] for (p, l) in zip(prediction, label) if l != -100]
            for prediction, label in zip(predictions, labels)
        ]

        recall = recall_score(true_labels, true_predictions)
        precision = precision_score(true_labels, true_predictions)

        beta_sq = self.beta ** 2
        denominator = beta_sq * precision + recall
        # Guard the 0 / 0 case (no true positives at all), which would otherwise
        # raise ZeroDivisionError or yield NaN depending on the float type.
        if denominator > 0:
            f5_score = (1 + beta_sq) * recall * precision / denominator
        else:
            f5_score = 0.0

        results = {
            'recall': recall,
            'precision': precision,
            'f5': f5_score
        }
        return results

# compute_metrics = MetricsComputer(all_labels=all_labels)

In [15]:
def find_span(target: list[str], document: list[str]) -> list[list[int]]:
    """Locate every non-overlapping occurrence of ``target`` inside ``document``.

    Args:
        target: token sequence to look for.
        document: token sequence to search in.

    Returns:
        One list of consecutive ``document`` indices per occurrence, in order of
        appearance. Occurrences never overlap: the search resumes right after a
        match. An empty ``target`` yields no spans.
    """
    needle = list(target)
    width = len(needle)
    spans = []
    if width == 0:
        return spans

    start = 0
    # Compare a full window at every candidate start. The previous
    # single-pass matcher dropped the current token after a partial match
    # failed, so e.g. ["a", "b"] was not found in ["a", "a", "b"].
    while start + width <= len(document):
        if list(document[start:start + width]) == needle:
            spans.append(list(range(start, start + width)))
            start += width
        else:
            start += 1

    return spans

In [16]:
class PRFScore:
    """Running tp / fp / fn counters with precision, recall and F-scores derived on demand."""

    def __init__(self, *, tp: int = 0, fp: int = 0, fn: int = 0) -> None:
        """Start from the given counts (keyword-only, all default to 0)."""
        self.tp, self.fp, self.fn = tp, fp, fn

    def __len__(self) -> int:
        """Total number of counted events."""
        return sum((self.tp, self.fp, self.fn))

    def __iadd__(self, other):  # in-place add
        """Accumulate the counts of ``other`` into this object."""
        for field in ("tp", "fp", "fn"):
            setattr(self, field, getattr(self, field) + getattr(other, field))
        return self

    def __add__(self, other):
        """Return a new score holding the field-wise sum of both operands."""
        return PRFScore(
            tp=self.tp + other.tp,
            fp=self.fp + other.fp,
            fn=self.fn + other.fn,
        )

    def score_set(self, cand: set, gold: set) -> None:
        """Update the counts from a candidate set compared against a gold set."""
        hits = cand.intersection(gold)
        self.tp += len(hits)
        self.fp += len(cand - gold)
        self.fn += len(gold - cand)

    @property
    def precision(self) -> float:
        """tp / (tp + fp); the 1e-100 term avoids division by zero."""
        predicted = self.tp + self.fp + 1e-100
        return self.tp / predicted

    @property
    def recall(self) -> float:
        """tp / (tp + fn); the 1e-100 term avoids division by zero."""
        actual = self.tp + self.fn + 1e-100
        return self.tp / actual

    @property
    def f1(self) -> float:
        """Harmonic mean of precision and recall."""
        p, r = self.precision, self.recall
        return 2 * ((p * r) / (p + r + 1e-100))

    @property
    def f5(self) -> float:
        """F-beta score with beta = 5."""
        beta = 5
        beta_sq = beta ** 2
        p, r = self.precision, self.recall
        numerator = (1 + beta_sq) * p * r
        denominator = beta_sq * p + r + 1e-100
        return numerator / denominator

    def to_dict(self) -> dict[str, float]:
        """Precision, recall and F5 under the keys ``p``, ``r`` and ``f5``."""
        return {"p": self.precision, "r": self.recall, "f5": self.f5}


class MetricsComputerV2:
    """Token-level precision / recall / F5 over the original (spaCy) tokens of the eval split.

    Model predictions are mapped back from sub-word tokens to the dataset's
    own tokens. E-mail and phone-number labels predicted by the model are
    discarded and replaced by regex matches collected once at construction time.
    """

    # Shared blank spaCy English pipeline; only its tokenizer is used here.
    nlp = English()

    def __init__(self, eval_ds: Dataset, label2id: dict, conf_thresh: float = 0.9) -> None:
        """Prepare ground truth and rule-based predictions for ``eval_ds``.

        Args:
            eval_ds: tokenized evaluation dataset; its ``labels`` column is
                dropped and ``provided_labels`` is renamed to ``labels``.
            label2id: label name -> class id mapping.
            conf_thresh: threshold applied to the "O" score in ``create_pred_df``.
        """
        self.ds = eval_ds.remove_columns("labels").rename_columns({"provided_labels": "labels"})
        self.gt_df = self.create_gt_df(self.ds)
        self.label2id = label2id
        # Inverse mapping kept on the instance so decoding does not depend on a
        # notebook-level global.
        self.id2label = {idx: label for label, idx in label2id.items()}
        self.confth = conf_thresh
        self._search_gt()

    def __call__(self, eval_preds: EvalPrediction) -> dict:
        """Turn the predictions of one evaluation pass into the metric dict."""
        pred_df = self.create_pred_df(eval_preds.predictions)
        return self.compute_metrics_from_df(self.gt_df, pred_df)

    def _search_gt(self) -> None:
        """Collect regex-based e-mail and phone-number predictions for every document.

        Results are stored in ``self.emails`` and ``self.phone_nums`` as rows in
        the same format as the prediction data frame.
        """
        email_regex = re.compile(r'[\w.+-]+@[\w-]+\.[\w.-]+')
        phone_num_regex = re.compile(r"(\(\d{3}\)\d{3}\-\d{4}\w*|\d{3}\.\d{3}\.\d{4})\s")
        self.emails = []
        self.phone_nums = []

        for _data in self.ds:
            # email: a token that matches the pattern as a whole
            for token_idx, token in enumerate(_data["tokens"]):
                if email_regex.fullmatch(token) is not None:
                    self.emails.append(
                        {"document": _data["document"], "token": token_idx, "label": "B-EMAIL", "token_str": token}
                    )
            # phone number: matched on the raw text, then located in the token list
            matches = phone_num_regex.findall(_data["full_text"])
            # Every distinct match is processed. The span loop used to sit
            # outside the match loop, so only the last match of a document was
            # ever recorded.
            for match in dict.fromkeys(matches):
                target = [t.text for t in self.nlp.tokenizer(match)]
                for matched_span in find_span(target, _data["tokens"]):
                    for intermediate, token_idx in enumerate(matched_span):
                        prefix = "I" if intermediate else "B"
                        self.phone_nums.append(
                            {"document": _data["document"], "token": token_idx, "label": f"{prefix}-PHONE_NUM", "token_str": _data["tokens"][token_idx]}
                        )

    @staticmethod
    def create_gt_df(ds: Dataset):
        """Build the ground-truth frame: one row per token whose label is not "O"."""
        gt = []
        for row in ds:
            for token_idx, (token, label) in enumerate(zip(row["tokens"], row["labels"])):
                if label == "O":
                    continue
                gt.append(
                    {"document": row["document"], "token": token_idx, "label": label, "token_str": token}
                )
        gt_df = pd.DataFrame(gt)
        gt_df["row_id"] = gt_df.index

        return gt_df

    def create_pred_df(self, prediction: np.ndarray) -> pd.DataFrame:
        """Decode model output into one row per predicted (document, token, label).

        Args:
            prediction: array indexed as [document, sub-word position, class].

        Returns:
            Data frame with the model's predictions (first sub-word wins per
            token) followed by the regex-based e-mail / phone-number rows.
        """
        ### construct prediction df
        o_index = self.label2id["O"]
        preds = prediction.argmax(-1)
        # The [:o_index] slice only covers every non-"O" class when "O" has the
        # largest class id.
        preds_without_o = prediction[:,:,:o_index].argmax(-1)
        o_preds = prediction[:,:,o_index]
        # NOTE(review): no softmax is applied in this class, so the threshold is
        # compared against whatever scale the caller supplies (raw logits if the
        # Trainer passes them through) - confirm this is intended.
        preds_final = np.where(o_preds < self.confth, preds_without_o , preds)

        pairs = set()
        processed = []

        # Iterate over document
        for p_doc, token_map, offsets, tokens, doc in zip(
            preds_final, self.ds["token_map"], self.ds["offset_mapping"], self.ds["tokens"], self.ds["document"]
        ):
            # Iterate over sequence
            for p_token, (start_idx, end_idx) in zip(p_doc, offsets):
                label_pred = self.id2label[p_token]

                if start_idx + end_idx == 0:
                    # [CLS] token i.e. BOS
                    continue

                if token_map[start_idx] == -1:
                    start_idx += 1

                # ignore "\n\n"
                while start_idx < len(token_map) and tokens[token_map[start_idx]].isspace():
                    start_idx += 1

                if start_idx >= len(token_map): 
                    break

                token_id = token_map[start_idx]
                pair = (doc, token_id)

                # ignore "O", preds, phone number and  email
                if label_pred in ("O", "B-EMAIL", "B-PHONE_NUM", "I-PHONE_NUM") or token_id == -1:
                    continue   

                # keep only the first labelled sub-word of each token
                if pair in pairs:
                    continue

                processed.append(
                    {"document": doc, "token": token_id, "label": label_pred, "token_str": tokens[token_id]}
                )
                pairs.add(pair)

        pred_df = pd.DataFrame(processed + self.emails + self.phone_nums)
        pred_df["row_id"] = list(range(len(pred_df)))

        return pred_df

    def compute_metrics_from_df(self, gt_df, pred_df):
        """
        Compute the LB metric (lb) and other auxiliary metrics

        Both frames are reduced to sets of (document, token, label) triples.
        Scores are micro-averaged over all entity types and also reported per
        type, with the B- / I- prefix stripped from the type name.
        """

        references = {(row.document, row.token, row.label) for row in gt_df.itertuples()}
        predictions = {(row.document, row.token, row.label) for row in pred_df.itertuples()}

        score_per_type = defaultdict(PRFScore)

        for ex in predictions:
            pred_type = ex[-1] # (document, token, label)
            if pred_type != 'O':
                pred_type = pred_type[2:] # avoid B- and I- prefix

            if pred_type not in score_per_type:
                score_per_type[pred_type] = PRFScore()

            if ex in references:
                score_per_type[pred_type].tp += 1
                references.remove(ex)
            else:
                score_per_type[pred_type].fp += 1

        # whatever is left in references was never predicted
        for doc, tok, ref_type in references:
            if ref_type != 'O':
                ref_type = ref_type[2:] # avoid B- and I- prefix

            if ref_type not in score_per_type:
                score_per_type[ref_type] = PRFScore()
            score_per_type[ref_type].fn += 1

        totals = PRFScore()

        for prf in score_per_type.values():
            totals += prf

        return {
            "precision": totals.precision,
            "recall": totals.recall,
            "f5": totals.f5,
            **{
                f"{v_k}-{k}": v_v 
                for k in set([l[2:] for l in self.label2id.keys() if l!= 'O'])
                for v_k, v_v in score_per_type[k].to_dict().items()
            },
        }

## Model

In [17]:
class ModelInit:
    """Callable for ``Trainer(model_init=...)`` that resets one model to its initial weights.

    The checkpoint is loaded once; a deep copy of the initial state dict is
    kept so that every call restores the very same model object to that state.
    """

    def __init__(
        self,
        checkpoint: str,
        id2label: dict,
        label2id: dict,
        freeze_embedding: bool,
        freeze_layers: int,
    ) -> None:
        """Load ``checkpoint`` with a token-classification head and apply freezing.

        Args:
            checkpoint: model name or path given to ``from_pretrained``.
            id2label / label2id: label mappings stored in the model config.
            freeze_embedding: when true, embedding parameters get no gradient.
            freeze_layers: number of leading encoder layers to freeze.
        """
        model = DebertaV2ForTokenClassification.from_pretrained(
            checkpoint,
            num_labels=len(id2label),
            id2label=id2label,
            label2id=label2id,
            ignore_mismatched_sizes=True
        )
        backbone = model.deberta
        # Embeddings are trainable unless explicitly frozen.
        backbone.embeddings.requires_grad_(not freeze_embedding)
        # Freeze the first `freeze_layers` encoder layers (none when it is 0).
        for frozen_layer in backbone.encoder.layer[:freeze_layers]:
            frozen_layer.requires_grad_(False)

        self.model = model
        # Snapshot of the initial weights, restored on every call.
        self.weight = copy.deepcopy(model.state_dict())

    def __call__(self) -> DebertaV2ForTokenClassification:
        """Restore the initial weights and return the (shared) model instance."""
        self.model.load_state_dict(self.weight)
        return self.model

# Single factory reused by every fold of the cross-validation loop.
model_init = ModelInit(
    checkpoint=TRAINING_MODEL_PATH,
    id2label=id2label,
    label2id=label2id,
    freeze_embedding=FREEZE_EMBEDDING,
    freeze_layers=FREEZE_LAYERS,
)

Some weights of the model checkpoint at microsoft/deberta-v3-base were not used when initializing DebertaV2ForTokenClassification: ['mask_predictions.LayerNorm.weight', 'mask_predictions.dense.bias', 'mask_predictions.dense.weight', 'deberta.embeddings.position_embeddings.weight', 'mask_predictions.classifier.weight', 'mask_predictions.classifier.bias', 'lm_predictions.lm_head.dense.weight', 'mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2ForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2ForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a Be

## Split

In [18]:
# split according to document id
folds = [
    (
        np.array([i for i, d in enumerate(ds["original"]["document"]) if int(d) % N_SPLITS != s]),
        np.array([i for i, d in enumerate(ds["original"]["document"]) if int(d) % N_SPLITS == s])
    )
    for s in range(N_SPLITS)
]

exclude_indices = []
if FILTER_ORIGINAL:
    negative_idxs = [i for i, labels in enumerate(ds["original"]["provided_labels"]) if not any(np.array(labels) != "O")]
    exclude_indices = negative_idxs[len(negative_idxs)//3:]

## Trainer with custom loss

In [19]:
class FocalLoss(nn.Module):
    """Focal loss on top of per-token cross-entropy: ``(1 - pt) ** gamma * ce``.

    ``pt`` is recovered as ``exp(-ce)``. Positions whose target equals
    ``ignore_index`` are left out of the "mean" / "sum" reductions and are 0
    in the unreduced output.
    """

    def __init__(
        self, weight = None, gamma = 2., reduction = "mean", ignore_index: int = -100
    ) -> None:
        """Configure class weights, focusing exponent, reduction mode and ignored target id."""
        super().__init__()
        self.ignore_index = ignore_index
        self.gamma = gamma
        self.reduction = reduction
        # Unreduced CE so the focal factor can be applied per position.
        self.ce = nn.CrossEntropyLoss(weight=weight, reduction="none", ignore_index=ignore_index)

    def forward(self, logits, target):
        """Compute the loss for ``logits`` laid out (batch, seq, classes) and ``target`` (batch, seq)."""
        valid = target != self.ignore_index
        # CrossEntropyLoss wants the class axis second: b, seq, c -> b, c, seq
        per_token_ce = self.ce(logits.permute(0, 2, 1), target)
        prob_true = torch.exp(-per_token_ce)
        focal = (1 - prob_true) ** self.gamma * per_token_ce

        if self.reduction == "mean":
            return torch.mean(torch.masked_select(focal, valid))
        if self.reduction == "sum":
            return torch.sum(torch.masked_select(focal, valid))
        # any other value: unreduced, ignored positions zeroed
        return torch.where(valid, focal, 0)

In [20]:
# Loss shared by every trainer instance; GAMMA == 0 reduces it to plain cross-entropy.
loss_fn = FocalLoss(gamma=GAMMA)

class FocalLossTrainer(Trainer):
    """Trainer that replaces the model's built-in loss with ``loss_fn``."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run the forward pass through the parent and re-compute the loss with ``loss_fn``.

        Args:
            model: model being trained / evaluated.
            inputs: batch dict; must contain ``labels``.
            return_outputs: when true, return ``(loss, outputs)`` instead of the loss.
            num_items_in_batch: accepted (and ignored) so the override stays
                callable by transformers versions that pass this argument.

        Returns:
            The focal loss, optionally paired with the model outputs whose
            ``loss`` entry has been overwritten with it.
        """
        # Grab the labels first: the parent implementation may pop them from
        # `inputs` (it does so when label smoothing is configured).
        labels = inputs["labels"]
        _, outputs = super().compute_loss(model, inputs, return_outputs=True)
        loss = loss_fn(logits=outputs["logits"], target=labels)
        outputs["loss"] = loss
        return (loss, outputs) if return_outputs else loss

## Train

#### CV

In [21]:
# Set for O(1) membership tests; scanning the list for every training row was
# quadratic in the dataset size.
excluded_rows = set(exclude_indices)

for fold_idx, (train_idx, eval_idx) in enumerate(folds):
    args.run_name = f"fold-{fold_idx}"
    args.output_dir = os.path.join(OUTPUT_DIR, f"fold_{fold_idx}")
    result_path = Path(args.output_dir).joinpath("eval_result.json")
    # A stored result marks the fold as finished, so re-running the cell resumes.
    if result_path.exists():
        continue

    # Training data: original rows of this fold (minus the excluded ones) + external data.
    original_ds = ds["original"].select([i for i in train_idx if i not in excluded_rows])
    train_ds = concatenate_datasets([original_ds, ds["extra"]])
    train_ds = train_ds.map(train_encoder, num_proc=os.cpu_count())
    # Augmentation is applied lazily, every time a batch is read.
    train_ds.set_transform(train_augmentation)
    eval_ds = ds["original"].select(eval_idx)
    eval_ds = eval_ds.map(eval_encoder, num_proc=os.cpu_count())

    trainer = FocalLossTrainer(
        args=args,
        model_init=model_init,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        tokenizer=tokenizer,
        # Pass the configured threshold instead of silently relying on the default.
        compute_metrics=MetricsComputerV2(eval_ds=eval_ds, label2id=label2id, conf_thresh=CONF_THRESH),
        data_collator=DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=16),
    )
    trainer.train()
    eval_res = trainer.evaluate(eval_dataset=eval_ds)

    # The fold directory only exists if a checkpoint was saved; make sure of it
    # before writing the result.
    result_path.parent.mkdir(parents=True, exist_ok=True)
    with result_path.open("w") as f:
        json.dump(eval_res, f)

    # Release GPU memory and close the wandb run before the next fold.
    del trainer
    gc.collect()
    torch.cuda.empty_cache()
    wandb.finish()

         

#0:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/501 [00:00<?, ?ex/s]

         

#0:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/212 [00:00<?, ?ex/s]

Using cuda_amp half precision backend
The following columns in the training set don't have a corresponding argument in `DebertaV2ForTokenClassification.forward` and have been ignored: full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map. If full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map are not expected by `DebertaV2ForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4015
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 8
  Total optimization steps = 753
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss,Validation Loss,Precision,Recall,F5,P-name Student,R-name Student,F5-name Student,P-email,R-email,F5-email,P-id Num,R-id Num,F5-id Num,P-url Personal,R-url Personal,F5-url Personal,P-username,R-username,F5-username,P-street Address,R-street Address,F5-street Address,P-phone Num,R-phone Num,F5-phone Num
50,0.1001,0.012512,0.178218,0.027356,0.028276,0.090909,0.001672,0.001738,1.0,1.0,1.0,0.033898,0.076923,0.073343,0.4375,0.28,0.283931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100,0.022,0.002853,0.619355,0.729483,0.724528,0.627536,0.72408,0.719821,1.0,1.0,1.0,1.0,0.615385,0.624625,0.469388,0.92,0.88724,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
150,0.0165,0.002088,0.692112,0.826748,0.820608,0.695225,0.827759,0.821734,1.0,1.0,1.0,0.95,0.730769,0.737313,0.55,0.88,0.86015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200,0.0095,0.001885,0.649518,0.920973,0.906403,0.64774,0.934783,0.919117,1.0,1.0,1.0,1.0,0.653846,0.662669,0.6875,0.88,0.870624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
250,0.0052,0.001547,0.685841,0.942249,0.928892,0.677419,0.948161,0.933806,1.0,1.0,1.0,0.956522,0.846154,0.849926,0.766667,0.92,0.912977,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
300,0.0067,0.00158,0.709932,0.955927,0.943355,0.702934,0.961538,0.948123,1.0,1.0,1.0,0.916667,0.846154,0.848665,0.75,0.96,0.949772,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
350,0.0044,0.001465,0.720824,0.957447,0.945509,0.727848,0.961538,0.949809,1.0,1.0,1.0,0.916667,0.846154,0.848665,0.555556,1.0,0.970149,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
400,0.0028,0.001331,0.736842,0.957447,0.946547,0.726131,0.966555,0.954401,1.0,1.0,1.0,0.956522,0.846154,0.849926,0.88,0.88,0.88,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
450,0.0029,0.001625,0.691209,0.955927,0.942051,0.684148,0.959866,0.945215,1.0,1.0,1.0,0.916667,0.846154,0.848665,0.694444,1.0,0.983359,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
500,0.0025,0.001701,0.71126,0.969605,0.956246,0.7164,0.971572,0.958442,1.0,1.0,1.0,0.923077,0.923077,0.923077,0.510204,1.0,0.964392,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForTokenClassification.forward` and have been ignored: full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map. If full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map are not expected by `DebertaV2ForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1698
  Batch size = 8
Saving model checkpoint to output2/fold_0/checkpoint-50
Configuration saved in output2/fold_0/checkpoint-50/config.json
Model weights saved in output2/fold_0/checkpoint-50/pytorch_model.bin
tokenizer config file saved in output2/fold_0/checkpoint-50/tokenizer_config.json
Special tokens file saved in output2/fold_0/checkpoint-50/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForTokenClassification.forwar

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/f5,▁▆▇█████████████
eval/f5-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-ID_NUM,▁▆▆▆▇▇▇▇▇███████
eval/f5-NAME_STUDENT,▁▆▇█████████████
eval/f5-PHONE_NUM,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-STREET_ADDRESS,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-URL_PERSONAL,▁▇▇▇▇██▇████████
eval/f5-USERNAME,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/p-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f5,0.96019
eval/f5-EMAIL,1.0
eval/f5-ID_NUM,0.88593
eval/f5-NAME_STUDENT,0.96369
eval/f5-PHONE_NUM,0.0
eval/f5-STREET_ADDRESS,0.0
eval/f5-URL_PERSONAL,0.98187
eval/f5-USERNAME,0.0
eval/loss,0.00166
eval/p-EMAIL,1.0


         

#0:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/502 [00:00<?, ?ex/s]

         

#0:   0%|          | 0/215 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/215 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/214 [00:00<?, ?ex/s]

Using cuda_amp half precision backend
The following columns in the training set don't have a corresponding argument in `DebertaV2ForTokenClassification.forward` and have been ignored: full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map. If full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map are not expected by `DebertaV2ForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4017
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 8
  Total optimization steps = 753
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669338516658173, max=1.0…

Step,Training Loss,Validation Loss,Precision,Recall,F5,P-name Student,R-name Student,F5-name Student,P-email,R-email,F5-email,P-id Num,R-id Num,F5-id Num,P-url Personal,R-url Personal,F5-url Personal,P-username,R-username,F5-username,P-street Address,R-street Address,F5-street Address,P-phone Num,R-phone Num,F5-phone Num
50,0.0835,0.013891,0.108303,0.041209,0.042215,0.0625,0.001548,0.001608,1.0,1.0,1.0,0.004878,0.037037,0.029545,0.272727,0.181818,0.184179,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
100,0.0222,0.00351,0.650307,0.728022,0.724691,0.684438,0.735294,0.733199,1.0,1.0,1.0,0.625,0.185185,0.190337,0.383562,0.848485,0.81069,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
150,0.0176,0.002815,0.58636,0.909341,0.890475,0.576885,0.911765,0.891853,1.0,1.0,1.0,0.75,0.888889,0.882603,0.5625,0.818182,0.804124,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
200,0.0087,0.002506,0.647003,0.934066,0.918394,0.654584,0.950464,0.934223,1.0,1.0,1.0,0.590909,0.481481,0.484935,0.534483,0.939394,0.912797,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
250,0.0077,0.00256,0.690476,0.956044,0.942107,0.696529,0.962848,0.948894,1.0,1.0,1.0,0.678571,0.703704,0.702703,0.540984,1.0,0.968397,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
300,0.0069,0.002132,0.776271,0.943681,0.935918,0.788586,0.941176,0.934224,1.0,1.0,1.0,0.65,0.962963,0.945455,0.645833,0.939394,0.923253,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
350,0.0033,0.002262,0.743702,0.932692,0.923665,0.746667,0.95356,0.943505,1.0,1.0,1.0,0.625,0.555556,0.55794,0.65,0.787879,0.781503,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
400,0.005,0.002023,0.778153,0.949176,0.94122,0.792308,0.956656,0.949084,1.0,1.0,1.0,0.619048,0.962963,0.942817,0.625,0.757576,0.751445,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
450,0.0034,0.001485,0.804762,0.928571,0.923109,0.812416,0.931889,0.926647,1.0,1.0,1.0,0.666667,0.814815,0.80791,0.714286,0.909091,0.899654,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
500,0.0041,0.001499,0.816748,0.924451,0.919786,0.831025,0.928793,0.924609,1.0,1.0,1.0,0.642857,0.666667,0.665718,0.673469,1.0,0.981693,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForTokenClassification.forward` and have been ignored: full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map. If full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map are not expected by `DebertaV2ForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1714
  Batch size = 8
Saving model checkpoint to output2/fold_1/checkpoint-50
Configuration saved in output2/fold_1/checkpoint-50/config.json
Model weights saved in output2/fold_1/checkpoint-50/pytorch_model.bin
tokenizer config file saved in output2/fold_1/checkpoint-50/tokenizer_config.json
Special tokens file saved in output2/fold_1/checkpoint-50/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForTokenClassification.forwar

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/f5,▁▆██████████████
eval/f5-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-ID_NUM,▁▂█▄▆█▅█▇▆▆▅▆▆▆▆
eval/f5-NAME_STUDENT,▁▆██████████████
eval/f5-PHONE_NUM,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-STREET_ADDRESS,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-URL_PERSONAL,▁▆▆▇█▇▆▆▇███████
eval/f5-USERNAME,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▂▂▂▂▁▂▁▁▁▁▁▁▁▁▂
eval/p-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f5,0.94211
eval/f5-EMAIL,1.0
eval/f5-ID_NUM,0.7027
eval/f5-NAME_STUDENT,0.94889
eval/f5-PHONE_NUM,1.0
eval/f5-STREET_ADDRESS,0.0
eval/f5-URL_PERSONAL,0.9684
eval/f5-USERNAME,0.0
eval/loss,0.00256
eval/p-EMAIL,1.0


         

#0:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/505 [00:00<?, ?ex/s]

         

#0:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/211 [00:00<?, ?ex/s]

Using cuda_amp half precision backend
The following columns in the training set don't have a corresponding argument in `DebertaV2ForTokenClassification.forward` and have been ignored: full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map. If full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map are not expected by `DebertaV2ForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4040
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 8
  Total optimization steps = 756
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss,Validation Loss,Precision,Recall,F5,P-name Student,R-name Student,F5-name Student,P-email,R-email,F5-email,P-id Num,R-id Num,F5-id Num,P-url Personal,R-url Personal,F5-url Personal,P-username,R-username,F5-username,P-street Address,R-street Address,F5-street Address,P-phone Num,R-phone Num,F5-phone Num
50,0.0905,0.012978,0.106383,0.01355,0.014021,0.0,0.0,0.0,0.888889,1.0,0.995215,0.0,0.0,0.0,0.25,0.035714,0.036932,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
100,0.0276,0.00453,0.403832,0.742547,0.719342,0.416096,0.73525,0.714181,0.888889,1.0,0.995215,0.52381,0.785714,0.770889,0.490566,0.928571,0.897742,0.0,0.0,0.0,0.156863,0.727273,0.638037,1.0,1.0,1.0
150,0.0134,0.002599,0.681871,0.789973,0.785185,0.692206,0.792738,0.788335,0.888889,1.0,0.995215,0.923077,0.857143,0.859504,0.627907,0.964286,0.944818,0.222222,0.5,0.477064,0.391304,0.409091,0.408377,1.0,1.0,1.0
200,0.0196,0.003318,0.511232,0.894309,0.869257,0.507173,0.909228,0.882326,0.888889,1.0,0.995215,1.0,0.714286,0.722222,0.588235,0.714286,0.708447,0.214286,0.75,0.684211,0.447368,0.772727,0.751701,1.0,1.0,1.0
250,0.0084,0.002758,0.60945,0.856369,0.843229,0.598344,0.874433,0.859185,0.888889,1.0,0.995215,0.933333,1.0,0.99726,0.580645,0.642857,0.640219,1.0,0.5,0.509804,0.846154,0.5,0.507993,1.0,1.0,1.0
300,0.0049,0.001974,0.719178,0.853659,0.847563,0.707559,0.863843,0.856566,0.888889,1.0,0.995215,0.857143,0.857143,0.857143,0.857143,0.642857,0.649098,1.0,0.5,0.509804,0.818182,0.818182,0.818182,1.0,1.0,1.0
350,0.0038,0.001754,0.852388,0.798103,0.800063,0.850394,0.816944,0.818182,0.888889,1.0,0.995215,0.875,1.0,0.994536,0.826087,0.678571,0.683264,0.0,0.0,0.0,1.0,0.318182,0.32675,1.0,1.0,1.0
400,0.0054,0.002085,0.743713,0.841463,0.837231,0.754768,0.838124,0.834579,0.888889,1.0,0.995215,0.56,1.0,0.970667,0.565217,0.928571,0.906166,0.0,0.0,0.0,0.9,0.818182,0.821053,1.0,1.0,1.0
450,0.0033,0.00181,0.759064,0.879404,0.874074,0.756129,0.886536,0.880694,0.888889,1.0,0.995215,1.0,1.0,1.0,0.605263,0.821429,0.810298,1.0,0.25,0.257426,0.941176,0.727273,0.733686,1.0,1.0,1.0
500,0.0027,0.002142,0.716612,0.894309,0.88586,0.718104,0.8941,0.885751,0.888889,1.0,0.995215,0.538462,1.0,0.968085,0.625,0.892857,0.878378,1.0,0.75,0.757282,0.947368,0.818182,0.822496,1.0,1.0,1.0


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForTokenClassification.forward` and have been ignored: full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map. If full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map are not expected by `DebertaV2ForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1689
  Batch size = 8
Saving model checkpoint to output2/fold_2/checkpoint-50
Configuration saved in output2/fold_2/checkpoint-50/config.json
Model weights saved in output2/fold_2/checkpoint-50/pytorch_model.bin
tokenizer config file saved in output2/fold_2/checkpoint-50/tokenizer_config.json
Special tokens file saved in output2/fold_2/checkpoint-50/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForTokenClassification.forwar

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/f5,▁▇▇███▇█████████
eval/f5-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-ID_NUM,▁▆▇▆█▇██████████
eval/f5-NAME_STUDENT,▁▇▇███▇█████████
eval/f5-PHONE_NUM,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-STREET_ADDRESS,▁▆▄▇▅█▄█▇███████
eval/f5-URL_PERSONAL,▁██▆▆▆▆█▇▇▇▇▇▇▇▇
eval/f5-USERNAME,▁▁▅▇▆▆▁▁▃██▆▆▆▆█
eval/loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁
eval/p-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f5,0.88586
eval/f5-EMAIL,0.99522
eval/f5-ID_NUM,0.96809
eval/f5-NAME_STUDENT,0.88575
eval/f5-PHONE_NUM,1.0
eval/f5-STREET_ADDRESS,0.8225
eval/f5-URL_PERSONAL,0.87838
eval/f5-USERNAME,0.75728
eval/loss,0.00214
eval/p-EMAIL,0.88889


         

#0:   0%|          | 0/504 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/503 [00:00<?, ?ex/s]

         

#0:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/213 [00:00<?, ?ex/s]

Using cuda_amp half precision backend
The following columns in the training set don't have a corresponding argument in `DebertaV2ForTokenClassification.forward` and have been ignored: full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map. If full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map are not expected by `DebertaV2ForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4025
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 8
  Total optimization steps = 753
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss,Validation Loss,Precision,Recall,F5,P-name Student,R-name Student,F5-name Student,P-email,R-email,F5-email,P-id Num,R-id Num,F5-id Num,P-url Personal,R-url Personal,F5-url Personal,P-username,R-username,F5-username,P-street Address,R-street Address,F5-street Address,P-phone Num,R-phone Num,F5-phone Num
50,0.1023,0.012511,0.135678,0.043902,0.045075,0.0,0.0,0.0,1.0,1.0,1.0,0.006452,0.083333,0.057143,0.3125,0.2,0.202808,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
100,0.0275,0.004474,0.377857,0.860163,0.819911,0.381443,0.865108,0.82488,1.0,1.0,1.0,0.714286,0.833333,0.828025,0.283333,0.68,0.645255,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
150,0.0156,0.002667,0.660292,0.954472,0.938392,0.665432,0.969424,0.952685,1.0,1.0,1.0,0.714286,0.833333,0.828025,0.395349,0.68,0.661677,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
200,0.0101,0.002493,0.764,0.931707,0.923907,0.763348,0.951439,0.942507,1.0,1.0,1.0,0.909091,0.833333,0.836013,0.565217,0.52,0.521605,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
250,0.006,0.002717,0.583741,0.969106,0.945109,0.623288,0.982014,0.960747,1.0,1.0,1.0,0.392857,0.916667,0.871951,0.24,0.72,0.668571,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
300,0.0039,0.002187,0.656079,0.973984,0.956164,0.668287,0.989209,0.971269,1.0,1.0,1.0,0.55,0.916667,0.89375,0.369565,0.68,0.658718,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
350,0.0065,0.001715,0.859492,0.934959,0.931813,0.868204,0.947842,0.94451,1.0,1.0,1.0,0.647059,0.916667,0.902208,0.666667,0.64,0.640986,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
400,0.0033,0.001879,0.736908,0.960976,0.949867,0.759441,0.976619,0.965994,1.0,1.0,1.0,0.5,0.916667,0.888199,0.457143,0.64,0.630303,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
450,0.0038,0.00294,0.660044,0.972358,0.954978,0.703846,0.98741,0.972343,1.0,1.0,1.0,0.5,0.916667,0.888199,0.223684,0.68,0.630528,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
500,0.0045,0.001921,0.862891,0.941463,0.938178,0.864821,0.955036,0.95122,1.0,1.0,1.0,1.0,0.916667,0.919614,0.64,0.64,0.64,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0


The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForTokenClassification.forward` and have been ignored: full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map. If full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map are not expected by `DebertaV2ForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1706
  Batch size = 8
Saving model checkpoint to output2/fold_3/checkpoint-50
Configuration saved in output2/fold_3/checkpoint-50/config.json
Model weights saved in output2/fold_3/checkpoint-50/pytorch_model.bin
tokenizer config file saved in output2/fold_3/checkpoint-50/tokenizer_config.json
Special tokens file saved in output2/fold_3/checkpoint-50/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DebertaV2ForTokenClassification.forwar

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/f5,▁▇██████████████
eval/f5-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-ID_NUM,▁▇▇▇████████████
eval/f5-NAME_STUDENT,▁▇██████████████
eval/f5-PHONE_NUM,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-STREET_ADDRESS,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-URL_PERSONAL,▁██▆███▇▇▇██████
eval/f5-USERNAME,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▃▂▂▂▁▁▁▂▁▁▁▁▁▁▁
eval/p-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f5,0.95795
eval/f5-EMAIL,1.0
eval/f5-ID_NUM,0.89375
eval/f5-NAME_STUDENT,0.97228
eval/f5-PHONE_NUM,1.0
eval/f5-STREET_ADDRESS,0.0
eval/f5-URL_PERSONAL,0.67275
eval/f5-USERNAME,0.0
eval/loss,0.00169
eval/p-EMAIL,1.0


### Log cross-validation results to W&B

In [22]:
# Aggregate the per-fold evaluation results stored under OUTPUT_DIR and log them,
# together with their cross-fold mean, as a single W&B table in a run named "cv".
wandb.init(name="cv")
try:
    # glob() yields paths in a filesystem-dependent order; sort them so the table rows
    # are always in fold order (length first, so that fold_10 sorts after fold_2).
    res_json_paths = sorted(
        Path(OUTPUT_DIR).glob("fold*/eval_result.json"),
        key=lambda p: (len(p.parent.name), p.parent.name),
    )

    results = dict()
    for res_json_path in res_json_paths:
        # Directory names look like "fold_<k>"; keep only the "<k>" part as the key.
        fold = res_json_path.parent.name.split("_")[-1]
        with open(res_json_path, "r") as fp:
            res = json.load(fp)
        results[fold] = {k.replace("eval_", ""): v for k, v in res.items()}

    if not results:
        raise FileNotFoundError(f"No fold*/eval_result.json found under {OUTPUT_DIR!r}")

    # Take the metric names from the first fold found instead of assuming a fold "0" exists.
    metric_names = list(next(iter(results.values())).keys())

    # The means are computed over the folds only, before the "cv" row is added.
    cv_means = {key: np.mean([r[key] for r in results.values()]) for key in metric_names}
    results["cv"] = cv_means

    table = wandb.Table(columns=["fold"] + metric_names)
    for fold_name, fold_res in results.items():
        table.add_data(fold_name, *[fold_res[c] for c in metric_names])
    wandb.log({"eval_result": table})
finally:
    # Always close the run so a failure here cannot leak the "cv" run into later cells.
    wandb.finish()

VBox(children=(Label(value='0.006 MB of 0.008 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.735485…

#### Train with full data

In [None]:
# Final fit on all available data: no held-out split is passed to the trainer,
# so evaluation and intermediate checkpointing are switched off.

# Keep every row of ds["original"] whose position is not listed in exclude_indices.
full_keep_indices = [idx for idx in range(len(ds["original"])) if idx not in exclude_indices]
original_ds = ds["original"].select(full_keep_indices)

# Merge with the extra data and encode it using all available CPU cores.
train_ds = concatenate_datasets([original_ds, ds["extra"]]).map(
    train_encoder,
    num_proc=os.cpu_count(),
)
# The augmentation is attached as a transform, i.e. applied when examples are accessed.
train_ds.set_transform(train_augmentation)

# Reuse the existing training arguments, changing only what differs for this run.
args.run_name = "all_data"
args.save_strategy = "no"
args.evaluation_strategy = "no"

full_data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=16)
trainer = FocalLossTrainer(
    model_init=model_init,
    args=args,
    train_dataset=train_ds,
    data_collator=full_data_collator,
    tokenizer=tokenizer,
)
trainer.train()

# Persist the final model under <OUTPUT_DIR>/all, then close the W&B run.
trainer.save_model(os.path.join(OUTPUT_DIR, "all"))
wandb.finish()

         

#0:   0%|          | 0/594 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/594 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/594 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/594 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/594 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/593 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/593 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/593 [00:00<?, ?ex/s]

Using cuda_amp half precision backend
The following columns in the training set don't have a corresponding argument in `DebertaV2ForTokenClassification.forward` and have been ignored: full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map. If full_text, document, provided_labels, offset_mapping, tokens, length, trailing_whitespace, token_map are not expected by `DebertaV2ForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4749
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 8
  Total optimization steps = 888
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss
10,3.3962
20,2.9119
30,1.5378
40,0.3367
50,0.0936
60,0.0519
70,0.0415
80,0.0412
90,0.0359
100,0.0307




Training completed. Do not forget to share your model on huggingface.co/models =)


Saving model checkpoint to output2/all
Configuration saved in output2/all/config.json
Model weights saved in output2/all/pytorch_model.bin
tokenizer config file saved in output2/all/tokenizer_config.json
Special tokens file saved in output2/all/special_tokens_map.json


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/learning_rate,▂▃▅▆███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁
train/train_samples_per_second,▁
train/train_steps_per_second,▁

0,1
train/epoch,3.0
train/global_step,888.0
train/learning_rate,0.0
train/loss,0.0023
train/total_flos,6149859980977248.0
train/train_loss,0.10019
train/train_runtime,1194.6084
train/train_samples_per_second,11.926
train/train_steps_per_second,0.743
