In [1]:
%env WANDB_PROJECT=PII
%env WANDB_RUN_GROUP=BiLSTM-longformer-3072-filter+T-CE

env: WANDB_PROJECT=PII
env: WANDB_RUN_GROUP=BiLSTM-longformer-3072-filter+T-CE


In [3]:
import json
import copy
import gc
import os
import re
from collections import defaultdict
from pathlib import Path

import torch
from torch import Tensor, nn
import numpy as np
import pandas as pd
from spacy.lang.en import English
from transformers.tokenization_utils import PreTrainedTokenizerBase
from transformers.models.longformer import LongformerForTokenClassification, LongformerTokenizerFast
from transformers.trainer import Trainer
from transformers.training_args import TrainingArguments
from transformers.trainer_utils import EvalPrediction
from transformers.data.data_collator import DataCollatorForTokenClassification
from datasets import Dataset, DatasetDict, concatenate_datasets
import wandb

In [4]:
wandb.login(key=WANB_KEY)

[34m[1mwandb[0m: Currently logged in as: [33memiz6413[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [5]:
DATA_DIR = Path("../dataset/")
OUTPUT_DIR = "output"
Path(OUTPUT_DIR).mkdir(exist_ok=True)

In [6]:
TRAINING_MODEL_PATH = "allenai/longformer-base-4096"
# TRAINING_MODEL_PATH = "hf-internal-testing/tiny-random-longformer"
TRAINING_MAX_LENGTH = 3072 if "tiny-random" not in TRAINING_MODEL_PATH else 512
EVAL_MAX_LENGTH = 3072 if "tiny-random" not in TRAINING_MODEL_PATH else 512
CONF_THRESH = 0.9
LR = 2.5e-5  # 1.5e-5 ~ 3e-5 for base # 5e-6 ~ 1e-5 for large
LR_SCHEDULER_TYPE = "linear"
NUM_EPOCHS = 3 if "tiny-random" not in TRAINING_MODEL_PATH else 0.1
BATCH_SIZE = 8
EVAL_BATCH_SIZE = 8
GRAD_ACCUMULATION_STEPS = 16 // BATCH_SIZE
WARMUP_RATIO = 0.1
WEIGHT_DECAY = 0.01
FREEZE_EMBEDDING = False
FREEZE_LAYERS = 0
GAMMA = 0
MASK_P = 0
# training data
N_SPLITS = 4
FILTER_ORIGINAL = True
MOTH = False
PJMATHMATICIAN = False
NICHOLAS = False
MPWARE = False
TONYAROBERTSON = True

In [7]:
args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    fp16=torch.cuda.is_available(),
    learning_rate=LR,
    num_train_epochs=NUM_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=EVAL_BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACCUMULATION_STEPS,
    report_to="wandb",
    evaluation_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,
    save_total_limit=1,
    logging_steps=10,
    metric_for_best_model="f5",
    greater_is_better=True,
    load_best_model_at_end=True,
    overwrite_output_dir=True,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    warmup_ratio=WARMUP_RATIO,
    weight_decay=WEIGHT_DECAY,
)

## Load dataset

In [8]:
with DATA_DIR.joinpath("train.json").open("r") as f:
    original_data = json.load(f)

extra_data = []  #

if MOTH:
    with DATA_DIR.joinpath("pii_dataset_fixed.json").open("r") as f:
        external = json.load(f)
    print("Moth's datapoints: ", len(external))
    extra_data.extend(external)

if PJMATHMATICIAN:
    with DATA_DIR.joinpath("moredata_dataset_fixed.json").open("r") as f:
        external = json.load(f)
    print("PJMathmatician's datapoints: ", len(external))
    extra_data.extend(external)

if NICHOLAS:
    with DATA_DIR.joinpath("mixtral-8x7b-v1.json").open("r") as f:
        external = json.load(f)
    print("Nicholas' datapoints: ", len(external))
    extra_data.extend(external)

if MPWARE:
    with DATA_DIR.joinpath("mpware_mixtral8x7b_v1.1.json").open("r") as f:
        external = json.load(f)
    print("MPWARE's datapoints: ", len(external))
    extra_data.extend(external)

if TONYAROBERTSON:
    with DATA_DIR.joinpath("Fake_data_1850_218.json").open("r") as f:
        external = json.load(f)
    print("tonyarobertson's datapoints: ", len(external))
    extra_data.extend(external)

print(f"len(extra_data): {len(extra_data)}")

tonyarobertson's datapoints:  1850
len(extra_data): 1850


In [9]:
all_labels = [
    'B-EMAIL', 'B-ID_NUM', 'B-NAME_STUDENT', 'B-PHONE_NUM', 'B-STREET_ADDRESS', 'B-URL_PERSONAL', 'B-USERNAME', 'I-ID_NUM', 'I-NAME_STUDENT', 'I-PHONE_NUM', 'I-STREET_ADDRESS', 'I-URL_PERSONAL', 'O'
]
id2label = {i: l for i, l in enumerate(all_labels)}
label2id = {v: k for k, v in id2label.items()}
target = [l for l in all_labels if l != "O"]

## Tokenization

In [10]:
class CustomTokenizer:
    def __init__(self, tokenizer: PreTrainedTokenizerBase, label2id: dict, max_length: int) -> None:
        self.tokenizer = tokenizer
        self.label2id = label2id
        self.max_length = max_length

    def __call__(self, example: dict) -> dict:
        # rebuild text from tokens
        text, labels, token_map = [], [], []

        for idx, (t, l, ws) in enumerate(
            zip(example["tokens"], example["provided_labels"], example["trailing_whitespace"])
        ):
            text.append(t)
            labels.extend([l] * len(t))
            token_map.extend([idx]*len(t))

            if ws:
                text.append(" ")
                labels.append("O")
                token_map.append(-1)

        text = "".join(text)
        labels = np.array(labels)

        # actual tokenization
        tokenized = self.tokenizer(
            "".join(text),
            return_offsets_mapping=True,
            truncation=True,
            max_length=self.max_length
        )

        token_labels = []

        for start_idx, end_idx in tokenized.offset_mapping:
            # CLS token
            if start_idx == 0 and end_idx == 0:
                token_labels.append(self.label2id["O"])
                continue

            # case when token starts with whitespace
            if text[start_idx].isspace():
                start_idx += 1

            try:
                token_labels.append(self.label2id[labels[start_idx]])
            except:
                continue

        length = len(tokenized.input_ids)

        return {**tokenized, "labels": token_labels, "length": length, "token_map": token_map}

## Instanciate the dataset

In [11]:
tokenizer = LongformerTokenizerFast.from_pretrained(TRAINING_MODEL_PATH)
train_encoder = CustomTokenizer(tokenizer=tokenizer, label2id=label2id, max_length=TRAINING_MAX_LENGTH)
eval_encoder = CustomTokenizer(tokenizer=tokenizer, label2id=label2id, max_length=EVAL_MAX_LENGTH)

ds = DatasetDict()

for key, data in zip(["original", "extra"], [original_data, extra_data]):
    ds[key] = Dataset.from_dict({
        "full_text": [x["full_text"] for x in data],
        "document": [str(x["document"]) for x in data],
        "tokens": [x["tokens"] for x in data],
        "trailing_whitespace": [x["trailing_whitespace"] for x in data],
        "provided_labels": [x["labels"] for x in data],
    })

## Metrics

In [12]:
def find_span(target: list[str], document: list[str]) -> list[list[int]]:
    idx = 0
    spans = []
    span = []

    for i, token in enumerate(document):
        if token != target[idx]:
            idx = 0
            span = []
            continue
        span.append(i)
        idx += 1
        if idx == len(target):
            spans.append(span)
            span = []
            idx = 0
            continue

    return spans


class PRFScore:
    """A precision / recall / F score."""

    def __init__(
        self,
        *,
        tp: int = 0,
        fp: int = 0,
        fn: int = 0,
    ) -> None:
        self.tp = tp
        self.fp = fp
        self.fn = fn

    def __len__(self) -> int:
        return self.tp + self.fp + self.fn

    def __iadd__(self, other):  # in-place add
        self.tp += other.tp
        self.fp += other.fp
        self.fn += other.fn
        return self

    def __add__(self, other):
        return PRFScore(
            tp=self.tp + other.tp, fp=self.fp + other.fp, fn=self.fn + other.fn
        )

    def score_set(self, cand: set, gold: set) -> None:
        self.tp += len(cand.intersection(gold))
        self.fp += len(cand - gold)
        self.fn += len(gold - cand)

    @property
    def precision(self) -> float:
        return self.tp / (self.tp + self.fp + 1e-100)

    @property
    def recall(self) -> float:
        return self.tp / (self.tp + self.fn + 1e-100)

    @property
    def f1(self) -> float:
        p = self.precision
        r = self.recall
        return 2 * ((p * r) / (p + r + 1e-100))

    @property
    def f5(self) -> float:
        beta = 5
        p = self.precision
        r = self.recall

        fbeta = (1+(beta**2))*p*r / ((beta**2)*p + r + 1e-100)
        return fbeta

    def to_dict(self) -> dict[str, float]:
        return {"p": self.precision, "r": self.recall, "f5": self.f5}


class MetricsComputerV2:
    nlp = English()

    def __init__(self, eval_ds: Dataset, label2id: dict, conf_thresh: float = 0.9) -> None:
        self.ds = eval_ds.remove_columns("labels").rename_columns({"provided_labels": "labels"})
        self.gt_df = self.create_gt_df(self.ds)
        self.label2id = label2id
        self.confth = conf_thresh
        self._search_gt()

    def __call__(self, eval_preds: EvalPrediction) -> dict:
        pred_df = self.create_pred_df(eval_preds.predictions)
        return self.compute_metrics_from_df(self.gt_df, pred_df)

    def _search_gt(self) -> None:
        email_regex = re.compile(r'[\w.+-]+@[\w-]+\.[\w.-]+')
        phone_num_regex = re.compile(r"(\(\d{3}\)\d{3}\-\d{4}\w*|\d{3}\.\d{3}\.\d{4})\s")
        self.emails = []
        self.phone_nums = []

        for _data in self.ds:
            # email
            for token_idx, token in enumerate(_data["tokens"]):
                if re.fullmatch(email_regex, token) is not None:
                    self.emails.append(
                        {"document": _data["document"], "token": token_idx, "label": "B-EMAIL", "token_str": token}
                    )
            # phone number
            matches = phone_num_regex.findall(_data["full_text"])
            if not matches:
                continue
            for match in matches:
                target = [t.text for t in self.nlp.tokenizer(match)]
                matched_spans = find_span(target, _data["tokens"])
            for matched_span in matched_spans:
                for intermediate, token_idx in enumerate(matched_span):
                    prefix = "I" if intermediate else "B"
                    self.phone_nums.append(
                        {"document": _data["document"], "token": token_idx, "label": f"{prefix}-PHONE_NUM", "token_str": _data["tokens"][token_idx]}
                    )

    @staticmethod
    def create_gt_df(ds: Dataset):
        gt = []
        for row in ds:
            for token_idx, (token, label) in enumerate(zip(row["tokens"], row["labels"])):
                if label == "O":
                    continue
                gt.append(
                    {"document": row["document"], "token": token_idx, "label": label, "token_str": token}
                )
        gt_df = pd.DataFrame(gt)
        gt_df["row_id"] = gt_df.index

        return gt_df

    def create_pred_df(self, prediction: np.ndarray) -> pd.DataFrame:
        ### construct prediction df
        o_index = self.label2id["O"]
        preds = prediction.argmax(-1)
        preds_without_o = prediction[:,:,:o_index].argmax(-1)
        o_preds = prediction[:,:,o_index]
        preds_final = np.where(o_preds < self.confth, preds_without_o , preds)

        pairs = set()
        processed = []

        # Iterate over document
        for p_doc, token_map, offsets, tokens, doc in zip(
            preds_final, self.ds["token_map"], self.ds["offset_mapping"], self.ds["tokens"], self.ds["document"]
        ):
            # Iterate over sequence
            for p_token, (start_idx, end_idx) in zip(p_doc, offsets):
                label_pred = id2label[p_token]

                if start_idx + end_idx == 0:
                    # [CLS] token i.e. BOS
                    continue

                if token_map[start_idx] == -1:
                    start_idx += 1

                # ignore "\n\n"
                while start_idx < len(token_map) and tokens[token_map[start_idx]].isspace():
                    start_idx += 1

                if start_idx >= len(token_map):
                    break

                token_id = token_map[start_idx]
                pair = (doc, token_id)

                # ignore "O", preds, phone number and  email
                if label_pred in ("O", "B-EMAIL", "B-PHONE_NUM", "I-PHONE_NUM") or token_id == -1:
                    continue

                if pair in pairs:
                    continue

                processed.append(
                    {"document": doc, "token": token_id, "label": label_pred, "token_str": tokens[token_id]}
                )
                pairs.add(pair)

        pred_df = pd.DataFrame(processed + self.emails + self.phone_nums)
        pred_df["row_id"] = list(range(len(pred_df)))

        return pred_df

    def compute_metrics_from_df(self, gt_df, pred_df):
        """
        Compute the LB metric (lb) and other auxiliary metrics
        """

        references = {(row.document, row.token, row.label) for row in gt_df.itertuples()}
        predictions = {(row.document, row.token, row.label) for row in pred_df.itertuples()}

        score_per_type = defaultdict(PRFScore)
        references = set(references)

        for ex in predictions:
            pred_type = ex[-1] # (document, token, label)
            if pred_type != 'O':
                pred_type = pred_type[2:] # avoid B- and I- prefix

            if pred_type not in score_per_type:
                score_per_type[pred_type] = PRFScore()

            if ex in references:
                score_per_type[pred_type].tp += 1
                references.remove(ex)
            else:
                score_per_type[pred_type].fp += 1

        for doc, tok, ref_type in references:
            if ref_type != 'O':
                ref_type = ref_type[2:] # avoid B- and I- prefix

            if ref_type not in score_per_type:
                score_per_type[ref_type] = PRFScore()
            score_per_type[ref_type].fn += 1

        totals = PRFScore()

        for prf in score_per_type.values():
            totals += prf

        return {
            "precision": totals.precision,
            "recall": totals.recall,
            "f5": totals.f5,
            **{
                f"{v_k}-{k}": v_v
                for k in set([l[2:] for l in self.label2id.keys() if l!= 'O'])
                for v_k, v_v in score_per_type[k].to_dict().items()
            },
        }

## Model

In [13]:
class BiLSTMHead(nn.Module):
    def __init__(self, in_features: int, hidden_dim: int, num_layers: int = 1, dropout: float = 0.1):
        super().__init__()
        self.lstm = nn.LSTM(
            in_features,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout,
        )
        self.out_features = hidden_dim

    def forward(self, x: Tensor):
        self.lstm.flatten_parameters()
        h, _ = self.lstm(x)
        h_f, h_b = torch.split(h, h.size(-1)//2, dim=-1)  # split forward and backward
        h = torch.stack([h_f, h_b], dim=0).mean(dim=0)
        return h


class BiLSTMLongFormerForTokenClassification(LongformerForTokenClassification):
    def __init__(self, config):
        super().__init__(config)
        self.bi_lstm = BiLSTMHead(
            in_features=self.config.hidden_size,
            hidden_dim=self.config.hidden_size,
        )
        self.classifier = nn.Sequential(self.bi_lstm, self.classifier)

In [14]:
class ModelInit:
    model_class = BiLSTMLongFormerForTokenClassification

    def __init__(
        self,
        checkpoint: str,
        id2label: dict,
        label2id: dict,
        freeze_embedding: bool,
        freeze_layers: int,
    ) -> None:
        self.model = self.model_class.from_pretrained(
            checkpoint,
            num_labels=len(id2label),
            id2label=id2label,
            label2id=label2id,
            ignore_mismatched_sizes=True
        )
        for param in self.model.base_model.embeddings.parameters():
            param.requires_grad = False if freeze_embedding else True
        for layer in self.model.base_model.encoder.layer[:freeze_layers]:
            for param in layer.parameters():
                param.requires_grad = False
        self.weight = copy.deepcopy(self.model.state_dict())

    def __call__(self) -> model_class:
        self.model.load_state_dict(self.weight)
        return self.model

model_init = ModelInit(
    TRAINING_MODEL_PATH,
    id2label=id2label,
    label2id=label2id,
    freeze_embedding=FREEZE_EMBEDDING,
    freeze_layers=FREEZE_LAYERS,
)

Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing BiLSTMLongFormerForTokenClassification: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing BiLSTMLongFormerForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BiLSTMLongFormerForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BiLSTMLongFormerForTokenClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.0.lstm.bias_ih_l0_reverse', '

## Split

In [15]:
# split according to document id
folds = [
    (
        np.array([i for i, d in enumerate(ds["original"]["document"]) if int(d) % N_SPLITS != s]),
        np.array([i for i, d in enumerate(ds["original"]["document"]) if int(d) % N_SPLITS == s])
    )
    for s in range(N_SPLITS)
]

exclude_indices = []
if FILTER_ORIGINAL:
    negative_idxs = [i for i, labels in enumerate(ds["original"]["provided_labels"]) if not any(np.array(labels) != "O")]
    exclude_indices = negative_idxs[len(negative_idxs)//3:]

## Train

In [16]:
for fold_idx, (train_idx, eval_idx) in enumerate(folds):
    args.run_name = f"fold-{fold_idx}"
    args.output_dir = os.path.join(OUTPUT_DIR, f"fold_{fold_idx}")
    if Path(args.output_dir).joinpath("eval_result.json").exists():
        continue
    original_ds = ds["original"].select([i for i in train_idx if i not in exclude_indices])
    train_ds = concatenate_datasets([original_ds, ds["extra"]])
    train_ds = train_ds.map(train_encoder, num_proc=os.cpu_count())
    eval_ds = ds["original"].select(eval_idx)
    eval_ds = eval_ds.map(eval_encoder, num_proc=os.cpu_count())
    trainer = Trainer(
        args=args,
        model_init=model_init,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        tokenizer=tokenizer,
        compute_metrics=MetricsComputerV2(eval_ds=eval_ds, label2id=label2id),
        data_collator=DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=512),
    )
    trainer.train()
    eval_res = trainer.evaluate(eval_dataset=eval_ds)
    with open(os.path.join(args.output_dir, "eval_result.json"), "w") as f:
        json.dump(eval_res, f)
    del trainer
    gc.collect()
    torch.cuda.empty_cache()
    wandb.finish()

         

#0:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/501 [00:00<?, ?ex/s]

         

#0:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/212 [00:00<?, ?ex/s]

Using cuda_amp half precision backend
The following columns in the training set don't have a corresponding argument in `BiLSTMLongFormerForTokenClassification.forward` and have been ignored: length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text. If length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text are not expected by `BiLSTMLongFormerForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4015
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 2
  Total optimization steps = 753
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss,Validation Loss,Precision,Recall,F5,P-username,R-username,F5-username,P-url Personal,R-url Personal,F5-url Personal,P-street Address,R-street Address,F5-street Address,P-email,R-email,F5-email,P-id Num,R-id Num,F5-id Num,P-name Student,R-name Student,F5-name Student,P-phone Num,R-phone Num,F5-phone Num
50,0.6513,0.279198,1.0,0.012158,0.012638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100,0.1114,0.016955,1.0,0.012158,0.012638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
150,0.058,0.00782,0.347368,0.050152,0.051859,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.3125,0.041806,0.043247,0.0,0.0,0.0
200,0.0532,0.005712,0.541555,0.306991,0.312192,0.0,0.0,0.0,0.272727,0.24,0.241113,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.548105,0.314381,0.319623,0.0,0.0,0.0
250,0.0321,0.004034,0.494118,0.446809,0.44846,0.0,0.0,0.0,0.617647,0.84,0.828528,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.483577,0.443144,0.444573,0.0,0.0,0.0
300,0.0306,0.004104,0.367188,0.571429,0.55946,0.0,0.0,0.0,0.5,1.0,0.962963,0.0,0.0,0.0,1.0,1.0,1.0,0.818182,0.346154,0.354009,0.351579,0.558528,0.546164,0.0,0.0,0.0
350,0.0233,0.003043,0.602273,0.644377,0.642649,0.0,0.0,0.0,0.423729,1.0,0.950292,0.0,0.0,0.0,1.0,1.0,1.0,0.846154,0.423077,0.431373,0.620915,0.635452,0.63488,0.0,0.0,0.0
400,0.0201,0.002582,0.730769,0.721884,0.722222,0.0,0.0,0.0,0.78125,1.0,0.989346,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.153846,0.159021,0.723967,0.732441,0.732112,0.0,0.0,0.0
450,0.0208,0.002651,0.692935,0.775076,0.771558,0.0,0.0,0.0,0.568182,1.0,0.971599,0.0,0.0,0.0,1.0,1.0,1.0,0.894737,0.653846,0.660688,0.691729,0.769231,0.76593,0.0,0.0,0.0
500,0.0201,0.002649,0.725437,0.819149,0.815099,0.0,0.0,0.0,0.423729,1.0,0.950292,0.0,0.0,0.0,1.0,1.0,1.0,0.9,0.692308,0.698507,0.743902,0.816054,0.813021,0.0,0.0,0.0


The following columns in the evaluation set don't have a corresponding argument in `BiLSTMLongFormerForTokenClassification.forward` and have been ignored: length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text. If length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text are not expected by `BiLSTMLongFormerForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1698
  Batch size = 8
Saving model checkpoint to output/fold_0/checkpoint-50
Configuration saved in output/fold_0/checkpoint-50/config.json
Model weights saved in output/fold_0/checkpoint-50/pytorch_model.bin
tokenizer config file saved in output/fold_0/checkpoint-50/tokenizer_config.json
Special tokens file saved in output/fold_0/checkpoint-50/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BiLSTMLongFormerForTokenClass

0,1
eval/f5,▁▁▁▄▅▆▆▇▇███████
eval/f5-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-ID_NUM,▁▁▁▁▁▄▅▂▇▇▇▅▇███
eval/f5-NAME_STUDENT,▁▁▁▄▅▆▆▇▇███████
eval/f5-PHONE_NUM,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-STREET_ADDRESS,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-URL_PERSONAL,▁▁▁▃▇███████████
eval/f5-USERNAME,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/p-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f5,0.83925
eval/f5-EMAIL,1.0
eval/f5-ID_NUM,0.77266
eval/f5-NAME_STUDENT,0.83522
eval/f5-PHONE_NUM,0.0
eval/f5-STREET_ADDRESS,0.0
eval/f5-URL_PERSONAL,0.98336
eval/f5-USERNAME,0.0
eval/loss,0.00206
eval/p-EMAIL,1.0


         

#0:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/502 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/502 [00:00<?, ?ex/s]

         

#0:   0%|          | 0/215 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/215 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/214 [00:00<?, ?ex/s]

Using cuda_amp half precision backend
The following columns in the training set don't have a corresponding argument in `BiLSTMLongFormerForTokenClassification.forward` and have been ignored: length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text. If length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text are not expected by `BiLSTMLongFormerForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4017
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 2
  Total optimization steps = 753
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01667069723334862, max=1.0)…

Step,Training Loss,Validation Loss,Precision,Recall,F5,P-username,R-username,F5-username,P-url Personal,R-url Personal,F5-url Personal,P-street Address,R-street Address,F5-street Address,P-email,R-email,F5-email,P-id Num,R-id Num,F5-id Num,P-name Student,R-name Student,F5-name Student,P-phone Num,R-phone Num,F5-phone Num
50,0.6444,0.280189,1.0,0.03022,0.031391,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
100,0.1224,0.01827,1.0,0.03022,0.031391,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
150,0.0665,0.008359,0.680851,0.043956,0.045597,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.454545,0.01548,0.016077,1.0,1.0,1.0
200,0.0507,0.006145,0.411765,0.317308,0.320132,0.0,0.0,0.0,0.172414,0.151515,0.152225,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.4,0.315789,0.318367,1.0,1.0,1.0
250,0.0418,0.005121,0.402284,0.43544,0.434064,0.0,0.0,0.0,0.421053,0.484848,0.482039,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.385359,0.431889,0.429892,1.0,1.0,1.0
300,0.0291,0.004006,0.534161,0.472527,0.474634,0.0,0.0,0.0,0.434783,0.606061,0.597015,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.528897,0.467492,0.469589,1.0,1.0,1.0
350,0.0285,0.003382,0.677188,0.648352,0.649415,0.0,0.0,0.0,0.571429,0.848485,0.832952,0.0,0.0,0.0,1.0,1.0,1.0,0.666667,0.074074,0.076696,0.676329,0.650155,0.651124,1.0,1.0,1.0
400,0.0213,0.003192,0.634421,0.774725,0.768191,0.0,0.0,0.0,0.54,0.818182,0.802286,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.518519,0.528302,0.629397,0.775542,0.768677,1.0,1.0,1.0
450,0.0174,0.003211,0.588473,0.799451,0.788577,0.0,0.0,0.0,0.568966,1.0,0.971687,0.0,0.0,0.0,1.0,1.0,1.0,0.785714,0.407407,0.415094,0.584371,0.798762,0.787648,1.0,1.0,1.0
500,0.0173,0.002561,0.70122,0.789835,0.786015,0.0,0.0,0.0,0.595745,0.848485,0.834862,0.0,0.0,0.0,1.0,1.0,1.0,0.75,0.666667,0.669528,0.706128,0.78483,0.78148,1.0,1.0,1.0


The following columns in the evaluation set don't have a corresponding argument in `BiLSTMLongFormerForTokenClassification.forward` and have been ignored: length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text. If length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text are not expected by `BiLSTMLongFormerForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1714
  Batch size = 8
Saving model checkpoint to output/fold_1/checkpoint-50
Configuration saved in output/fold_1/checkpoint-50/config.json
Model weights saved in output/fold_1/checkpoint-50/pytorch_model.bin
tokenizer config file saved in output/fold_1/checkpoint-50/tokenizer_config.json
Special tokens file saved in output/fold_1/checkpoint-50/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BiLSTMLongFormerForTokenClass

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
eval/f5,▁▁▁▄▅▅▆▇████████
eval/f5-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-ID_NUM,▁▁▁▁▁▁▂▅▄▆██████
eval/f5-NAME_STUDENT,▁▁▁▄▅▅▇█████████
eval/f5-PHONE_NUM,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-STREET_ADDRESS,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-URL_PERSONAL,▁▁▁▂▄▅▇▇█▇▇██▇▇█
eval/f5-USERNAME,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/p-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f5,0.83344
eval/f5-EMAIL,1.0
eval/f5-ID_NUM,0.88385
eval/f5-NAME_STUDENT,0.81898
eval/f5-PHONE_NUM,1.0
eval/f5-STREET_ADDRESS,0.0
eval/f5-URL_PERSONAL,0.97059
eval/f5-USERNAME,0.0
eval/loss,0.00249
eval/p-EMAIL,1.0


         

#0:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/505 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/505 [00:00<?, ?ex/s]

         

#0:   0%|          | 0/212 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/211 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/211 [00:00<?, ?ex/s]

Using cuda_amp half precision backend
The following columns in the training set don't have a corresponding argument in `BiLSTMLongFormerForTokenClassification.forward` and have been ignored: length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text. If length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text are not expected by `BiLSTMLongFormerForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4040
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 2
  Total optimization steps = 756
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666944471677804, max=1.0)…

Step,Training Loss,Validation Loss,Precision,Recall,F5,P-username,R-username,F5-username,P-url Personal,R-url Personal,F5-url Personal,P-street Address,R-street Address,F5-street Address,P-email,R-email,F5-email,P-id Num,R-id Num,F5-id Num,P-name Student,R-name Student,F5-name Student,P-phone Num,R-phone Num,F5-phone Num
50,0.6391,0.279448,0.9,0.012195,0.012676,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.888889,1.0,0.995215,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
100,0.1429,0.029031,0.9,0.012195,0.012676,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.888889,1.0,0.995215,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
150,0.0619,0.009826,0.165919,0.050136,0.051518,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.888889,1.0,0.995215,0.0,0.0,0.0,0.131455,0.04236,0.043494,1.0,1.0,1.0
200,0.0581,0.008249,0.325811,0.313008,0.313482,0.0,0.0,0.0,0.029851,0.071429,0.067797,0.666667,0.727273,0.724739,0.888889,1.0,0.995215,0.0,0.0,0.0,0.335526,0.308623,0.309578,1.0,1.0,1.0
250,0.0463,0.005461,0.38724,0.353659,0.354842,0.0,0.0,0.0,0.121212,0.142857,0.141883,1.0,0.363636,0.37276,0.888889,1.0,0.995215,0.0,0.0,0.0,0.385233,0.363086,0.363891,1.0,1.0,1.0
300,0.0321,0.004145,0.424757,0.474255,0.472139,0.0,0.0,0.0,0.393939,0.928571,0.882507,0.5,0.818182,0.798635,0.888889,1.0,0.995215,0.0,0.0,0.0,0.417135,0.449319,0.44799,1.0,1.0,1.0
350,0.029,0.003603,0.559201,0.531165,0.532192,0.0,0.0,0.0,0.471698,0.892857,0.863214,0.818182,0.818182,0.818182,0.888889,1.0,0.995215,0.0,0.0,0.0,0.551948,0.514372,0.515723,1.0,1.0,1.0
400,0.0222,0.003768,0.542117,0.680217,0.673617,0.0,0.0,0.0,0.428571,0.964286,0.920052,0.782609,0.818182,0.816754,0.888889,1.0,0.995215,0.0,0.0,0.0,0.539759,0.677761,0.671161,1.0,1.0,1.0
450,0.0257,0.003231,0.643457,0.726287,0.722709,0.0,0.0,0.0,0.454545,0.892857,0.860927,0.857143,0.818182,0.819615,0.888889,1.0,0.995215,1.0,0.142857,0.147727,0.64698,0.729198,0.725651,1.0,1.0,1.0
500,0.0192,0.003314,0.652794,0.743902,0.739931,0.0,0.0,0.0,0.40625,0.928571,0.884817,0.666667,0.818182,0.811092,0.888889,1.0,0.995215,0.769231,0.714286,0.716253,0.668501,0.73525,0.732437,1.0,1.0,1.0


The following columns in the evaluation set don't have a corresponding argument in `BiLSTMLongFormerForTokenClassification.forward` and have been ignored: length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text. If length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text are not expected by `BiLSTMLongFormerForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1689
  Batch size = 8
Saving model checkpoint to output/fold_2/checkpoint-50
Configuration saved in output/fold_2/checkpoint-50/config.json
Model weights saved in output/fold_2/checkpoint-50/pytorch_model.bin
tokenizer config file saved in output/fold_2/checkpoint-50/tokenizer_config.json
Special tokens file saved in output/fold_2/checkpoint-50/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BiLSTMLongFormerForTokenClass

0,1
eval/f5,▁▁▁▄▄▅▆▇████████
eval/f5-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-ID_NUM,▁▁▁▁▁▁▁▁▂▇▆▇▇▇██
eval/f5-NAME_STUDENT,▁▁▁▄▄▅▆▇████████
eval/f5-PHONE_NUM,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-STREET_ADDRESS,▁▁▁▇▄███████████
eval/f5-URL_PERSONAL,▁▁▁▂▂████████▇▇▇
eval/f5-USERNAME,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/p-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f5,0.76907
eval/f5-EMAIL,0.99522
eval/f5-ID_NUM,0.79224
eval/f5-NAME_STUDENT,0.76506
eval/f5-PHONE_NUM,1.0
eval/f5-STREET_ADDRESS,0.82105
eval/f5-URL_PERSONAL,0.84324
eval/f5-USERNAME,0.0
eval/loss,0.0026
eval/p-EMAIL,0.88889


         

#0:   0%|          | 0/504 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/503 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/503 [00:00<?, ?ex/s]

         

#0:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#1:   0%|          | 0/214 [00:00<?, ?ex/s]

 

#2:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#3:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#4:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#5:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#6:   0%|          | 0/213 [00:00<?, ?ex/s]

 

#7:   0%|          | 0/213 [00:00<?, ?ex/s]

Using cuda_amp half precision backend
The following columns in the training set don't have a corresponding argument in `BiLSTMLongFormerForTokenClassification.forward` and have been ignored: length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text. If length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text are not expected by `BiLSTMLongFormerForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 4025
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 2
  Total optimization steps = 756
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016670319850042383, max=1.0…

Step,Training Loss,Validation Loss,Precision,Recall,F5,P-username,R-username,F5-username,P-url Personal,R-url Personal,F5-url Personal,P-street Address,R-street Address,F5-street Address,P-email,R-email,F5-email,P-id Num,R-id Num,F5-id Num,P-name Student,R-name Student,F5-name Student,P-phone Num,R-phone Num,F5-phone Num
50,0.6483,0.279197,1.0,0.034146,0.035464,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
100,0.1236,0.019538,1.0,0.034146,0.035464,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
150,0.0708,0.007315,0.686567,0.074797,0.077451,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.543478,0.044964,0.046608,1.0,1.0,1.0
200,0.0447,0.005374,0.423671,0.401626,0.402431,0.0,0.0,0.0,0.125,0.12,0.120185,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.414498,0.401079,0.401579,1.0,1.0,1.0
250,0.0332,0.004335,0.400821,0.476423,0.472991,0.0,0.0,0.0,0.206522,0.76,0.688982,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.421667,0.455036,0.453655,1.0,1.0,1.0
300,0.0269,0.003565,0.453804,0.543089,0.539011,0.0,0.0,0.0,0.380952,0.64,0.623688,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.443284,0.534173,0.529993,1.0,1.0,1.0
350,0.0249,0.003037,0.617564,0.708943,0.704931,0.0,0.0,0.0,0.421053,0.64,0.627451,0.0,0.0,0.0,1.0,1.0,1.0,0.777778,0.583333,0.588997,0.617323,0.705036,0.701204,1.0,1.0,1.0
400,0.0267,0.002686,0.627877,0.798374,0.790122,0.0,0.0,0.0,0.592593,0.64,0.638037,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.75,0.757282,0.616343,0.80036,0.791273,1.0,1.0,1.0
450,0.0157,0.002835,0.624694,0.830894,0.820478,0.0,0.0,0.0,0.283951,0.92,0.847025,0.0,0.0,0.0,1.0,1.0,1.0,0.733333,0.916667,0.907937,0.653295,0.820144,0.812166,1.0,1.0,1.0
500,0.0158,0.002632,0.660274,0.78374,0.778143,0.0,0.0,0.0,0.619048,0.52,0.52322,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.666667,0.675325,0.649926,0.791367,0.784798,1.0,1.0,1.0


The following columns in the evaluation set don't have a corresponding argument in `BiLSTMLongFormerForTokenClassification.forward` and have been ignored: length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text. If length, trailing_whitespace, offset_mapping, provided_labels, tokens, token_map, document, full_text are not expected by `BiLSTMLongFormerForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1706
  Batch size = 8
Saving model checkpoint to output/fold_3/checkpoint-50
Configuration saved in output/fold_3/checkpoint-50/config.json
Model weights saved in output/fold_3/checkpoint-50/pytorch_model.bin
tokenizer config file saved in output/fold_3/checkpoint-50/tokenizer_config.json
Special tokens file saved in output/fold_3/checkpoint-50/special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `BiLSTMLongFormerForTokenClass

0,1
eval/f5,▁▁▁▄▅▅▇▇█▇██████
eval/f5-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-ID_NUM,▁▁▁▁▁▁▆▇█▆▇█████
eval/f5-NAME_STUDENT,▁▁▁▄▅▅▇██▇██████
eval/f5-PHONE_NUM,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-STREET_ADDRESS,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/f5-URL_PERSONAL,▁▁▁▂▆▆▆▆█▅▇██▇██
eval/f5-USERNAME,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/p-EMAIL,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval/f5,0.856
eval/f5-EMAIL,1.0
eval/f5-ID_NUM,0.91083
eval/f5-NAME_STUDENT,0.84985
eval/f5-PHONE_NUM,1.0
eval/f5-STREET_ADDRESS,0.0
eval/f5-URL_PERSONAL,0.89121
eval/f5-USERNAME,0.0
eval/loss,0.00221
eval/p-EMAIL,1.0


## Log CV

In [17]:
wandb.init(name="cv")
results = dict()
for res_json_path in Path(OUTPUT_DIR).glob("fold*/eval_result.json"):
    fold = res_json_path.parent.name.split("_")[-1]
    with open(res_json_path, "r") as f:
        res = json.load(f)
        results[fold] = {k.replace("eval_", ""): v for k, v in res.items()}
results["cv"] = {key: np.mean([r[key] for r in results.values()]) for key in results["0"].keys()}
table = wandb.Table(columns=["fold"] + list(results["0"].keys()))
for f, res in results.items():
    table.add_data(f, *[res[c] for c in table.columns if c != "fold"])
wandb.log({"eval_result": table})
wandb.finish()

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016670582450024084, max=1.0…