In [1]:
import uuid
from functools import partial

import evaluate
import numpy as np
from datasets import load_dataset
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    DataCollatorForTokenClassification,
    Trainer,
    TrainingArguments,
)


def is_subword(text, tokenized, tokenizer, index):
    """Heuristically decide whether the token at ``index`` is a sub-word piece.

    The token string obtained from ``tokenizer.convert_ids_to_tokens`` is
    compared, by length only, with the slice of ``text`` that the token's
    ``offset_mapping`` entry points at. A length mismatch is reported as
    "sub-word".

    Args:
        text: The original string that was tokenized.
        tokenized: Tokenizer output exposing ``"input_ids"`` and
            ``"offset_mapping"`` (one ``(start, end)`` pair per token).
        tokenizer: Object providing ``convert_ids_to_tokens``.
        index: Position of the token to inspect.

    Returns:
        bool: ``True`` when the two lengths differ, ``False`` otherwise.

    NOTE(review): presumably this relies on continuation pieces carrying an
    extra marker (e.g. a ``##`` prefix) -- confirm for the tokenizer in use.
    Any token whose string form differs in length from its surface text for
    another reason is reported as a sub-word as well.
    """
    token_id = tokenized["input_ids"][index]
    token_str = tokenizer.convert_ids_to_tokens(token_id)
    char_start, char_end = tokenized["offset_mapping"][index]
    surface_length = len(text[char_start:char_end])
    return len(token_str) != surface_length


def tokenize(example, labels2int, tokenizer, iob=True, ignore_subwords=True):
    """Tokenize one example and align its character-level spans to token labels.

    Args:
        example: Mapping with ``"source_text"`` (the raw string) and
            ``"privacy_mask"`` (an iterable of dicts with ``"start"``,
            ``"end"`` and ``"label"`` keys; ``start``/``end`` are character
            offsets into ``source_text``).
        labels2int: Mapping from label name to integer id. Must contain
            ``"O"`` and, for every span label ``X``, ``"B-X"``/``"I-X"`` when
            ``iob`` is true or ``"X"`` when it is false.
        tokenizer: Callable tokenizer whose output supports
            ``char_to_token`` / ``token_to_chars`` and that provides
            ``convert_ids_to_tokens``.
            NOTE(review): presumably a Hugging Face *fast* tokenizer -- confirm.
        iob: If true, the first token of a span gets ``B-<label>`` and the
            following ones ``I-<label>``; otherwise all get ``<label>``.
        ignore_subwords: If true, tokens flagged by ``is_subword`` receive
            ``-100`` (except the first token of a span, which is always
            labelled).

    Returns:
        The tokenizer output with an extra ``"labels"`` entry holding one
        integer per token. Special tokens are always labelled ``-100``.

    Notes:
        * Spans whose start character maps to no token are stored under the
          key ``None`` and therefore never matched; they are silently dropped.
        * A span is only honoured when it actually covers its start token.
          Degenerate (zero-length) spans are treated as unlabelled text; the
          previous implementation never advanced past such a token and looped
          forever.
        * A span stops at the first special token or at the end of the
          sequence, so the function no longer assumes that exactly one
          special token terminates the sequence.
    """
    text, labels = example["source_text"], example["privacy_mask"]

    tokenized = tokenizer(text, return_offsets_mapping=True, return_special_tokens_mask=True)
    special_mask = tokenized["special_tokens_mask"]
    n_tokens = len(tokenized["input_ids"])

    # Index spans by the token that contains their first character.
    start_token_to_label = {
        tokenized.char_to_token(label["start"]): (label["end"], label["label"]) for label in labels
    }

    def _covers(token_index, end_char):
        """True if ``token_index`` is a regular token starting before ``end_char``."""
        return (
            token_index < n_tokens
            and special_mask[token_index] == 0
            and tokenized.token_to_chars(token_index).start < end_char
        )

    token_labels = []
    i = 0
    while i < n_tokens:
        if special_mask[i] == 1:
            token_labels.append(-100)
            i += 1
            continue

        entry = start_token_to_label.get(i)
        if entry is None or not _covers(i, entry[0]):
            # Ordinary token outside any span (or start of a degenerate span).
            if ignore_subwords and is_subword(text, tokenized, tokenizer, i):
                token_labels.append(-100)
            else:
                token_labels.append(labels2int["O"])
            i += 1
            continue

        end, label = entry
        # The first token of a span is always labelled, even if it is a sub-word.
        token_labels.append(labels2int["B-" + label if iob else label])
        j = i + 1
        while _covers(j, end):
            if ignore_subwords and is_subword(text, tokenized, tokenizer, j):
                token_labels.append(-100)
            else:
                token_labels.append(labels2int["I-" + label if iob else label])
            j += 1
        # j > i is guaranteed here, so the outer loop always makes progress.
        i = j

    tokenized["labels"] = token_labels
    return tokenized


def compute_metrics(eval_pred, label_list, seqeval_metric):
    """Turn raw token-classification outputs into a flat metrics dict.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` is reduced
            with ``argmax`` over axis 2; ``labels`` holds the reference ids,
            with ``-100`` marking positions to skip.
        label_list: Sequence mapping a label id to its label name.
        seqeval_metric: Object whose ``compute(predictions=..., references=...)``
            returns a dict containing the ``overall_*`` keys read below and,
            optionally, per-entity dicts that each have an ``"f1"`` entry.

    Returns:
        dict: ``"<entity>_f1"`` for every dict-valued result, followed by
        ``"precision"``, ``"recall"``, ``"f1"`` and ``"accuracy"`` taken from
        the corresponding ``overall_*`` values.
    """
    logits, gold_ids = eval_pred
    predicted_ids = np.argmax(logits, axis=2)

    # Keep only positions whose reference label is not the ignore index.
    decoded_predictions = []
    decoded_references = []
    for predicted_row, gold_row in zip(predicted_ids, gold_ids):
        kept_pairs = [
            (predicted_id, gold_id)
            for predicted_id, gold_id in zip(predicted_row, gold_row)
            if gold_id != -100
        ]
        decoded_predictions.append([label_list[predicted_id] for predicted_id, _ in kept_pairs])
        decoded_references.append([label_list[gold_id] for _, gold_id in kept_pairs])

    scores = seqeval_metric.compute(predictions=decoded_predictions, references=decoded_references)

    # Per-entity F1 first, then the overall figures under short names.
    flat_scores = {}
    for name, value in scores.items():
        if isinstance(value, dict):
            flat_scores[f"{name}_f1"] = value["f1"]

    overall = {
        "precision": scores["overall_precision"],
        "recall": scores["overall_recall"],
        "f1": scores["overall_f1"],
        "accuracy": scores["overall_accuracy"],
    }
    for short_name, value in overall.items():
        flat_scores[short_name] = value
    return flat_scores


  from .autonotebook import tqdm as notebook_tqdm
2024-09-21 18:35:37.037172: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-21 18:35:37.052232: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-21 18:35:37.056890: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-21 18:35:37.068923: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Entity types used to build the tag set below.
entity_types = [
    "BOD",
    "BUILDING",
    "CARDISSUER",
    "CITY",
    "COUNTRY",
    "DATE",
    "DRIVERLICENSE",
    "EMAIL",
    "GEOCOORD",
    "GIVENNAME1",
    "GIVENNAME2",
    "IDCARD",
    "IP",
    "LASTNAME1",
    "LASTNAME2",
    "LASTNAME3",
    "PASS",
    "PASSPORT",
    "POSTCODE",
    "SECADDRESS",
    "SEX",
    "SOCIALNUMBER",
    "STATE",
    "STREET",
    "TEL",
    "TIME",
    "TITLE",
    "USERNAME",
]

# Full tag vocabulary: every I- tag, then every B- tag, then "O" as the last id.
# `labels` keeps its final meaning (list of tag names indexed by id).
labels = [f"I-{entity}" for entity in entity_types] + [f"B-{entity}" for entity in entity_types] + ["O"]
label2id = {label: i for i, label in enumerate(labels)}
id2label = {v: k for k, v in label2id.items()}
pretrained_name = "google/mobilebert-uncased"
tokenizer = AutoTokenizer.from_pretrained(pretrained_name)
# Pass both mappings so the saved model config is self-consistent; with only
# `id2label`, `label2id` would keep placeholder names.
model = AutoModelForTokenClassification.from_pretrained(
    pretrained_name,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

ds = load_dataset("ai4privacy/pii-masking-300k")
# Keep only the English examples.
ds = ds.filter(lambda x: x["language"] == "English", num_proc=4)
# Tokenize example by example and attach token-level labels; the raw columns
# and the offset mapping are dropped afterwards.
ds = ds.map(
    partial(tokenize, labels2int=label2id, tokenizer=tokenizer, iob=True, ignore_subwords=True),
    batched=False,
    remove_columns=[
        "source_text",
        "target_text",
        "privacy_mask",
        "span_labels",
        "mbert_text_tokens",
        "mbert_bio_labels",
        "id",
        "language",
        "set",
    ],
    num_proc=8,
).remove_columns(["offset_mapping"])

training_arguments = TrainingArguments(
    output_dir="output_MOBILE_BERT",
    max_steps=40000,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    overwrite_output_dir=True,
    learning_rate=2e-5,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    # `warmup_ratio=0.2` used to be passed as well, but `warmup_steps` takes
    # precedence over it, so only the effective setting is kept.
    warmup_steps=3000,
    eval_strategy="steps",
    eval_steps=5000,
    save_strategy="steps",
    # NOTE(review): checkpoints are written every 10000 steps while evaluation
    # runs every 5000; consider aligning the two so that every evaluated state
    # can be restored by `load_best_model_at_end`.
    save_steps=10000,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    torch_compile=False,
)
trainer = Trainer(
    model,
    training_arguments,
    train_dataset=ds["train"],
    eval_dataset=ds["validation"],
    data_collator=DataCollatorForTokenClassification(tokenizer),
    tokenizer=tokenizer,
    compute_metrics=partial(compute_metrics, label_list=labels, seqeval_metric=evaluate.load("seqeval")),
)


Some weights of MobileBertForTokenClassification were not initialized from the model checkpoint at google/mobilebert-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
max_steps is given, it will override any value given in num_train_epochs


In [5]:
# Run fine-tuning with the `trainer` built in the previous cell. The call is the
# last expression of the cell, so the returned TrainOutput is shown as its result.
trainer.train()


  1%|▏         | 500/40000 [02:01<2:29:06,  4.42it/s]

{'loss': 3.9588, 'grad_norm': 1.8109838962554932, 'learning_rate': 3.3333333333333333e-06, 'epoch': 0.53}


  3%|▎         | 1001/40000 [04:02<2:31:08,  4.30it/s]

{'loss': 0.2998, 'grad_norm': 1.2737737894058228, 'learning_rate': 6.666666666666667e-06, 'epoch': 1.07}


  4%|▍         | 1500/40000 [06:03<2:43:32,  3.92it/s]

{'loss': 0.1793, 'grad_norm': 1.5632212162017822, 'learning_rate': 1e-05, 'epoch': 1.6}


  5%|▌         | 2001/40000 [08:05<2:17:59,  4.59it/s]

{'loss': 0.0976, 'grad_norm': 1.993319034576416, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.14}


  6%|▋         | 2500/40000 [10:05<2:35:36,  4.02it/s]

{'loss': 0.0832, 'grad_norm': 0.6880690455436707, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.67}


  8%|▊         | 3000/40000 [12:06<2:39:14,  3.87it/s]

{'loss': 0.0665, 'grad_norm': 0.49479493498802185, 'learning_rate': 2e-05, 'epoch': 3.21}


  9%|▉         | 3500/40000 [14:06<2:36:07,  3.90it/s]

{'loss': 0.0581, 'grad_norm': 0.40607622265815735, 'learning_rate': 1.999098966204682e-05, 'epoch': 3.74}


 10%|█         | 4000/40000 [16:08<2:39:10,  3.77it/s]

{'loss': 0.049, 'grad_norm': 0.24085965752601624, 'learning_rate': 1.9963974885425267e-05, 'epoch': 4.28}


 11%|█▏        | 4500/40000 [18:10<2:21:07,  4.19it/s]

{'loss': 0.0454, 'grad_norm': 0.42563486099243164, 'learning_rate': 1.9919004352588768e-05, 'epoch': 4.81}


 12%|█▎        | 5000/40000 [20:10<2:17:13,  4.25it/s]

{'loss': 0.0456, 'grad_norm': 0.6230946779251099, 'learning_rate': 1.9856159103477085e-05, 'epoch': 5.35}


  _warn_prf(average, modifier, msg_start, len(result))
                                                      
 12%|█▎        | 5000/40000 [20:40<2:17:13,  4.25it/s]

{'eval_loss': 0.04094799607992172, 'eval_BOD_f1': 0.952564928096158, 'eval_BUILDING_f1': 0.9741200828157349, 'eval_CARDISSUER_f1': 0.0, 'eval_CITY_f1': 0.959058592792351, 'eval_COUNTRY_f1': 0.9598981540420114, 'eval_DATE_f1': 0.8911830357142858, 'eval_DRIVERLICENSE_f1': 0.9259259259259259, 'eval_EMAIL_f1': 0.9783434650455927, 'eval_GEOCOORD_f1': 0.9411764705882353, 'eval_GIVENNAME1_f1': 0.8202409638554217, 'eval_GIVENNAME2_f1': 0.6549222797927462, 'eval_IDCARD_f1': 0.9152105940776163, 'eval_IP_f1': 0.9803831063927995, 'eval_LASTNAME1_f1': 0.768262987012987, 'eval_LASTNAME2_f1': 0.5534324133050248, 'eval_LASTNAME3_f1': 0.4011299435028249, 'eval_PASS_f1': 0.8996350364963505, 'eval_PASSPORT_f1': 0.9352697095435686, 'eval_POSTCODE_f1': 0.9630015633142261, 'eval_SECADDRESS_f1': 0.956876456876457, 'eval_SEX_f1': 0.9636228656273199, 'eval_SOCIALNUMBER_f1': 0.9358341559723594, 'eval_STATE_f1': 0.9742063492063493, 'eval_STREET_f1': 0.9572999232932754, 'eval_TEL_f1': 0.9520039584364176, 'eval_TI

 14%|█▍        | 5500/40000 [22:41<2:18:52,  4.14it/s] 

{'loss': 0.044, 'grad_norm': 0.6612284779548645, 'learning_rate': 1.9775552389476865e-05, 'epoch': 5.88}


 15%|█▌        | 6000/40000 [24:42<1:58:52,  4.77it/s]

{'loss': 0.0445, 'grad_norm': 0.6533606052398682, 'learning_rate': 1.967732946933499e-05, 'epoch': 6.42}


 16%|█▋        | 6500/40000 [26:44<2:10:32,  4.28it/s]

{'loss': 0.0336, 'grad_norm': 0.6657323241233826, 'learning_rate': 1.956166734739251e-05, 'epoch': 6.95}


 18%|█▊        | 7000/40000 [28:44<2:01:48,  4.52it/s]

{'loss': 0.039, 'grad_norm': 0.4008713662624359, 'learning_rate': 1.9428774454610845e-05, 'epoch': 7.49}


 19%|█▉        | 7500/40000 [30:46<2:04:15,  4.36it/s]

{'loss': 0.028, 'grad_norm': 0.6195588111877441, 'learning_rate': 1.9278890272965097e-05, 'epoch': 8.02}


 20%|██        | 8000/40000 [32:47<1:59:36,  4.46it/s]

{'loss': 0.0261, 'grad_norm': 0.5949769616127014, 'learning_rate': 1.911228490388136e-05, 'epoch': 8.56}


 21%|██▏       | 8500/40000 [34:48<2:21:07,  3.72it/s]

{'loss': 0.0234, 'grad_norm': 0.37158504128456116, 'learning_rate': 1.8929258581495688e-05, 'epoch': 9.09}


 22%|██▎       | 9000/40000 [36:50<2:02:45,  4.21it/s]

{'loss': 0.0227, 'grad_norm': 0.4138241708278656, 'learning_rate': 1.8730141131611882e-05, 'epoch': 9.63}


 24%|██▍       | 9500/40000 [38:52<2:06:49,  4.01it/s]

{'loss': 0.0208, 'grad_norm': 0.5190874338150024, 'learning_rate': 1.8515291377333114e-05, 'epoch': 10.16}


 25%|██▌       | 10000/40000 [40:53<1:52:46,  4.43it/s]

{'loss': 0.0229, 'grad_norm': 0.8583167791366577, 'learning_rate': 1.8285096492438424e-05, 'epoch': 10.7}


                                                       
 25%|██▌       | 10000/40000 [41:22<1:52:46,  4.43it/s]

{'eval_loss': 0.04017198458313942, 'eval_BOD_f1': 0.9592798971281612, 'eval_BUILDING_f1': 0.9742400824317362, 'eval_CARDISSUER_f1': 0.0, 'eval_CITY_f1': 0.9631087063453025, 'eval_COUNTRY_f1': 0.968274111675127, 'eval_DATE_f1': 0.9122507122507123, 'eval_DRIVERLICENSE_f1': 0.9382352941176471, 'eval_EMAIL_f1': 0.9800645528764002, 'eval_GEOCOORD_f1': 0.9653579676674365, 'eval_GIVENNAME1_f1': 0.8443184502716751, 'eval_GIVENNAME2_f1': 0.7523892267593398, 'eval_IDCARD_f1': 0.9233885819521178, 'eval_IP_f1': 0.9806540764624597, 'eval_LASTNAME1_f1': 0.817825459046833, 'eval_LASTNAME2_f1': 0.6666666666666666, 'eval_LASTNAME3_f1': 0.5706051873198847, 'eval_PASS_f1': 0.9109274563820019, 'eval_PASSPORT_f1': 0.9415878239407651, 'eval_POSTCODE_f1': 0.9708636836628513, 'eval_SECADDRESS_f1': 0.9634782608695652, 'eval_SEX_f1': 0.9684315187670892, 'eval_SOCIALNUMBER_f1': 0.9415929203539823, 'eval_STATE_f1': 0.9711893622260527, 'eval_STREET_f1': 0.9666580511760146, 'eval_TEL_f1': 0.9606377678126559, 'eval_

 26%|██▋       | 10501/40000 [43:23<1:45:17,  4.67it/s] 

{'loss': 0.0177, 'grad_norm': 0.20382823050022125, 'learning_rate': 1.8039971303669407e-05, 'epoch': 11.23}


 28%|██▊       | 11000/40000 [45:24<1:45:52,  4.57it/s]

{'loss': 0.0259, 'grad_norm': 0.30682307481765747, 'learning_rate': 1.7780357543184396e-05, 'epoch': 11.76}


 29%|██▉       | 11500/40000 [47:25<1:52:56,  4.21it/s]

{'loss': 0.0141, 'grad_norm': 0.15557126700878143, 'learning_rate': 1.7506723052527243e-05, 'epoch': 12.3}


 30%|███       | 12001/40000 [49:27<1:33:56,  4.97it/s]

{'loss': 0.0199, 'grad_norm': 0.21364352107048035, 'learning_rate': 1.7219560939545246e-05, 'epoch': 12.83}


 31%|███▏      | 12500/40000 [51:28<1:47:33,  4.26it/s]

{'loss': 0.0144, 'grad_norm': 0.5480033755302429, 'learning_rate': 1.6919388689775463e-05, 'epoch': 13.37}


 32%|███▎      | 13000/40000 [53:28<1:43:14,  4.36it/s]

{'loss': 0.014, 'grad_norm': 0.4372042417526245, 'learning_rate': 1.6606747233900816e-05, 'epoch': 13.9}


 34%|███▍      | 13500/40000 [55:29<1:42:16,  4.32it/s]

{'loss': 0.0122, 'grad_norm': 0.5194843411445618, 'learning_rate': 1.6282199972956425e-05, 'epoch': 14.44}


 35%|███▌      | 14001/40000 [57:30<1:45:05,  4.12it/s]

{'loss': 0.0143, 'grad_norm': 1.0879353284835815, 'learning_rate': 1.594633176304287e-05, 'epoch': 14.97}


 36%|███▋      | 14500/40000 [59:31<1:41:20,  4.19it/s]

{'loss': 0.0106, 'grad_norm': 0.4840964674949646, 'learning_rate': 1.5599747861375957e-05, 'epoch': 15.51}


 38%|███▊      | 15000/40000 [1:01:32<1:43:40,  4.02it/s]

{'loss': 0.0106, 'grad_norm': 0.28183260560035706, 'learning_rate': 1.5243072835572319e-05, 'epoch': 16.04}


                                                         
 38%|███▊      | 15001/40000 [1:02:01<61:31:03,  8.86s/it]

{'eval_loss': 0.04555591195821762, 'eval_BOD_f1': 0.9673654635833153, 'eval_BUILDING_f1': 0.979392065945389, 'eval_CARDISSUER_f1': 1.0, 'eval_CITY_f1': 0.968503937007874, 'eval_COUNTRY_f1': 0.9690851735015772, 'eval_DATE_f1': 0.9165714285714285, 'eval_DRIVERLICENSE_f1': 0.9476330121491412, 'eval_EMAIL_f1': 0.978731484998101, 'eval_GEOCOORD_f1': 0.9565217391304347, 'eval_GIVENNAME1_f1': 0.855544747081712, 'eval_GIVENNAME2_f1': 0.7738883632923367, 'eval_IDCARD_f1': 0.932206572769953, 'eval_IP_f1': 0.9818181818181818, 'eval_LASTNAME1_f1': 0.8264029067420267, 'eval_LASTNAME2_f1': 0.7100401606425703, 'eval_LASTNAME3_f1': 0.6486486486486487, 'eval_PASS_f1': 0.9211491442542786, 'eval_PASSPORT_f1': 0.9512543340811748, 'eval_POSTCODE_f1': 0.9731980223783502, 'eval_SECADDRESS_f1': 0.968041836141778, 'eval_SEX_f1': 0.9704494661037992, 'eval_SOCIALNUMBER_f1': 0.9485511531638084, 'eval_STATE_f1': 0.977205153617443, 'eval_STREET_f1': 0.9665716506867066, 'eval_TEL_f1': 0.9696969696969697, 'eval_TIME_

 39%|███▉      | 15501/40000 [1:04:02<1:36:51,  4.22it/s] 

{'loss': 0.0123, 'grad_norm': 0.894921600818634, 'learning_rate': 1.4876949438136348e-05, 'epoch': 16.58}


 40%|████      | 16001/40000 [1:06:03<1:27:38,  4.56it/s]

{'loss': 0.0102, 'grad_norm': 0.2521211802959442, 'learning_rate': 1.4502037448176734e-05, 'epoch': 17.11}


 41%|████▏     | 16500/40000 [1:08:04<1:42:30,  3.82it/s]

{'loss': 0.0147, 'grad_norm': 0.42659077048301697, 'learning_rate': 1.4119012482439929e-05, 'epoch': 17.65}


 43%|████▎     | 17001/40000 [1:10:04<1:24:36,  4.53it/s]

{'loss': 0.008, 'grad_norm': 0.2697450518608093, 'learning_rate': 1.3728564777803089e-05, 'epoch': 18.18}


 44%|████▍     | 17501/40000 [1:12:06<1:23:00,  4.52it/s]

{'loss': 0.0073, 'grad_norm': 0.6405779123306274, 'learning_rate': 1.3331397947420578e-05, 'epoch': 18.72}


 45%|████▌     | 18000/40000 [1:14:07<1:33:18,  3.93it/s]

{'loss': 0.0079, 'grad_norm': 0.23745204508304596, 'learning_rate': 1.2928227712765504e-05, 'epoch': 19.25}


 46%|████▋     | 18500/40000 [1:16:08<1:12:05,  4.97it/s]

{'loss': 0.008, 'grad_norm': 0.22641661763191223, 'learning_rate': 1.2519780613851254e-05, 'epoch': 19.79}


 48%|████▊     | 19000/40000 [1:18:09<1:30:22,  3.87it/s]

{'loss': 0.0071, 'grad_norm': 0.13783341646194458, 'learning_rate': 1.2106792699957264e-05, 'epoch': 20.32}


 49%|████▉     | 19500/40000 [1:20:09<1:20:53,  4.22it/s]

{'loss': 0.006, 'grad_norm': 0.36772456765174866, 'learning_rate': 1.1690008203218493e-05, 'epoch': 20.86}


 50%|█████     | 20000/40000 [1:22:12<1:24:09,  3.96it/s]

{'loss': 0.0141, 'grad_norm': 0.33664700388908386, 'learning_rate': 1.1270178197468788e-05, 'epoch': 21.39}


                                                         
 50%|█████     | 20000/40000 [1:22:40<1:24:09,  3.96it/s]

{'eval_loss': 0.053060367703437805, 'eval_BOD_f1': 0.9644626319190178, 'eval_BUILDING_f1': 0.9796967360575688, 'eval_CARDISSUER_f1': 1.0, 'eval_CITY_f1': 0.9694881889763779, 'eval_COUNTRY_f1': 0.972169512966477, 'eval_DATE_f1': 0.9121071012805588, 'eval_DRIVERLICENSE_f1': 0.951011048572024, 'eval_EMAIL_f1': 0.9826963301007796, 'eval_GEOCOORD_f1': 0.9517241379310345, 'eval_GIVENNAME1_f1': 0.8622522957950701, 'eval_GIVENNAME2_f1': 0.7789275634995296, 'eval_IDCARD_f1': 0.9376408715251691, 'eval_IP_f1': 0.9843245735361917, 'eval_LASTNAME1_f1': 0.8240570846075435, 'eval_LASTNAME2_f1': 0.7145061728395061, 'eval_LASTNAME3_f1': 0.6797752808988764, 'eval_PASS_f1': 0.9242563630788102, 'eval_PASSPORT_f1': 0.9520701611258414, 'eval_POSTCODE_f1': 0.9764890282131662, 'eval_SECADDRESS_f1': 0.9708624708624708, 'eval_SEX_f1': 0.9698318496538081, 'eval_SOCIALNUMBER_f1': 0.9526987242394505, 'eval_STATE_f1': 0.9784706755753526, 'eval_STREET_f1': 0.9671749806151461, 'eval_TEL_f1': 0.9680638722554891, 'eval

 51%|█████▏    | 20500/40000 [1:24:42<1:10:12,  4.63it/s] 

{'loss': 0.0106, 'grad_norm': 0.2723449170589447, 'learning_rate': 1.0848059244755093e-05, 'epoch': 21.93}


 52%|█████▎    | 21000/40000 [1:26:43<1:18:07,  4.05it/s]

{'loss': 0.0049, 'grad_norm': 0.24789108335971832, 'learning_rate': 1.0424412031961485e-05, 'epoch': 22.46}


 54%|█████▍    | 21500/40000 [1:28:44<1:12:37,  4.25it/s]

{'loss': 0.005, 'grad_norm': 0.20519410073757172, 'learning_rate': 1e-05, 'epoch': 22.99}


 55%|█████▌    | 22000/40000 [1:30:44<1:05:47,  4.56it/s]

{'loss': 0.0044, 'grad_norm': 0.4043301045894623, 'learning_rate': 9.57558796803852e-06, 'epoch': 23.53}


 56%|█████▋    | 22500/40000 [1:32:46<1:13:27,  3.97it/s]

{'loss': 0.0046, 'grad_norm': 0.27520281076431274, 'learning_rate': 9.151940755244912e-06, 'epoch': 24.06}


 57%|█████▊    | 23000/40000 [1:34:45<1:16:38,  3.70it/s]

{'loss': 0.0041, 'grad_norm': 0.10447246581315994, 'learning_rate': 8.729821802531213e-06, 'epoch': 24.6}


 59%|█████▉    | 23500/40000 [1:36:46<1:03:40,  4.32it/s]

{'loss': 0.004, 'grad_norm': 0.14568376541137695, 'learning_rate': 8.309991796781512e-06, 'epoch': 25.13}


 60%|██████    | 24000/40000 [1:38:46<1:05:44,  4.06it/s]

{'loss': 0.0035, 'grad_norm': 0.32044270634651184, 'learning_rate': 7.89320730004274e-06, 'epoch': 25.67}


 61%|██████▏   | 24500/40000 [1:40:46<57:17,  4.51it/s]  

{'loss': 0.0035, 'grad_norm': 0.23082685470581055, 'learning_rate': 7.480219386148751e-06, 'epoch': 26.2}


 62%|██████▎   | 25000/40000 [1:42:48<1:00:32,  4.13it/s]

{'loss': 0.0047, 'grad_norm': 0.15712782740592957, 'learning_rate': 7.071772287234497e-06, 'epoch': 26.74}


                                                         
 62%|██████▎   | 25000/40000 [1:43:16<1:00:32,  4.13it/s]

{'eval_loss': 0.05697057023644447, 'eval_BOD_f1': 0.9640318759422787, 'eval_BUILDING_f1': 0.9799897383273473, 'eval_CARDISSUER_f1': 1.0, 'eval_CITY_f1': 0.967473709953534, 'eval_COUNTRY_f1': 0.9654522613065327, 'eval_DATE_f1': 0.9224941724941724, 'eval_DRIVERLICENSE_f1': 0.9536039768019885, 'eval_EMAIL_f1': 0.9847850893875998, 'eval_GEOCOORD_f1': 0.9585253456221199, 'eval_GIVENNAME1_f1': 0.8598448108632396, 'eval_GIVENNAME2_f1': 0.7817351598173515, 'eval_IDCARD_f1': 0.9392789373814042, 'eval_IP_f1': 0.9845444059976931, 'eval_LASTNAME1_f1': 0.8281121187139323, 'eval_LASTNAME2_f1': 0.7244340359094457, 'eval_LASTNAME3_f1': 0.673469387755102, 'eval_PASS_f1': 0.921760391198044, 'eval_PASSPORT_f1': 0.9539217694040548, 'eval_POSTCODE_f1': 0.975991649269311, 'eval_SECADDRESS_f1': 0.9698725376593279, 'eval_SEX_f1': 0.9722497522299305, 'eval_SOCIALNUMBER_f1': 0.9537617554858935, 'eval_STATE_f1': 0.9794197867592364, 'eval_STREET_f1': 0.9678756476683938, 'eval_TEL_f1': 0.9662698412698413, 'eval_TI

 64%|██████▍   | 25501/40000 [1:45:16<53:24,  4.52it/s]   

{'loss': 0.0033, 'grad_norm': 0.11458593606948853, 'learning_rate': 6.668602052579425e-06, 'epoch': 27.27}


 65%|██████▌   | 26000/40000 [1:47:15<52:36,  4.43it/s]  

{'loss': 0.0032, 'grad_norm': 0.11279218643903732, 'learning_rate': 6.2714352221969155e-06, 'epoch': 27.81}


 66%|██████▋   | 26500/40000 [1:49:17<50:06,  4.49it/s]  

{'loss': 0.0028, 'grad_norm': 0.054321713745594025, 'learning_rate': 5.880987517560075e-06, 'epoch': 28.34}


 68%|██████▊   | 27000/40000 [1:51:18<49:49,  4.35it/s]  

{'loss': 0.0029, 'grad_norm': 0.25729215145111084, 'learning_rate': 5.497962551823266e-06, 'epoch': 28.88}


 69%|██████▉   | 27500/40000 [1:53:18<45:55,  4.54it/s]  

{'loss': 0.0028, 'grad_norm': 0.3174384534358978, 'learning_rate': 5.1230505618636575e-06, 'epoch': 29.41}


 70%|███████   | 28000/40000 [1:55:19<48:30,  4.12it/s]  

{'loss': 0.0027, 'grad_norm': 0.15291376411914825, 'learning_rate': 4.756927164427685e-06, 'epoch': 29.95}


 71%|███████▏  | 28501/40000 [1:57:20<39:55,  4.80it/s]  

{'loss': 0.0023, 'grad_norm': 0.32036882638931274, 'learning_rate': 4.400252138624047e-06, 'epoch': 30.48}


 72%|███████▎  | 29000/40000 [1:59:21<39:56,  4.59it/s]  

{'loss': 0.0054, 'grad_norm': 0.06062778830528259, 'learning_rate': 4.053668236957135e-06, 'epoch': 31.02}


 74%|███████▍  | 29500/40000 [2:01:22<39:58,  4.38it/s]

{'loss': 0.0023, 'grad_norm': 0.15098704397678375, 'learning_rate': 3.7178000270435765e-06, 'epoch': 31.55}


 75%|███████▌  | 30000/40000 [2:03:21<44:11,  3.77it/s]

{'loss': 0.0021, 'grad_norm': 0.18141184747219086, 'learning_rate': 3.3932527660991877e-06, 'epoch': 32.09}


                                                       
 75%|███████▌  | 30000/40000 [2:03:50<44:11,  3.77it/s]

{'eval_loss': 0.06423956900835037, 'eval_BOD_f1': 0.9631544925662573, 'eval_BUILDING_f1': 0.9817339850784667, 'eval_CARDISSUER_f1': 0.0, 'eval_CITY_f1': 0.9672211350293543, 'eval_COUNTRY_f1': 0.9688581314878892, 'eval_DATE_f1': 0.9149728027483539, 'eval_DRIVERLICENSE_f1': 0.9493410214168039, 'eval_EMAIL_f1': 0.9855403348554034, 'eval_GEOCOORD_f1': 0.9699769053117783, 'eval_GIVENNAME1_f1': 0.8597000483792937, 'eval_GIVENNAME2_f1': 0.7795648060548722, 'eval_IDCARD_f1': 0.9352302508423812, 'eval_IP_f1': 0.9829571625978811, 'eval_LASTNAME1_f1': 0.8318548387096774, 'eval_LASTNAME2_f1': 0.7231012658227849, 'eval_LASTNAME3_f1': 0.6937669376693767, 'eval_PASS_f1': 0.931161647203442, 'eval_PASSPORT_f1': 0.9533209952704093, 'eval_POSTCODE_f1': 0.9773023741194886, 'eval_SECADDRESS_f1': 0.9704347826086956, 'eval_SEX_f1': 0.9704641350210971, 'eval_SOCIALNUMBER_f1': 0.9518143961927424, 'eval_STATE_f1': 0.9784706755753526, 'eval_STREET_f1': 0.9680933852140078, 'eval_TEL_f1': 0.9687344913151364, 'eval

 76%|███████▋  | 30500/40000 [2:05:51<37:11,  4.26it/s]   

{'loss': 0.0064, 'grad_norm': 0.021740112453699112, 'learning_rate': 3.0806113102245395e-06, 'epoch': 32.62}


 78%|███████▊  | 31000/40000 [2:07:52<31:51,  4.71it/s]

{'loss': 0.0021, 'grad_norm': 0.21748167276382446, 'learning_rate': 2.780439060454756e-06, 'epoch': 33.16}


 79%|███████▉  | 31500/40000 [2:09:51<36:58,  3.83it/s]

{'loss': 0.0019, 'grad_norm': 0.0965208113193512, 'learning_rate': 2.493276947472756e-06, 'epoch': 33.69}


 80%|████████  | 32000/40000 [2:11:53<34:26,  3.87it/s]

{'loss': 0.0021, 'grad_norm': 0.08005595207214355, 'learning_rate': 2.2196424568156073e-06, 'epoch': 34.22}


 81%|████████▏ | 32501/40000 [2:13:52<27:04,  4.62it/s]

{'loss': 0.0018, 'grad_norm': 0.21769487857818604, 'learning_rate': 1.960028696330596e-06, 'epoch': 34.76}


 82%|████████▎ | 33000/40000 [2:15:52<29:35,  3.94it/s]

{'loss': 0.0019, 'grad_norm': 0.12997514009475708, 'learning_rate': 1.7149035075615795e-06, 'epoch': 35.29}


 84%|████████▍ | 33500/40000 [2:17:54<24:39,  4.39it/s]

{'loss': 0.0018, 'grad_norm': 0.141978457570076, 'learning_rate': 1.4847086226668871e-06, 'epoch': 35.83}


 85%|████████▌ | 34000/40000 [2:19:54<24:48,  4.03it/s]

{'loss': 0.0017, 'grad_norm': 0.03527776524424553, 'learning_rate': 1.2698588683881185e-06, 'epoch': 36.36}


 86%|████████▋ | 34500/40000 [2:21:56<23:01,  3.98it/s]

{'loss': 0.0017, 'grad_norm': 0.36577433347702026, 'learning_rate': 1.0707414185043163e-06, 'epoch': 36.9}


 88%|████████▊ | 35000/40000 [2:23:56<20:38,  4.04it/s]

{'loss': 0.0017, 'grad_norm': 0.06271906942129135, 'learning_rate': 8.87715096118642e-07, 'epoch': 37.43}


                                                       
 88%|████████▊ | 35000/40000 [2:24:25<20:38,  4.04it/s]

{'eval_loss': 0.06620169430971146, 'eval_BOD_f1': 0.9674498814399656, 'eval_BUILDING_f1': 0.981491002570694, 'eval_CARDISSUER_f1': 1.0, 'eval_CITY_f1': 0.9705304518664047, 'eval_COUNTRY_f1': 0.9679245283018868, 'eval_DATE_f1': 0.9198847262247839, 'eval_DRIVERLICENSE_f1': 0.9560691234645013, 'eval_EMAIL_f1': 0.9851598173515982, 'eval_GEOCOORD_f1': 0.9677419354838711, 'eval_GIVENNAME1_f1': 0.8610235265583313, 'eval_GIVENNAME2_f1': 0.791783380018674, 'eval_IDCARD_f1': 0.9380961286702824, 'eval_IP_f1': 0.9849988460650819, 'eval_LASTNAME1_f1': 0.8338073953677366, 'eval_LASTNAME2_f1': 0.7313195548489667, 'eval_LASTNAME3_f1': 0.6963350785340314, 'eval_PASS_f1': 0.9315320847405588, 'eval_PASSPORT_f1': 0.9535170711641301, 'eval_POSTCODE_f1': 0.9765258215962441, 'eval_SECADDRESS_f1': 0.9692396982008126, 'eval_SEX_f1': 0.972463408583478, 'eval_SOCIALNUMBER_f1': 0.953373991737163, 'eval_STATE_f1': 0.9782178217821782, 'eval_STREET_f1': 0.9686284677210267, 'eval_TEL_f1': 0.96875, 'eval_TIME_f1': 0.9

 89%|████████▉ | 35501/40000 [2:26:26<18:08,  4.13it/s]   

{'loss': 0.0017, 'grad_norm': 0.12487880885601044, 'learning_rate': 7.211097270349065e-07, 'epoch': 37.97}


 90%|█████████ | 36000/40000 [2:28:27<17:00,  3.92it/s]

{'loss': 0.0017, 'grad_norm': 0.14613975584506989, 'learning_rate': 5.71225545389158e-07, 'epoch': 38.5}


 91%|█████████▏| 36500/40000 [2:30:27<13:45,  4.24it/s]

{'loss': 0.0015, 'grad_norm': 0.0983932688832283, 'learning_rate': 4.3833265260749157e-07, 'epoch': 39.04}


 92%|█████████▎| 37000/40000 [2:32:28<12:54,  3.87it/s]

{'loss': 0.0036, 'grad_norm': 0.06612811237573624, 'learning_rate': 3.226705306650113e-07, 'epoch': 39.57}


 94%|█████████▍| 37500/40000 [2:34:28<08:51,  4.71it/s]

{'loss': 0.0015, 'grad_norm': 0.18941879272460938, 'learning_rate': 2.2444761052313857e-07, 'epoch': 40.11}


 95%|█████████▌| 38000/40000 [2:36:29<08:51,  3.76it/s]

{'loss': 0.0016, 'grad_norm': 0.16281139850616455, 'learning_rate': 1.4384089652291544e-07, 'epoch': 40.64}


 96%|█████████▋| 38500/40000 [2:38:30<05:20,  4.67it/s]

{'loss': 0.0023, 'grad_norm': 0.044106025248765945, 'learning_rate': 8.099564741123167e-08, 'epoch': 41.18}


 98%|█████████▊| 39000/40000 [2:40:30<04:03,  4.11it/s]

{'loss': 0.0015, 'grad_norm': 0.6095814108848572, 'learning_rate': 3.602511457473479e-08, 'epoch': 41.71}


 99%|█████████▉| 39500/40000 [2:42:31<01:57,  4.26it/s]

{'loss': 0.0041, 'grad_norm': 0.12819726765155792, 'learning_rate': 9.010337953185843e-09, 'epoch': 42.25}


100%|██████████| 40000/40000 [2:44:32<00:00,  4.21it/s]

{'loss': 0.0016, 'grad_norm': 0.1927604228258133, 'learning_rate': 0.0, 'epoch': 42.78}


                                                       
100%|██████████| 40000/40000 [2:45:01<00:00,  4.21it/s]

{'eval_loss': 0.06687254458665848, 'eval_BOD_f1': 0.9671848013816926, 'eval_BUILDING_f1': 0.9815005138746146, 'eval_CARDISSUER_f1': 0.0, 'eval_CITY_f1': 0.9705593719332678, 'eval_COUNTRY_f1': 0.9685138539042821, 'eval_DATE_f1': 0.9226327944572749, 'eval_DRIVERLICENSE_f1': 0.9569736021617129, 'eval_EMAIL_f1': 0.9851541682527598, 'eval_GEOCOORD_f1': 0.9677419354838711, 'eval_GIVENNAME1_f1': 0.8613455510007235, 'eval_GIVENNAME2_f1': 0.7895716945996275, 'eval_IDCARD_f1': 0.939365496527126, 'eval_IP_f1': 0.9852330410706046, 'eval_LASTNAME1_f1': 0.8338430173292558, 'eval_LASTNAME2_f1': 0.7272727272727272, 'eval_LASTNAME3_f1': 0.6918918918918918, 'eval_PASS_f1': 0.933046683046683, 'eval_PASSPORT_f1': 0.9550030819806863, 'eval_POSTCODE_f1': 0.9765013054830287, 'eval_SECADDRESS_f1': 0.9680789320951828, 'eval_SEX_f1': 0.9722222222222222, 'eval_SOCIALNUMBER_f1': 0.9524744697564809, 'eval_STATE_f1': 0.9787023278850916, 'eval_STREET_f1': 0.9688796680497925, 'eval_TEL_f1': 0.970946113732307, 'eval_T

100%|██████████| 40000/40000 [2:45:01<00:00,  4.04it/s]

{'train_runtime': 9901.8917, 'train_samples_per_second': 129.268, 'train_steps_per_second': 4.04, 'train_loss': 0.0695130168452859, 'epoch': 42.78}





TrainOutput(global_step=40000, training_loss=0.0695130168452859, metrics={'train_runtime': 9901.8917, 'train_samples_per_second': 129.268, 'train_steps_per_second': 4.04, 'total_flos': 3.328130452343112e+16, 'train_loss': 0.0695130168452859, 'epoch': 42.780748663101605})

{'eval_loss': 0.06687254458665848, 'eval_BOD_f1': 0.9671848013816926, 'eval_BUILDING_f1': 0.9815005138746146, 'eval_CARDISSUER_f1': 0.0, 'eval_CITY_f1': 0.9705593719332678, 'eval_COUNTRY_f1': 0.9685138539042821, 'eval_DATE_f1': 0.9226327944572749, 'eval_DRIVERLICENSE_f1': 0.9569736021617129, 'eval_EMAIL_f1': 0.9851541682527598, 'eval_GEOCOORD_f1': 0.9677419354838711, 'eval_GIVENNAME1_f1': 0.8613455510007235, 'eval_GIVENNAME2_f1': 0.7895716945996275, 'eval_IDCARD_f1': 0.939365496527126, 'eval_IP_f1': 0.9852330410706046, 'eval_LASTNAME1_f1': 0.8338430173292558, 'eval_LASTNAME2_f1': 0.7272727272727272, 'eval_LASTNAME3_f1': 0.6918918918918918, 'eval_PASS_f1': 0.933046683046683, 'eval_PASSPORT_f1': 0.9550030819806863, 'eval_POSTCODE_f1': 0.9765013054830287, 'eval_SECADDRESS_f1': 0.9680789320951828, 'eval_SEX_f1': 0.9722222222222222, 'eval_SOCIALNUMBER_f1': 0.9524744697564809, 'eval_STATE_f1': 0.9787023278850916, 'eval_STREET_f1': 0.9688796680497925, 'eval_TEL_f1': 0.970946113732307, 'eval_TIME_f1': 0.9695451549110086, 'eval_TITLE_f1': 0.9568607068607069, 'eval_USERNAME_f1': 0.9333333333333333, 'eval_precision': 0.9410922874603543, 'eval_recall': 0.952028798372266, 'eval_f1': 0.9465289529478127, 'eval_accuracy': 0.992088277633207, 'eval_runtime': 28.7216, 'eval_samples_per_second': 276.656, 'eval_steps_per_second': 8.669, 'epoch': 42.78}
