In [1]:
pip install "numpy<2.0"




In [2]:
pip install datasets

Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-2.21.0-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
[2

In [4]:
pip install tqdm



In [3]:
pip install seqeval

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16161 sha256=f9dac11238d8aa16a78195c5a50cfaae95fc4a958089b14b0d99e10847a2884f
  Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [5]:
pip install pytorch-crf


Collecting pytorch-crf
  Downloading pytorch_crf-0.7.2-py3-none-any.whl.metadata (2.4 kB)
Downloading pytorch_crf-0.7.2-py3-none-any.whl (9.5 kB)
Installing collected packages: pytorch-crf
Successfully installed pytorch-crf-0.7.2


In [6]:
pip install seaborn



In [7]:
pip install nltk



In [2]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from functools import partial
import uuid
from collections import Counter
from seqeval.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import numpy as np
import logging
import re


In [9]:
import logging

In [11]:
from transformers import AutoTokenizer

In [3]:
import torch
import torch.nn as nn
from torchcrf import CRF

class BiLSTMTextCNNCRF(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_labels, dropout=0.1, num_filters=100, filter_sizes=[3, 4, 5]):
        super(BiLSTMTextCNNCRF, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2, bidirectional=True, batch_first=True)

        # TextCNN components
        self.convs = nn.ModuleList([
            nn.Conv1d(hidden_dim, num_filters, kernel_size)
            for kernel_size in filter_sizes
        ])

        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim + len(filter_sizes) * num_filters, num_labels)
        self.crf = CRF(num_labels, batch_first=True)

    def forward(self, input_ids, attention_mask=None, labels=None):
        embedded = self.embedding(input_ids)
        lstm_out, _ = self.lstm(embedded)

        # TextCNN processing
        cnn_input = lstm_out.permute(0, 2, 1)  # Change to (batch, hidden_dim, seq_len)
        cnn_out = [torch.relu(conv(cnn_input)) for conv in self.convs]
        cnn_out = [nn.functional.max_pool1d(out, out.size(2)).squeeze(2) for out in cnn_out]
        cnn_out = torch.cat(cnn_out, dim=1)

        # Combine LSTM and CNN outputs
        combined = torch.cat([lstm_out, cnn_out.unsqueeze(1).expand(-1, lstm_out.size(1), -1)], dim=2)

        emissions = self.fc(self.dropout(combined))

        if labels is not None:
            mask = attention_mask.bool()
            loss = -self.crf(emissions, labels, mask=mask)
            return loss
        else:
            mask = attention_mask.bool()
            predictions = self.crf.decode(emissions, mask=mask)
            return predictions


In [4]:
class PIIDataset(torch.utils.data.Dataset):
    def __init__(self, examples, tokenizer, max_length):
        self.examples = examples
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        item = self.examples[idx]
        encoding = self.tokenizer(item['source_text'], truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
        input_ids = encoding['input_ids'].squeeze()
        attention_mask = encoding['attention_mask'].squeeze()
        label_ids = torch.tensor(item['labels'], dtype=torch.long)
        # Instead of setting padding labels to -100, keep them as "O"
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': label_ids
        }

In [5]:
def process_data(example, labels2int, tokenizer, max_length):
    text, labels = example["source_text"], example["privacy_mask"]
    encoding = tokenizer(text, truncation=True, padding='max_length', max_length=max_length,
                         return_offsets_mapping=True, return_special_tokens_mask=True)
    label_ids = [labels2int["O"]] * len(encoding['input_ids'])
    for label in labels:
        start_char, end_char = label['start'], label['end']
        token_start = encoding.char_to_token(start_char)
        token_end = encoding.char_to_token(end_char - 1)
        if token_start is not None and token_end is not None:
            label_type = label['label']
            if label_type != "CARDISSUER":
                if label_type.startswith("GIVENNAME"):
                    label_type = "GIVENNAME"
                elif label_type.startswith("LASTNAME"):
                    label_type = "LASTNAME"

                label_ids[token_start] = labels2int.get(f'B-{label_type}', labels2int["O"])
                for i in range(token_start + 1, token_end + 1):
                    label_ids[i] = labels2int.get(f'I-{label_type}', labels2int["O"])

    # Set labels for special tokens and padding to "O" instead of -100
    for i, (input_id, special_token_mask) in enumerate(zip(encoding['input_ids'], encoding['special_tokens_mask'])):
        if special_token_mask == 1 or tokenizer.convert_ids_to_tokens(input_id).startswith("##"):
            label_ids[i] = labels2int["O"]

    return {
        'input_ids': encoding['input_ids'],
        'attention_mask': encoding['attention_mask'],
        'labels': label_ids
    }

In [6]:
from tqdm import tqdm

def train(model, train_loader, val_loader, optimizer, device, num_epochs, id2label, patience=3):
    best_val_loss = float('inf')
    epochs_no_improve = 0
    best_model = None
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            loss = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            progress_bar.set_postfix({'train_loss': f"{loss.item():.4f}"})

        avg_train_loss = total_loss / len(train_loader)
        train_losses.append(avg_train_loss)
        print(f"\nEpoch {epoch+1}/{num_epochs}, Average training loss: {avg_train_loss:.4f}")

        model.eval()
        val_loss = 0
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Validation"):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                loss = model(input_ids, attention_mask=attention_mask, labels=labels)
                val_loss += loss.item()

                preds = model(input_ids, attention_mask=attention_mask)

                for i, mask in enumerate(attention_mask):
                    pred = [id2label[p] for p, m in zip(preds[i], mask) if m.item() == 1]
                    true = [id2label[l.item()] for l, m in zip(labels[i], mask) if m.item() == 1]
                    if pred and true:
                        all_preds.append(pred)
                        all_labels.append(true)

        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)
        print(f"Validation loss: {avg_val_loss:.4f}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            epochs_no_improve = 0
            best_model = model.state_dict()
        else:
            epochs_no_improve += 1
            if epochs_no_improve == patience:
                print(f"Early stopping triggered after epoch {epoch+1}")
                model.load_state_dict(best_model)
                break

        print("\nClassification Report:")
        print(classification_report(all_labels, all_preds))

    # Plot learning curve
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Learning Curve')
    plt.legend()
    plt.savefig('learning_curve.png')
    plt.close()

    return model, all_labels, all_preds

In [7]:
def plot_confusion_matrix(true_labels, pred_labels, label_list):
    true_flat = [item for sublist in true_labels for item in sublist]
    pred_flat = [item for sublist in pred_labels for item in sublist]

    cm = confusion_matrix(true_flat, pred_flat, labels=label_list)

    plt.figure(figsize=(20, 16))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_list, yticklabels=label_list)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig('confusion_matrix.png')
    plt.close()

In [9]:
def main():
    # Define labels (excluding CARDISSUER)
    labels = [
        "BOD", "BUILDING", "CITY", "COUNTRY", "DATE", "DRIVERLICENSE",
        "EMAIL", "GEOCOORD", "GIVENNAME", "IDCARD", "IP", "LASTNAME",
        "PASS", "PASSPORT", "POSTCODE", "SECADDRESS", "SEX",
        "SOCIALNUMBER", "STATE", "STREET", "TEL", "TIME", "TITLE", "USERNAME"
    ]

    label_list = ["O"] + [f"B-{label}" for label in labels] + [f"I-{label}" for label in labels]
    label2id = {label: i for i, label in enumerate(label_list)}
    id2label = {v: k for k, v in label2id.items()}

    # Load tokenizer and dataset
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    ds = load_dataset("ai4privacy/pii-masking-300k")
    ds = ds.filter(lambda x: x["language"] == "English", num_proc=4)
    ds = ds.filter(lambda x: "CARDISSUER" not in x["mbert_bio_labels"], num_proc=4)

    max_length = 512
    ds = ds.map(
        partial(process_data, labels2int=label2id, tokenizer=tokenizer, max_length=max_length),
        remove_columns=['privacy_mask', 'span_labels', 'mbert_text_tokens', 'mbert_bio_labels', 'id', 'language', 'set'],
        num_proc=8
    )

    # Split the validation set into validation and test sets
    val_test_split = 0.3
    val_dataset = ds["validation"]
    val_dataset = val_dataset.shuffle(seed=42)
    val_size = int(len(val_dataset) * (1 - val_test_split))
    test_size = len(val_dataset) - val_size
    val_dataset, test_dataset = val_dataset.select(range(val_size)), val_dataset.select(range(val_size, len(val_dataset)))

    # Create datasets and dataloaders
    train_dataset = PIIDataset(ds["train"], tokenizer, max_length)
    val_dataset = PIIDataset(val_dataset, tokenizer, max_length)
    test_dataset = PIIDataset(test_dataset, tokenizer, max_length)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64)
    test_loader = DataLoader(test_dataset, batch_size=64)

    # Initialize model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    model = BiLSTMTextCNNCRF(
        vocab_size=tokenizer.vocab_size,
        embedding_dim=300,
        hidden_dim=256,
        num_labels=len(label_list)
    ).to(device)

    # Define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)

    # Train the model
    num_epochs = 50
    print(f"Starting training for {num_epochs} epochs...")
    model, true_labels, pred_labels = train(model, train_loader, val_loader, optimizer, device, num_epochs, id2label)

    # Save the model
    model_save_path = f"bilstm_textcnn_crf_pii_model_{str(uuid.uuid4())}.pth"
    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved successfully to {model_save_path}")

    # Test the model
    model.eval()
    test_preds = []
    test_labels = []
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Testing"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            preds = model(input_ids, attention_mask=attention_mask)

            for i, mask in enumerate(attention_mask):
                pred = [id2label[p] for p, m in zip(preds[i], mask) if m.item() == 1]
                true = [id2label[l.item()] for l, m in zip(labels[i], mask) if m.item() == 1]
                if pred and true:
                    test_preds.append(pred)
                    test_labels.append(true)

    # Print test results
    print("\nTest Results:")
    print(classification_report(test_labels, test_preds))
     # After training and testing
    plot_confusion_matrix(test_labels, test_preds, label_list)


In [12]:
if __name__ == "__main__":
    main()



Map (num_proc=8):   0%|          | 0/29908 [00:00<?, ? examples/s]

Map (num_proc=8):   0%|          | 0/7946 [00:00<?, ? examples/s]

Using device: cuda
Starting training for 50 epochs...


Epoch 1/50: 100%|██████████████████████████████████████████████| 935/935 [06:21<00:00,  2.45it/s, train_loss=1857.7751]



Epoch 1/50, Average training loss: 4945.9581


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.20it/s]


Validation loss: 6651.4563

Classification Report:


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


               precision    recall  f1-score   support

          BOD       0.00      0.00      0.00      1916
     BUILDING       0.00      0.00      0.00      1356
         CITY       0.00      0.00      0.00      1653
      COUNTRY       0.00      0.00      0.00      1128
         DATE       0.00      0.00      0.00      1475
DRIVERLICENSE       0.00      0.00      0.00      2984
        EMAIL       0.00      0.00      0.00      5402
     GEOCOORD       0.00      0.00      0.00       305
    GIVENNAME       0.00      0.00      0.00      1854
       IDCARD       0.00      0.00      0.00      1834
           IP       0.00      0.00      0.00      6754
     LASTNAME       0.00      0.00      0.00      2281
         PASS       0.00      0.00      0.00      2270
     PASSPORT       0.00      0.00      0.00      1709
     POSTCODE       0.00      0.00      0.00      1957
   SECADDRESS       0.00      0.00      0.00       853
          SEX       0.00      0.00      0.00      1508
 SOCIALNU

Epoch 2/50: 100%|██████████████████████████████████████████████| 935/935 [06:17<00:00,  2.48it/s, train_loss=1774.0564]



Epoch 2/50, Average training loss: 3048.6098


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.21it/s]


Validation loss: 5366.1326

Classification Report:
               precision    recall  f1-score   support

          BOD       0.00      0.00      0.00      1916
     BUILDING       0.00      0.00      0.00      1356
         CITY       0.00      0.00      0.00      1653
      COUNTRY       0.00      0.00      0.00      1128
         DATE       0.00      0.00      0.00      1475
DRIVERLICENSE       0.00      0.00      0.00      2984
        EMAIL       0.55      0.25      0.34      5402
     GEOCOORD       0.00      0.00      0.00       305
    GIVENNAME       0.00      0.00      0.00      1854
       IDCARD       0.00      0.00      0.00      1834
           IP       0.44      0.24      0.31      6754
     LASTNAME       0.00      0.00      0.00      2281
         PASS       0.00      0.00      0.00      2270
     PASSPORT       0.00      0.00      0.00      1709
     POSTCODE       0.00      0.00      0.00      1957
   SECADDRESS       0.00      0.00      0.00       853
          SEX

Epoch 3/50: 100%|██████████████████████████████████████████████| 935/935 [06:18<00:00,  2.47it/s, train_loss=1298.4816]



Epoch 3/50, Average training loss: 2420.2805


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 4210.2338

Classification Report:
               precision    recall  f1-score   support

          BOD       0.02      0.02      0.02      1916
     BUILDING       0.00      0.00      0.00      1356
         CITY       0.00      0.00      0.00      1653
      COUNTRY       0.00      0.00      0.00      1128
         DATE       0.00      0.00      0.00      1475
DRIVERLICENSE       0.00      0.00      0.00      2984
        EMAIL       0.64      0.45      0.53      5402
     GEOCOORD       0.00      0.00      0.00       305
    GIVENNAME       0.00      0.00      0.00      1854
       IDCARD       0.00      0.00      0.00      1834
           IP       0.62      0.61      0.61      6754
     LASTNAME       0.00      0.00      0.00      2281
         PASS       0.00      0.00      0.00      2270
     PASSPORT       0.00      0.00      0.00      1709
     POSTCODE       0.00      0.00      0.00      1957
   SECADDRESS       0.00      0.00      0.00       853
          SEX

Epoch 4/50: 100%|███████████████████████████████████████████████| 935/935 [06:13<00:00,  2.50it/s, train_loss=685.7034]



Epoch 4/50, Average training loss: 1924.9527


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 3364.8226

Classification Report:
               precision    recall  f1-score   support

          BOD       0.18      0.23      0.20      1916
     BUILDING       1.00      0.00      0.00      1356
         CITY       0.00      0.00      0.00      1653
      COUNTRY       0.01      0.00      0.01      1128
         DATE       0.05      0.02      0.03      1475
DRIVERLICENSE       0.01      0.00      0.00      2984
        EMAIL       0.81      0.57      0.67      5402
     GEOCOORD       0.00      0.00      0.00       305
    GIVENNAME       0.00      0.00      0.00      1854
       IDCARD       0.00      0.00      0.00      1834
           IP       0.72      0.73      0.72      6754
     LASTNAME       0.00      0.00      0.00      2281
         PASS       0.00      0.00      0.00      2270
     PASSPORT       1.00      0.00      0.00      1709
     POSTCODE       0.00      0.00      0.00      1957
   SECADDRESS       0.00      0.00      0.00       853
          SEX

Epoch 5/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=1404.3062]



Epoch 5/50, Average training loss: 1557.0344


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:14<00:00,  1.17it/s]


Validation loss: 2732.6884

Classification Report:
               precision    recall  f1-score   support

          BOD       0.36      0.42      0.39      1916
     BUILDING       0.85      0.12      0.21      1356
         CITY       0.27      0.00      0.00      1653
      COUNTRY       0.78      0.50      0.61      1128
         DATE       0.27      0.19      0.23      1475
DRIVERLICENSE       0.27      0.05      0.08      2984
        EMAIL       0.83      0.70      0.76      5402
     GEOCOORD       0.00      0.00      0.00       305
    GIVENNAME       0.00      0.00      0.00      1854
       IDCARD       0.68      0.04      0.08      1834
           IP       0.78      0.80      0.79      6754
     LASTNAME       0.20      0.00      0.00      2281
         PASS       0.44      0.00      0.01      2270
     PASSPORT       0.64      0.12      0.21      1709
     POSTCODE       0.61      0.05      0.09      1957
   SECADDRESS       0.00      0.00      0.00       853
          SEX

Epoch 6/50: 100%|███████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=872.9976]



Epoch 6/50, Average training loss: 1285.4967


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 2266.8790

Classification Report:
               precision    recall  f1-score   support

          BOD       0.44      0.58      0.50      1916
     BUILDING       0.75      0.42      0.54      1356
         CITY       0.45      0.05      0.09      1653
      COUNTRY       0.88      0.70      0.78      1128
         DATE       0.38      0.31      0.34      1475
DRIVERLICENSE       0.55      0.28      0.37      2984
        EMAIL       0.84      0.80      0.82      5402
     GEOCOORD       0.00      0.00      0.00       305
    GIVENNAME       0.30      0.01      0.02      1854
       IDCARD       0.60      0.25      0.36      1834
           IP       0.82      0.86      0.84      6754
     LASTNAME       0.43      0.02      0.03      2281
         PASS       0.38      0.06      0.11      2270
     PASSPORT       0.68      0.40      0.51      1709
     POSTCODE       0.58      0.25      0.35      1957
   SECADDRESS       0.00      0.00      0.00       853
          SEX

Epoch 7/50: 100%|███████████████████████████████████████████████| 935/935 [06:17<00:00,  2.47it/s, train_loss=698.6881]



Epoch 7/50, Average training loss: 1085.9387


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 1930.2566

Classification Report:
               precision    recall  f1-score   support

          BOD       0.55      0.59      0.57      1916
     BUILDING       0.76      0.54      0.63      1356
         CITY       0.53      0.12      0.19      1653
      COUNTRY       0.90      0.75      0.82      1128
         DATE       0.44      0.47      0.46      1475
DRIVERLICENSE       0.62      0.39      0.48      2984
        EMAIL       0.86      0.86      0.86      5402
     GEOCOORD       0.03      0.03      0.03       305
    GIVENNAME       0.31      0.03      0.05      1854
       IDCARD       0.65      0.40      0.50      1834
           IP       0.84      0.89      0.87      6754
     LASTNAME       0.46      0.06      0.11      2281
         PASS       0.42      0.14      0.21      2270
     PASSPORT       0.70      0.54      0.61      1709
     POSTCODE       0.59      0.37      0.46      1957
   SECADDRESS       0.43      0.02      0.03       853
          SEX

Epoch 8/50: 100%|███████████████████████████████████████████████| 935/935 [06:14<00:00,  2.50it/s, train_loss=547.1093]



Epoch 8/50, Average training loss: 941.3948


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 1687.4407

Classification Report:
               precision    recall  f1-score   support

          BOD       0.63      0.65      0.64      1916
     BUILDING       0.80      0.63      0.70      1356
         CITY       0.52      0.24      0.32      1653
      COUNTRY       0.92      0.80      0.86      1128
         DATE       0.52      0.58      0.55      1475
DRIVERLICENSE       0.69      0.48      0.57      2984
        EMAIL       0.89      0.89      0.89      5402
     GEOCOORD       0.17      0.14      0.15       305
    GIVENNAME       0.35      0.08      0.13      1854
       IDCARD       0.63      0.56      0.60      1834
           IP       0.87      0.90      0.88      6754
     LASTNAME       0.52      0.10      0.16      2281
         PASS       0.40      0.19      0.26      2270
     PASSPORT       0.78      0.59      0.67      1709
     POSTCODE       0.65      0.45      0.53      1957
   SECADDRESS       0.57      0.12      0.20       853
          SEX

Epoch 9/50: 100%|███████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=535.7245]



Epoch 9/50, Average training loss: 833.2123


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 1502.4395

Classification Report:
               precision    recall  f1-score   support

          BOD       0.68      0.72      0.69      1916
     BUILDING       0.86      0.67      0.75      1356
         CITY       0.56      0.29      0.38      1653
      COUNTRY       0.92      0.81      0.86      1128
         DATE       0.59      0.64      0.61      1475
DRIVERLICENSE       0.71      0.54      0.61      2984
        EMAIL       0.90      0.91      0.91      5402
     GEOCOORD       0.34      0.29      0.31       305
    GIVENNAME       0.40      0.06      0.11      1854
       IDCARD       0.66      0.60      0.63      1834
           IP       0.87      0.93      0.90      6754
     LASTNAME       0.43      0.24      0.30      2281
         PASS       0.40      0.23      0.29      2270
     PASSPORT       0.78      0.66      0.71      1709
     POSTCODE       0.71      0.49      0.58      1957
   SECADDRESS       0.67      0.23      0.34       853
          SEX

Epoch 10/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=402.4376]



Epoch 10/50, Average training loss: 749.8197


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.21it/s]


Validation loss: 1362.0678

Classification Report:
               precision    recall  f1-score   support

          BOD       0.72      0.73      0.73      1916
     BUILDING       0.86      0.72      0.78      1356
         CITY       0.57      0.35      0.43      1653
      COUNTRY       0.92      0.83      0.87      1128
         DATE       0.62      0.71      0.66      1475
DRIVERLICENSE       0.75      0.56      0.64      2984
        EMAIL       0.91      0.92      0.91      5402
     GEOCOORD       0.54      0.52      0.53       305
    GIVENNAME       0.47      0.14      0.21      1854
       IDCARD       0.69      0.63      0.66      1834
           IP       0.89      0.93      0.91      6754
     LASTNAME       0.48      0.27      0.34      2281
         PASS       0.42      0.27      0.32      2270
     PASSPORT       0.76      0.71      0.73      1709
     POSTCODE       0.71      0.56      0.63      1957
   SECADDRESS       0.70      0.33      0.45       853
          SEX

Epoch 11/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=594.6633]



Epoch 11/50, Average training loss: 683.8117


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.19it/s]


Validation loss: 1249.4678

Classification Report:
               precision    recall  f1-score   support

          BOD       0.75      0.75      0.75      1916
     BUILDING       0.90      0.72      0.80      1356
         CITY       0.59      0.37      0.45      1653
      COUNTRY       0.92      0.83      0.87      1128
         DATE       0.66      0.73      0.69      1475
DRIVERLICENSE       0.76      0.59      0.67      2984
        EMAIL       0.92      0.92      0.92      5402
     GEOCOORD       0.54      0.51      0.52       305
    GIVENNAME       0.51      0.12      0.19      1854
       IDCARD       0.67      0.68      0.68      1834
           IP       0.90      0.94      0.92      6754
     LASTNAME       0.46      0.32      0.38      2281
         PASS       0.44      0.29      0.35      2270
     PASSPORT       0.81      0.70      0.75      1709
     POSTCODE       0.76      0.56      0.64      1957
   SECADDRESS       0.73      0.34      0.46       853
          SEX

Epoch 12/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=322.3943]



Epoch 12/50, Average training loss: 629.5189


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:11<00:00,  1.22it/s]


Validation loss: 1155.0782

Classification Report:
               precision    recall  f1-score   support

          BOD       0.76      0.79      0.78      1916
     BUILDING       0.91      0.75      0.82      1356
         CITY       0.61      0.41      0.49      1653
      COUNTRY       0.92      0.85      0.88      1128
         DATE       0.71      0.77      0.74      1475
DRIVERLICENSE       0.76      0.64      0.70      2984
        EMAIL       0.92      0.93      0.93      5402
     GEOCOORD       0.63      0.62      0.63       305
    GIVENNAME       0.51      0.15      0.23      1854
       IDCARD       0.70      0.69      0.70      1834
           IP       0.91      0.94      0.93      6754
     LASTNAME       0.47      0.35      0.40      2281
         PASS       0.46      0.32      0.38      2270
     PASSPORT       0.80      0.73      0.76      1709
     POSTCODE       0.77      0.62      0.68      1957
   SECADDRESS       0.75      0.40      0.52       853
          SEX

Epoch 13/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=409.8174]



Epoch 13/50, Average training loss: 584.4926


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.20it/s]


Validation loss: 1078.4935

Classification Report:
               precision    recall  f1-score   support

          BOD       0.77      0.82      0.80      1916
     BUILDING       0.92      0.78      0.84      1356
         CITY       0.60      0.44      0.51      1653
      COUNTRY       0.92      0.85      0.88      1128
         DATE       0.74      0.77      0.76      1475
DRIVERLICENSE       0.76      0.68      0.72      2984
        EMAIL       0.93      0.94      0.93      5402
     GEOCOORD       0.65      0.65      0.65       305
    GIVENNAME       0.52      0.23      0.32      1854
       IDCARD       0.75      0.68      0.71      1834
           IP       0.92      0.94      0.93      6754
     LASTNAME       0.53      0.34      0.42      2281
         PASS       0.48      0.34      0.40      2270
     PASSPORT       0.80      0.75      0.77      1709
     POSTCODE       0.78      0.65      0.71      1957
   SECADDRESS       0.76      0.45      0.57       853
          SEX

Epoch 14/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=208.7238]



Epoch 14/50, Average training loss: 546.3983


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:11<00:00,  1.21it/s]


Validation loss: 1016.8506

Classification Report:
               precision    recall  f1-score   support

          BOD       0.82      0.80      0.81      1916
     BUILDING       0.92      0.80      0.86      1356
         CITY       0.62      0.46      0.53      1653
      COUNTRY       0.92      0.86      0.89      1128
         DATE       0.74      0.82      0.78      1475
DRIVERLICENSE       0.78      0.70      0.74      2984
        EMAIL       0.93      0.94      0.94      5402
     GEOCOORD       0.67      0.66      0.66       305
    GIVENNAME       0.55      0.26      0.36      1854
       IDCARD       0.74      0.69      0.72      1834
           IP       0.93      0.95      0.94      6754
     LASTNAME       0.56      0.36      0.44      2281
         PASS       0.50      0.36      0.42      2270
     PASSPORT       0.79      0.77      0.78      1709
     POSTCODE       0.79      0.68      0.73      1957
   SECADDRESS       0.75      0.47      0.58       853
          SEX

Epoch 15/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=253.9262]



Epoch 15/50, Average training loss: 513.7425


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.20it/s]


Validation loss: 959.3813

Classification Report:
               precision    recall  f1-score   support

          BOD       0.82      0.84      0.83      1916
     BUILDING       0.92      0.82      0.87      1356
         CITY       0.63      0.48      0.54      1653
      COUNTRY       0.92      0.87      0.89      1128
         DATE       0.76      0.81      0.79      1475
DRIVERLICENSE       0.80      0.71      0.75      2984
        EMAIL       0.94      0.94      0.94      5402
     GEOCOORD       0.72      0.74      0.73       305
    GIVENNAME       0.57      0.26      0.36      1854
       IDCARD       0.72      0.73      0.72      1834
           IP       0.93      0.96      0.95      6754
     LASTNAME       0.56      0.38      0.46      2281
         PASS       0.50      0.38      0.43      2270
     PASSPORT       0.83      0.76      0.79      1709
     POSTCODE       0.81      0.71      0.76      1957
   SECADDRESS       0.76      0.50      0.60       853
          SEX 

Epoch 16/50: 100%|██████████████████████████████████████████████| 935/935 [06:17<00:00,  2.48it/s, train_loss=229.4177]



Epoch 16/50, Average training loss: 485.0652


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:10<00:00,  1.23it/s]


Validation loss: 912.3008

Classification Report:
               precision    recall  f1-score   support

          BOD       0.85      0.85      0.85      1916
     BUILDING       0.92      0.84      0.88      1356
         CITY       0.63      0.49      0.55      1653
      COUNTRY       0.93      0.88      0.90      1128
         DATE       0.77      0.83      0.80      1475
DRIVERLICENSE       0.82      0.71      0.76      2984
        EMAIL       0.94      0.94      0.94      5402
     GEOCOORD       0.73      0.75      0.74       305
    GIVENNAME       0.59      0.25      0.35      1854
       IDCARD       0.72      0.75      0.74      1834
           IP       0.93      0.97      0.95      6754
     LASTNAME       0.53      0.43      0.47      2281
         PASS       0.53      0.42      0.47      2270
     PASSPORT       0.81      0.78      0.80      1709
     POSTCODE       0.82      0.74      0.78      1957
   SECADDRESS       0.77      0.54      0.63       853
          SEX 

Epoch 17/50: 100%|██████████████████████████████████████████████| 935/935 [06:14<00:00,  2.50it/s, train_loss=381.0367]



Epoch 17/50, Average training loss: 460.1100


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 869.9017

Classification Report:
               precision    recall  f1-score   support

          BOD       0.84      0.88      0.86      1916
     BUILDING       0.91      0.85      0.88      1356
         CITY       0.64      0.51      0.57      1653
      COUNTRY       0.93      0.88      0.90      1128
         DATE       0.81      0.84      0.82      1475
DRIVERLICENSE       0.82      0.73      0.77      2984
        EMAIL       0.94      0.95      0.94      5402
     GEOCOORD       0.74      0.78      0.76       305
    GIVENNAME       0.56      0.37      0.44      1854
       IDCARD       0.72      0.75      0.74      1834
           IP       0.94      0.96      0.95      6754
     LASTNAME       0.59      0.41      0.48      2281
         PASS       0.53      0.44      0.48      2270
     PASSPORT       0.82      0.79      0.80      1709
     POSTCODE       0.83      0.76      0.79      1957
   SECADDRESS       0.78      0.56      0.65       853
          SEX 

Epoch 18/50: 100%|██████████████████████████████████████████████| 935/935 [06:17<00:00,  2.47it/s, train_loss=197.9601]



Epoch 18/50, Average training loss: 438.3554


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 837.1715

Classification Report:
               precision    recall  f1-score   support

          BOD       0.86      0.86      0.86      1916
     BUILDING       0.92      0.85      0.89      1356
         CITY       0.64      0.53      0.58      1653
      COUNTRY       0.93      0.89      0.91      1128
         DATE       0.80      0.85      0.82      1475
DRIVERLICENSE       0.82      0.74      0.78      2984
        EMAIL       0.94      0.95      0.95      5402
     GEOCOORD       0.75      0.78      0.77       305
    GIVENNAME       0.61      0.29      0.39      1854
       IDCARD       0.78      0.71      0.74      1834
           IP       0.95      0.96      0.95      6754
     LASTNAME       0.57      0.44      0.50      2281
         PASS       0.57      0.46      0.51      2270
     PASSPORT       0.78      0.82      0.80      1709
     POSTCODE       0.82      0.77      0.80      1957
   SECADDRESS       0.80      0.58      0.67       853
          SEX 

Epoch 19/50: 100%|██████████████████████████████████████████████| 935/935 [06:19<00:00,  2.47it/s, train_loss=386.7078]



Epoch 19/50, Average training loss: 417.8655


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 802.9555

Classification Report:
               precision    recall  f1-score   support

          BOD       0.87      0.88      0.87      1916
     BUILDING       0.91      0.86      0.89      1356
         CITY       0.66      0.54      0.59      1653
      COUNTRY       0.94      0.90      0.92      1128
         DATE       0.81      0.87      0.83      1475
DRIVERLICENSE       0.81      0.78      0.79      2984
        EMAIL       0.94      0.96      0.95      5402
     GEOCOORD       0.78      0.82      0.80       305
    GIVENNAME       0.61      0.35      0.45      1854
       IDCARD       0.77      0.73      0.75      1834
           IP       0.95      0.97      0.96      6754
     LASTNAME       0.57      0.48      0.52      2281
         PASS       0.58      0.48      0.53      2270
     PASSPORT       0.82      0.80      0.81      1709
     POSTCODE       0.84      0.80      0.82      1957
   SECADDRESS       0.82      0.61      0.70       853
          SEX 

Epoch 20/50: 100%|██████████████████████████████████████████████| 935/935 [06:17<00:00,  2.48it/s, train_loss=195.7977]



Epoch 20/50, Average training loss: 400.1286


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 774.2172

Classification Report:
               precision    recall  f1-score   support

          BOD       0.88      0.88      0.88      1916
     BUILDING       0.91      0.87      0.89      1356
         CITY       0.65      0.54      0.59      1653
      COUNTRY       0.93      0.91      0.92      1128
         DATE       0.82      0.87      0.85      1475
DRIVERLICENSE       0.83      0.77      0.80      2984
        EMAIL       0.94      0.96      0.95      5402
     GEOCOORD       0.78      0.83      0.80       305
    GIVENNAME       0.61      0.39      0.48      1854
       IDCARD       0.75      0.75      0.75      1834
           IP       0.95      0.97      0.96      6754
     LASTNAME       0.60      0.47      0.53      2281
         PASS       0.60      0.51      0.55      2270
     PASSPORT       0.82      0.81      0.81      1709
     POSTCODE       0.84      0.82      0.83      1957
   SECADDRESS       0.82      0.63      0.71       853
          SEX 

Epoch 21/50: 100%|██████████████████████████████████████████████| 935/935 [06:14<00:00,  2.49it/s, train_loss=266.8405]



Epoch 21/50, Average training loss: 383.9608


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 747.0054

Classification Report:
               precision    recall  f1-score   support

          BOD       0.88      0.89      0.89      1916
     BUILDING       0.93      0.87      0.90      1356
         CITY       0.68      0.55      0.61      1653
      COUNTRY       0.94      0.91      0.92      1128
         DATE       0.84      0.87      0.85      1475
DRIVERLICENSE       0.83      0.78      0.80      2984
        EMAIL       0.94      0.96      0.95      5402
     GEOCOORD       0.78      0.82      0.80       305
    GIVENNAME       0.59      0.47      0.52      1854
       IDCARD       0.76      0.74      0.75      1834
           IP       0.95      0.97      0.96      6754
     LASTNAME       0.66      0.44      0.53      2281
         PASS       0.61      0.52      0.56      2270
     PASSPORT       0.81      0.82      0.82      1709
     POSTCODE       0.87      0.82      0.84      1957
   SECADDRESS       0.84      0.66      0.74       853
          SEX 

Epoch 22/50: 100%|██████████████████████████████████████████████| 935/935 [06:17<00:00,  2.48it/s, train_loss=281.9963]



Epoch 22/50, Average training loss: 368.6695


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 725.2978

Classification Report:
               precision    recall  f1-score   support

          BOD       0.89      0.90      0.89      1916
     BUILDING       0.94      0.86      0.90      1356
         CITY       0.70      0.57      0.63      1653
      COUNTRY       0.95      0.91      0.93      1128
         DATE       0.85      0.86      0.86      1475
DRIVERLICENSE       0.83      0.78      0.80      2984
        EMAIL       0.95      0.96      0.95      5402
     GEOCOORD       0.77      0.79      0.78       305
    GIVENNAME       0.63      0.43      0.51      1854
       IDCARD       0.78      0.73      0.76      1834
           IP       0.96      0.97      0.96      6754
     LASTNAME       0.64      0.47      0.54      2281
         PASS       0.63      0.53      0.57      2270
     PASSPORT       0.82      0.82      0.82      1709
     POSTCODE       0.87      0.82      0.85      1957
   SECADDRESS       0.85      0.67      0.75       853
          SEX 

Epoch 23/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=183.1773]



Epoch 23/50, Average training loss: 354.6093


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.19it/s]


Validation loss: 703.9653

Classification Report:
               precision    recall  f1-score   support

          BOD       0.89      0.89      0.89      1916
     BUILDING       0.93      0.88      0.90      1356
         CITY       0.71      0.58      0.64      1653
      COUNTRY       0.94      0.91      0.93      1128
         DATE       0.84      0.88      0.86      1475
DRIVERLICENSE       0.84      0.80      0.82      2984
        EMAIL       0.95      0.96      0.96      5402
     GEOCOORD       0.79      0.82      0.80       305
    GIVENNAME       0.64      0.45      0.53      1854
       IDCARD       0.77      0.74      0.76      1834
           IP       0.96      0.97      0.96      6754
     LASTNAME       0.67      0.47      0.55      2281
         PASS       0.64      0.54      0.59      2270
     PASSPORT       0.81      0.83      0.82      1709
     POSTCODE       0.88      0.84      0.86      1957
   SECADDRESS       0.86      0.70      0.77       853
          SEX 

Epoch 24/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=467.4493]



Epoch 24/50, Average training loss: 342.1290


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:14<00:00,  1.17it/s]


Validation loss: 683.0546

Classification Report:
               precision    recall  f1-score   support

          BOD       0.89      0.90      0.90      1916
     BUILDING       0.93      0.88      0.91      1356
         CITY       0.70      0.59      0.64      1653
      COUNTRY       0.94      0.92      0.93      1128
         DATE       0.85      0.88      0.87      1475
DRIVERLICENSE       0.84      0.80      0.82      2984
        EMAIL       0.95      0.96      0.96      5402
     GEOCOORD       0.79      0.84      0.81       305
    GIVENNAME       0.65      0.45      0.53      1854
       IDCARD       0.77      0.75      0.76      1834
           IP       0.96      0.97      0.96      6754
     LASTNAME       0.64      0.52      0.57      2281
         PASS       0.65      0.56      0.60      2270
     PASSPORT       0.83      0.82      0.82      1709
     POSTCODE       0.88      0.85      0.87      1957
   SECADDRESS       0.86      0.72      0.79       853
          SEX 

Epoch 25/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=173.6403]



Epoch 25/50, Average training loss: 330.0946


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.20it/s]


Validation loss: 665.2647

Classification Report:
               precision    recall  f1-score   support

          BOD       0.90      0.92      0.91      1916
     BUILDING       0.93      0.89      0.91      1356
         CITY       0.72      0.60      0.65      1653
      COUNTRY       0.94      0.92      0.93      1128
         DATE       0.86      0.88      0.87      1475
DRIVERLICENSE       0.85      0.80      0.82      2984
        EMAIL       0.95      0.97      0.96      5402
     GEOCOORD       0.80      0.83      0.81       305
    GIVENNAME       0.61      0.52      0.56      1854
       IDCARD       0.75      0.77      0.76      1834
           IP       0.96      0.97      0.97      6754
     LASTNAME       0.67      0.49      0.57      2281
         PASS       0.66      0.58      0.62      2270
     PASSPORT       0.82      0.83      0.83      1709
     POSTCODE       0.89      0.86      0.87      1957
   SECADDRESS       0.85      0.74      0.79       853
          SEX 

Epoch 26/50: 100%|██████████████████████████████████████████████| 935/935 [06:17<00:00,  2.48it/s, train_loss=137.7720]



Epoch 26/50, Average training loss: 318.9819


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 649.9397

Classification Report:
               precision    recall  f1-score   support

          BOD       0.91      0.91      0.91      1916
     BUILDING       0.93      0.89      0.91      1356
         CITY       0.72      0.61      0.66      1653
      COUNTRY       0.94      0.93      0.94      1128
         DATE       0.85      0.89      0.87      1475
DRIVERLICENSE       0.86      0.79      0.83      2984
        EMAIL       0.95      0.97      0.96      5402
     GEOCOORD       0.80      0.85      0.82       305
    GIVENNAME       0.61      0.53      0.57      1854
       IDCARD       0.77      0.76      0.76      1834
           IP       0.96      0.97      0.97      6754
     LASTNAME       0.68      0.49      0.57      2281
         PASS       0.67      0.58      0.62      2270
     PASSPORT       0.80      0.85      0.82      1709
     POSTCODE       0.89      0.87      0.88      1957
   SECADDRESS       0.87      0.77      0.82       853
          SEX 

Epoch 27/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.49it/s, train_loss=724.5241]



Epoch 27/50, Average training loss: 308.8691


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.21it/s]


Validation loss: 633.2399

Classification Report:
               precision    recall  f1-score   support

          BOD       0.90      0.92      0.91      1916
     BUILDING       0.93      0.89      0.91      1356
         CITY       0.72      0.62      0.67      1653
      COUNTRY       0.95      0.93      0.94      1128
         DATE       0.88      0.88      0.88      1475
DRIVERLICENSE       0.87      0.80      0.83      2984
        EMAIL       0.95      0.97      0.96      5402
     GEOCOORD       0.81      0.86      0.83       305
    GIVENNAME       0.63      0.52      0.57      1854
       IDCARD       0.76      0.77      0.77      1834
           IP       0.96      0.97      0.97      6754
     LASTNAME       0.68      0.51      0.58      2281
         PASS       0.68      0.60      0.64      2270
     PASSPORT       0.82      0.84      0.83      1709
     POSTCODE       0.90      0.88      0.89      1957
   SECADDRESS       0.88      0.77      0.82       853
          SEX 

Epoch 28/50: 100%|██████████████████████████████████████████████| 935/935 [06:14<00:00,  2.50it/s, train_loss=242.2592]



Epoch 28/50, Average training loss: 298.7979


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:11<00:00,  1.21it/s]


Validation loss: 621.1158

Classification Report:
               precision    recall  f1-score   support

          BOD       0.91      0.91      0.91      1916
     BUILDING       0.93      0.90      0.91      1356
         CITY       0.73      0.63      0.67      1653
      COUNTRY       0.95      0.93      0.94      1128
         DATE       0.85      0.90      0.87      1475
DRIVERLICENSE       0.86      0.81      0.84      2984
        EMAIL       0.96      0.97      0.96      5402
     GEOCOORD       0.82      0.87      0.85       305
    GIVENNAME       0.64      0.52      0.57      1854
       IDCARD       0.76      0.77      0.77      1834
           IP       0.96      0.98      0.97      6754
     LASTNAME       0.68      0.52      0.59      2281
         PASS       0.69      0.60      0.64      2270
     PASSPORT       0.83      0.84      0.83      1709
     POSTCODE       0.90      0.88      0.89      1957
   SECADDRESS       0.90      0.79      0.84       853
          SEX 

Epoch 29/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=186.8837]



Epoch 29/50, Average training loss: 289.6193


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:10<00:00,  1.23it/s]


Validation loss: 606.0089

Classification Report:
               precision    recall  f1-score   support

          BOD       0.91      0.92      0.92      1916
     BUILDING       0.94      0.89      0.92      1356
         CITY       0.74      0.63      0.68      1653
      COUNTRY       0.95      0.93      0.94      1128
         DATE       0.87      0.89      0.88      1475
DRIVERLICENSE       0.86      0.82      0.84      2984
        EMAIL       0.96      0.97      0.96      5402
     GEOCOORD       0.81      0.84      0.82       305
    GIVENNAME       0.67      0.49      0.57      1854
       IDCARD       0.77      0.77      0.77      1834
           IP       0.97      0.97      0.97      6754
     LASTNAME       0.67      0.55      0.60      2281
         PASS       0.70      0.61      0.65      2270
     PASSPORT       0.83      0.83      0.83      1709
     POSTCODE       0.90      0.89      0.89      1957
   SECADDRESS       0.92      0.81      0.86       853
          SEX 

Epoch 30/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=606.7929]



Epoch 30/50, Average training loss: 280.9737


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 594.7470

Classification Report:
               precision    recall  f1-score   support

          BOD       0.91      0.92      0.91      1916
     BUILDING       0.93      0.91      0.92      1356
         CITY       0.74      0.64      0.69      1653
      COUNTRY       0.95      0.94      0.94      1128
         DATE       0.87      0.89      0.88      1475
DRIVERLICENSE       0.87      0.81      0.84      2984
        EMAIL       0.96      0.97      0.96      5402
     GEOCOORD       0.83      0.87      0.85       305
    GIVENNAME       0.64      0.54      0.59      1854
       IDCARD       0.75      0.78      0.77      1834
           IP       0.97      0.97      0.97      6754
     LASTNAME       0.69      0.52      0.60      2281
         PASS       0.70      0.61      0.66      2270
     PASSPORT       0.84      0.83      0.83      1709
     POSTCODE       0.90      0.89      0.90      1957
   SECADDRESS       0.91      0.83      0.87       853
          SEX 

Epoch 31/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=148.7084]



Epoch 31/50, Average training loss: 272.7273


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:11<00:00,  1.22it/s]


Validation loss: 585.1170

Classification Report:
               precision    recall  f1-score   support

          BOD       0.92      0.91      0.92      1916
     BUILDING       0.95      0.90      0.92      1356
         CITY       0.74      0.64      0.69      1653
      COUNTRY       0.96      0.93      0.94      1128
         DATE       0.87      0.89      0.88      1475
DRIVERLICENSE       0.87      0.81      0.84      2984
        EMAIL       0.96      0.97      0.96      5402
     GEOCOORD       0.83      0.86      0.85       305
    GIVENNAME       0.66      0.52      0.59      1854
       IDCARD       0.78      0.77      0.77      1834
           IP       0.97      0.97      0.97      6754
     LASTNAME       0.68      0.54      0.60      2281
         PASS       0.72      0.62      0.67      2270
     PASSPORT       0.83      0.83      0.83      1709
     POSTCODE       0.91      0.89      0.90      1957
   SECADDRESS       0.92      0.82      0.86       853
          SEX 

Epoch 32/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=173.6227]



Epoch 32/50, Average training loss: 264.6111


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.20it/s]


Validation loss: 573.5299

Classification Report:
               precision    recall  f1-score   support

          BOD       0.92      0.93      0.92      1916
     BUILDING       0.93      0.91      0.92      1356
         CITY       0.74      0.66      0.70      1653
      COUNTRY       0.95      0.95      0.95      1128
         DATE       0.89      0.89      0.89      1475
DRIVERLICENSE       0.87      0.82      0.84      2984
        EMAIL       0.96      0.97      0.97      5402
     GEOCOORD       0.83      0.88      0.86       305
    GIVENNAME       0.65      0.54      0.59      1854
       IDCARD       0.78      0.77      0.77      1834
           IP       0.97      0.97      0.97      6754
     LASTNAME       0.69      0.55      0.61      2281
         PASS       0.72      0.64      0.68      2270
     PASSPORT       0.81      0.85      0.83      1709
     POSTCODE       0.91      0.90      0.91      1957
   SECADDRESS       0.92      0.84      0.88       853
          SEX 

Epoch 33/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=327.2307]



Epoch 33/50, Average training loss: 257.3309


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.20it/s]


Validation loss: 562.2535

Classification Report:
               precision    recall  f1-score   support

          BOD       0.92      0.93      0.92      1916
     BUILDING       0.95      0.90      0.92      1356
         CITY       0.75      0.67      0.71      1653
      COUNTRY       0.95      0.94      0.95      1128
         DATE       0.87      0.90      0.89      1475
DRIVERLICENSE       0.87      0.83      0.85      2984
        EMAIL       0.96      0.97      0.97      5402
     GEOCOORD       0.84      0.88      0.86       305
    GIVENNAME       0.64      0.57      0.60      1854
       IDCARD       0.77      0.78      0.77      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.70      0.54      0.61      2281
         PASS       0.72      0.65      0.68      2270
     PASSPORT       0.83      0.83      0.83      1709
     POSTCODE       0.91      0.90      0.91      1957
   SECADDRESS       0.93      0.84      0.88       853
          SEX 

Epoch 34/50: 100%|██████████████████████████████████████████████| 935/935 [06:14<00:00,  2.50it/s, train_loss=490.1055]



Epoch 34/50, Average training loss: 250.1392


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 552.6853

Classification Report:
               precision    recall  f1-score   support

          BOD       0.92      0.93      0.92      1916
     BUILDING       0.94      0.91      0.92      1356
         CITY       0.76      0.67      0.72      1653
      COUNTRY       0.95      0.95      0.95      1128
         DATE       0.88      0.89      0.89      1475
DRIVERLICENSE       0.87      0.82      0.85      2984
        EMAIL       0.96      0.97      0.97      5402
     GEOCOORD       0.84      0.89      0.86       305
    GIVENNAME       0.64      0.57      0.60      1854
       IDCARD       0.77      0.78      0.77      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.70      0.54      0.61      2281
         PASS       0.73      0.65      0.69      2270
     PASSPORT       0.82      0.85      0.83      1709
     POSTCODE       0.91      0.91      0.91      1957
   SECADDRESS       0.93      0.85      0.89       853
          SEX 

Epoch 35/50: 100%|██████████████████████████████████████████████| 935/935 [06:19<00:00,  2.46it/s, train_loss=104.0332]



Epoch 35/50, Average training loss: 243.3908


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 545.1247

Classification Report:
               precision    recall  f1-score   support

          BOD       0.92      0.93      0.93      1916
     BUILDING       0.95      0.90      0.93      1356
         CITY       0.77      0.68      0.72      1653
      COUNTRY       0.95      0.94      0.95      1128
         DATE       0.89      0.89      0.89      1475
DRIVERLICENSE       0.87      0.83      0.85      2984
        EMAIL       0.96      0.97      0.97      5402
     GEOCOORD       0.83      0.88      0.85       305
    GIVENNAME       0.65      0.55      0.60      1854
       IDCARD       0.78      0.78      0.78      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.67      0.57      0.62      2281
         PASS       0.73      0.66      0.70      2270
     PASSPORT       0.82      0.85      0.84      1709
     POSTCODE       0.92      0.90      0.91      1957
   SECADDRESS       0.93      0.86      0.89       853
          SEX 

Epoch 36/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=273.1484]



Epoch 36/50, Average training loss: 236.6716


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 537.3488

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.93      0.93      1916
     BUILDING       0.94      0.91      0.93      1356
         CITY       0.76      0.70      0.73      1653
      COUNTRY       0.95      0.95      0.95      1128
         DATE       0.88      0.90      0.89      1475
DRIVERLICENSE       0.88      0.82      0.85      2984
        EMAIL       0.96      0.98      0.97      5402
     GEOCOORD       0.84      0.88      0.86       305
    GIVENNAME       0.65      0.56      0.60      1854
       IDCARD       0.77      0.78      0.78      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.68      0.57      0.62      2281
         PASS       0.74      0.66      0.70      2270
     PASSPORT       0.82      0.86      0.84      1709
     POSTCODE       0.92      0.91      0.91      1957
   SECADDRESS       0.93      0.87      0.90       853
          SEX 

Epoch 37/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=123.2170]



Epoch 37/50, Average training loss: 230.8600


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 530.0440

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.93      0.93      1916
     BUILDING       0.95      0.91      0.93      1356
         CITY       0.77      0.70      0.73      1653
      COUNTRY       0.95      0.94      0.95      1128
         DATE       0.88      0.90      0.89      1475
DRIVERLICENSE       0.88      0.82      0.85      2984
        EMAIL       0.96      0.97      0.97      5402
     GEOCOORD       0.84      0.88      0.86       305
    GIVENNAME       0.66      0.56      0.60      1854
       IDCARD       0.77      0.79      0.78      1834
           IP       0.97      0.97      0.97      6754
     LASTNAME       0.68      0.57      0.62      2281
         PASS       0.74      0.67      0.71      2270
     PASSPORT       0.83      0.84      0.84      1709
     POSTCODE       0.92      0.91      0.91      1957
   SECADDRESS       0.94      0.87      0.90       853
          SEX 

Epoch 38/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=121.3217]



Epoch 38/50, Average training loss: 224.4585


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 523.2708

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.93      0.93      1916
     BUILDING       0.94      0.92      0.93      1356
         CITY       0.78      0.70      0.74      1653
      COUNTRY       0.95      0.94      0.95      1128
         DATE       0.88      0.90      0.89      1475
DRIVERLICENSE       0.87      0.84      0.86      2984
        EMAIL       0.96      0.98      0.97      5402
     GEOCOORD       0.84      0.89      0.86       305
    GIVENNAME       0.66      0.57      0.61      1854
       IDCARD       0.79      0.77      0.78      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.69      0.57      0.63      2281
         PASS       0.74      0.68      0.71      2270
     PASSPORT       0.82      0.86      0.84      1709
     POSTCODE       0.93      0.91      0.92      1957
   SECADDRESS       0.94      0.88      0.91       853
          SEX 

Epoch 39/50: 100%|██████████████████████████████████████████████| 935/935 [06:19<00:00,  2.46it/s, train_loss=103.6318]



Epoch 39/50, Average training loss: 218.5366


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 515.5859

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.93      0.93      1916
     BUILDING       0.94      0.92      0.93      1356
         CITY       0.78      0.71      0.74      1653
      COUNTRY       0.96      0.94      0.95      1128
         DATE       0.88      0.90      0.89      1475
DRIVERLICENSE       0.88      0.83      0.86      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.84      0.88      0.86       305
    GIVENNAME       0.67      0.55      0.60      1854
       IDCARD       0.80      0.77      0.78      1834
           IP       0.97      0.97      0.97      6754
     LASTNAME       0.69      0.57      0.63      2281
         PASS       0.75      0.68      0.71      2270
     PASSPORT       0.81      0.87      0.84      1709
     POSTCODE       0.92      0.91      0.92      1957
   SECADDRESS       0.95      0.87      0.91       853
          SEX 

Epoch 40/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=103.2700]



Epoch 40/50, Average training loss: 213.3161


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.19it/s]


Validation loss: 508.3262

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.93      0.93      1916
     BUILDING       0.94      0.92      0.93      1356
         CITY       0.79      0.71      0.75      1653
      COUNTRY       0.95      0.95      0.95      1128
         DATE       0.88      0.91      0.89      1475
DRIVERLICENSE       0.88      0.84      0.86      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.84      0.89      0.87       305
    GIVENNAME       0.67      0.56      0.61      1854
       IDCARD       0.79      0.78      0.79      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.68      0.60      0.64      2281
         PASS       0.75      0.69      0.72      2270
     PASSPORT       0.81      0.87      0.84      1709
     POSTCODE       0.93      0.92      0.92      1957
   SECADDRESS       0.94      0.88      0.91       853
          SEX 

Epoch 41/50: 100%|██████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=120.8353]



Epoch 41/50, Average training loss: 207.7659


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:14<00:00,  1.18it/s]


Validation loss: 502.7762

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.93      0.93      1916
     BUILDING       0.94      0.92      0.93      1356
         CITY       0.79      0.71      0.75      1653
      COUNTRY       0.96      0.94      0.95      1128
         DATE       0.89      0.89      0.89      1475
DRIVERLICENSE       0.88      0.84      0.86      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.83      0.88      0.85       305
    GIVENNAME       0.66      0.58      0.62      1854
       IDCARD       0.79      0.78      0.78      1834
           IP       0.97      0.98      0.98      6754
     LASTNAME       0.70      0.57      0.63      2281
         PASS       0.76      0.67      0.72      2270
     PASSPORT       0.82      0.87      0.84      1709
     POSTCODE       0.93      0.91      0.92      1957
   SECADDRESS       0.95      0.88      0.91       853
          SEX 

Epoch 42/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=178.7500]



Epoch 42/50, Average training loss: 203.0699


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.21it/s]


Validation loss: 496.8134

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.93      0.93      1916
     BUILDING       0.95      0.92      0.93      1356
         CITY       0.79      0.73      0.76      1653
      COUNTRY       0.96      0.94      0.95      1128
         DATE       0.88      0.90      0.89      1475
DRIVERLICENSE       0.88      0.84      0.86      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.83      0.88      0.86       305
    GIVENNAME       0.66      0.60      0.63      1854
       IDCARD       0.79      0.77      0.78      1834
           IP       0.97      0.98      0.98      6754
     LASTNAME       0.72      0.56      0.63      2281
         PASS       0.76      0.68      0.72      2270
     PASSPORT       0.83      0.86      0.84      1709
     POSTCODE       0.93      0.92      0.92      1957
   SECADDRESS       0.94      0.89      0.91       853
          SEX 

Epoch 43/50: 100%|██████████████████████████████████████████████| 935/935 [06:17<00:00,  2.48it/s, train_loss=118.8665]



Epoch 43/50, Average training loss: 198.0320


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.19it/s]


Validation loss: 490.8293

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.93      0.93      1916
     BUILDING       0.94      0.92      0.93      1356
         CITY       0.79      0.73      0.76      1653
      COUNTRY       0.96      0.95      0.95      1128
         DATE       0.89      0.90      0.89      1475
DRIVERLICENSE       0.88      0.84      0.86      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.84      0.88      0.86       305
    GIVENNAME       0.66      0.60      0.63      1854
       IDCARD       0.79      0.78      0.79      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.70      0.59      0.64      2281
         PASS       0.76      0.69      0.72      2270
     PASSPORT       0.82      0.87      0.84      1709
     POSTCODE       0.93      0.92      0.92      1957
   SECADDRESS       0.94      0.89      0.92       853
          SEX 

Epoch 44/50: 100%|██████████████████████████████████████████████| 935/935 [06:16<00:00,  2.49it/s, train_loss=114.2780]



Epoch 44/50, Average training loss: 192.9961


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.20it/s]


Validation loss: 486.1278

Classification Report:
               precision    recall  f1-score   support

          BOD       0.92      0.94      0.93      1916
     BUILDING       0.94      0.93      0.93      1356
         CITY       0.80      0.74      0.77      1653
      COUNTRY       0.95      0.95      0.95      1128
         DATE       0.88      0.90      0.89      1475
DRIVERLICENSE       0.88      0.85      0.87      2984
        EMAIL       0.97      0.98      0.97      5402
     GEOCOORD       0.84      0.89      0.87       305
    GIVENNAME       0.64      0.61      0.63      1854
       IDCARD       0.76      0.81      0.78      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.69      0.60      0.64      2281
         PASS       0.76      0.70      0.73      2270
     PASSPORT       0.86      0.84      0.85      1709
     POSTCODE       0.93      0.92      0.93      1957
   SECADDRESS       0.94      0.91      0.93       853
          SEX 

Epoch 45/50: 100%|███████████████████████████████████████████████| 935/935 [06:15<00:00,  2.49it/s, train_loss=41.4109]



Epoch 45/50, Average training loss: 188.3011


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:11<00:00,  1.21it/s]


Validation loss: 482.4288

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.92      0.92      1916
     BUILDING       0.94      0.93      0.93      1356
         CITY       0.79      0.74      0.77      1653
      COUNTRY       0.95      0.95      0.95      1128
         DATE       0.87      0.91      0.89      1475
DRIVERLICENSE       0.89      0.83      0.86      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.85      0.89      0.87       305
    GIVENNAME       0.67      0.59      0.63      1854
       IDCARD       0.77      0.80      0.78      1834
           IP       0.97      0.98      0.98      6754
     LASTNAME       0.70      0.59      0.64      2281
         PASS       0.77      0.69      0.73      2270
     PASSPORT       0.84      0.86      0.85      1709
     POSTCODE       0.92      0.93      0.93      1957
   SECADDRESS       0.94      0.91      0.92       853
          SEX 

Epoch 46/50: 100%|███████████████████████████████████████████████| 935/935 [06:16<00:00,  2.48it/s, train_loss=78.7460]



Epoch 46/50, Average training loss: 184.0643


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 475.7603

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.93      0.93      1916
     BUILDING       0.94      0.93      0.93      1356
         CITY       0.80      0.75      0.77      1653
      COUNTRY       0.95      0.95      0.95      1128
         DATE       0.88      0.90      0.89      1475
DRIVERLICENSE       0.90      0.83      0.86      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.85      0.89      0.87       305
    GIVENNAME       0.68      0.59      0.64      1854
       IDCARD       0.76      0.81      0.78      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.71      0.58      0.64      2281
         PASS       0.76      0.70      0.73      2270
     PASSPORT       0.84      0.86      0.85      1709
     POSTCODE       0.92      0.93      0.93      1957
   SECADDRESS       0.94      0.91      0.93       853
          SEX 

Epoch 47/50: 100%|███████████████████████████████████████████████| 935/935 [06:33<00:00,  2.38it/s, train_loss=66.0214]



Epoch 47/50, Average training loss: 179.6612


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.18it/s]


Validation loss: 474.7144

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.92      0.93      1916
     BUILDING       0.95      0.92      0.94      1356
         CITY       0.82      0.75      0.78      1653
      COUNTRY       0.96      0.95      0.95      1128
         DATE       0.89      0.90      0.89      1475
DRIVERLICENSE       0.89      0.84      0.86      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.85      0.88      0.86       305
    GIVENNAME       0.66      0.62      0.64      1854
       IDCARD       0.78      0.79      0.79      1834
           IP       0.97      0.98      0.97      6754
     LASTNAME       0.71      0.59      0.64      2281
         PASS       0.77      0.70      0.73      2270
     PASSPORT       0.84      0.86      0.85      1709
     POSTCODE       0.94      0.92      0.93      1957
   SECADDRESS       0.94      0.90      0.92       853
          SEX 

Epoch 48/50: 100%|██████████████████████████████████████████████| 935/935 [06:25<00:00,  2.43it/s, train_loss=286.5181]



Epoch 48/50, Average training loss: 175.4414


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:11<00:00,  1.22it/s]


Validation loss: 467.6709

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.92      0.93      1916
     BUILDING       0.94      0.93      0.93      1356
         CITY       0.82      0.75      0.79      1653
      COUNTRY       0.96      0.95      0.96      1128
         DATE       0.88      0.91      0.89      1475
DRIVERLICENSE       0.89      0.85      0.87      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.84      0.89      0.86       305
    GIVENNAME       0.65      0.62      0.64      1854
       IDCARD       0.79      0.78      0.79      1834
           IP       0.97      0.98      0.98      6754
     LASTNAME       0.72      0.58      0.64      2281
         PASS       0.77      0.70      0.73      2270
     PASSPORT       0.84      0.86      0.85      1709
     POSTCODE       0.94      0.93      0.93      1957
   SECADDRESS       0.95      0.91      0.93       853
          SEX 

Epoch 49/50: 100%|██████████████████████████████████████████████| 935/935 [06:32<00:00,  2.38it/s, train_loss=175.3680]



Epoch 49/50, Average training loss: 171.3424


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:13<00:00,  1.19it/s]


Validation loss: 463.2804

Classification Report:
               precision    recall  f1-score   support

          BOD       0.93      0.92      0.93      1916
     BUILDING       0.94      0.92      0.93      1356
         CITY       0.81      0.77      0.79      1653
      COUNTRY       0.96      0.96      0.96      1128
         DATE       0.89      0.91      0.90      1475
DRIVERLICENSE       0.90      0.84      0.87      2984
        EMAIL       0.97      0.98      0.97      5402
     GEOCOORD       0.85      0.88      0.87       305
    GIVENNAME       0.65      0.63      0.64      1854
       IDCARD       0.79      0.79      0.79      1834
           IP       0.98      0.98      0.98      6754
     LASTNAME       0.70      0.59      0.64      2281
         PASS       0.77      0.71      0.74      2270
     PASSPORT       0.83      0.86      0.85      1709
     POSTCODE       0.94      0.93      0.93      1957
   SECADDRESS       0.95      0.91      0.93       853
          SEX 

Epoch 50/50: 100%|███████████████████████████████████████████████| 935/935 [06:33<00:00,  2.38it/s, train_loss=93.5404]



Epoch 50/50, Average training loss: 167.8436


Validation: 100%|██████████████████████████████████████████████████████████████████████| 87/87 [01:12<00:00,  1.19it/s]


Validation loss: 459.4186

Classification Report:
               precision    recall  f1-score   support

          BOD       0.94      0.92      0.93      1916
     BUILDING       0.95      0.93      0.94      1356
         CITY       0.83      0.77      0.79      1653
      COUNTRY       0.96      0.95      0.96      1128
         DATE       0.88      0.91      0.89      1475
DRIVERLICENSE       0.89      0.85      0.87      2984
        EMAIL       0.97      0.97      0.97      5402
     GEOCOORD       0.84      0.89      0.87       305
    GIVENNAME       0.66      0.61      0.64      1854
       IDCARD       0.78      0.80      0.79      1834
           IP       0.97      0.98      0.98      6754
     LASTNAME       0.71      0.59      0.65      2281
         PASS       0.78      0.71      0.75      2270
     PASSPORT       0.84      0.86      0.85      1709
     POSTCODE       0.93      0.93      0.93      1957
   SECADDRESS       0.95      0.92      0.93       853
          SEX 

Testing: 100%|█████████████████████████████████████████████████████████████████████████| 38/38 [00:29<00:00,  1.31it/s]



Test Results:
               precision    recall  f1-score   support

          BOD       0.95      0.92      0.93       781
     BUILDING       0.95      0.89      0.92       586
         CITY       0.81      0.75      0.77       715
      COUNTRY       0.96      0.97      0.96       443
         DATE       0.88      0.89      0.88       573
DRIVERLICENSE       0.91      0.88      0.90      1298
        EMAIL       0.97      0.97      0.97      2212
     GEOCOORD       0.76      0.84      0.80       122
    GIVENNAME       0.65      0.63      0.64       743
       IDCARD       0.82      0.82      0.82       834
           IP       0.97      0.98      0.98      2856
     LASTNAME       0.75      0.61      0.67      1035
         PASS       0.84      0.77      0.81       871
     PASSPORT       0.83      0.83      0.83       752
     POSTCODE       0.91      0.94      0.92       797
   SECADDRESS       0.95      0.87      0.91       398
          SEX       0.94      0.95      0.95     