Create Longformer Concatenation Dataset

In [None]:
!pip install -q datasets
!pip install -q evaluate
!pip install -q seqeval
!pip install -q google-cloud-storage gcsfs

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/480.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m471.0/480.6 kB[0m [31m16.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependenc

In [None]:
import numpy as np
import gcsfs
# from transformers import LongformerForTokenClassification, Trainer, TrainingArguments
from transformers import Trainer, TrainingArguments
from datasets import load_from_disk, Dataset, IterableDataset, load_dataset, Features, Sequence, Value
import torch
import torch.nn as nn
import evaluate
from seqeval.metrics import classification_report, precision_score, recall_score, f1_score, accuracy_score

In [None]:
# use for vertex ai / google cloud
from google.cloud import storage

client = storage.Client()
bucket_name = 'w266-project'
bucket = client.get_bucket(bucket_name)
path = f'gs://{bucket_name}'
vertex_path = '/content'

fs = gcsfs.GCSFileSystem()

# use for google collab
# from google.colab import drive

# drive.mount('/content/drive')
# path = '/content/drive/MyDrive/Colab Notebooks/DATASCI 266/266 project'

In [None]:
def select_data(split, task, size):
    """
    Loads the appropriate dataset per folder structure here: https://drive.google.com/drive/folders/1C3h3rXdbr9nVAC3_G_I-72DfKNiDU_Pa
    Input:
        Split: ['train', 'val', 'test']
        Task: ['ner', 'mask', 'both']
        Size: ['testing', 'mini', 'full']
    Returns:
        Huggingface dataset
    """
    if split not in ['train', 'val', 'test']:
        raise ValueError("Split value must be in ['train', 'val', 'test']")
    if task not in ['ner', 'mask', 'both', 'binary']:
        raise ValueError("Task value must be in ['ner', 'mask', 'both']")
    if size not in ['testing', 'mini', 'full']:
        raise ValueError("Size value must be in ['testing', 'mini', 'full']")

    path_label = {'both': 'longformer', 'ner': 'longformer_ner', 'mask': 'longformer_mask', 'binary': 'longformer_binary'}
    # path_label = {'both': 'longformer', 'ner': 'longformer_ner', 'mask': 'longformer_4096'}

    if size == 'testing':
        ds = load_from_disk(f'{path}/data/tab/{path_label[task]}/lf_{split}_testing')
    if size == 'mini':
        if split == 'train':
            ds = load_from_disk(f'{path}/data/tab/{path_label[task]}/lf_{split}_400')
        else:
            ds = load_from_disk(f'{path}/data/tab/{path_label[task]}/lf_{split}_50')
    if size == 'full':
        ds = load_from_disk(f'{path}/data/tab/{path_label[task]}/lf_{split}')

    return ds

def convert_ids_to_labels(pred, true, task):
    """
    Retrieves label prediction from raw predictions then generates y_pred, y_true for seqeval. Converts
    integers into class labels.

    Input:
        pred = raw predictions from model
        true = original labels from dataset
    Output:
        y_pred
        y_true
    """
    if task == 'ner':
        labels = ['O', 'B-PERSON', 'I-PERSON', 'B-CODE', 'I-CODE', 'B-LOC', 'I-LOC', 'B-ORG', 'I-ORG',
        'B-DEM', 'I-DEM', 'B-DATETIME', 'I-DATETIME', 'B-QUANTITY', 'I-QUANTITY', 'B-MISC', 'I-MISC']
    if task == 'mask':
        labels = ['O', 'B-NO_MASK', 'I-NO_MASK', 'B-DIRECT', 'I-DIRECT', 'B-QUASI', 'I-QUASI']

    # create y_pred
    y_pred = [np.argmax(p, axis=1) for p in pred]
    y_pred = [[labels[x] for x in p] for p in y_pred]

    # create y_true
    y_true = [[0 if x == -100 else x for x in sample] for sample in true]
    y_true = [[labels[x] for x in p] for p in y_true]

    return y_pred, y_true

# Create Iterable Dataset

In [None]:
split = 'train'
file_path = f'{path}/data/tab/concatenated_mini/{split}/concat_0.npy'

with fs.open(file_path, 'rb') as f:
  hidden_states_1 = np.load(f)

In [None]:
file_path2 = f'{path}/data/tab/concatenated_mini/{split}/concat_1.npy'
with fs.open(file_path2, 'rb') as f:
  hidden_states_2 = np.load(f)

In [None]:
file_path3 = f'{path}/data/tab/concatenated_mini/{split}/concat_2.npy'
with fs.open(file_path3, 'rb') as f:
  hidden_states_3 = np.load(f)

In [None]:
file_path4 = f'{path}/data/tab/concatenated_mini/{split}/concat_3.npy'
with fs.open(file_path4, 'rb') as f:
  hidden_states_4 = np.load(f)

In [None]:
hidden_states_128 = np.concatenate((hidden_states_1, hidden_states_2), axis=0)
new_path = f'{path}/data/tab/concatenated_mini/concat_train_128.npy'
with fs.open(new_path, 'wb') as f:
  np.save(f, hidden_states_128)

KeyboardInterrupt: 

In [None]:
hidden_states = np.concatenate((hidden_states_1, hidden_states_2), axis=0)

In [None]:
del hidden_states_1
del hidden_states_2
# del hidden_states_3
# del hidden_states_4

## Create DS

In [None]:
new_path = f'{path}/data/tab/concatenated_mini/concat_train_128.npy'
with fs.open(new_path, 'rb') as f:
  hidden_states = np.load(f)

KeyboardInterrupt: 

In [None]:
ds_train = select_data(split='train', task='mask', size='mini')
labels = ds_train['train']['labels'][:64]

In [None]:
ds_train = {'input_ids': torch.tensor(hidden_states),
      'labels': torch.tensor(labels)}
ds = Dataset.from_dict(ds)

In [None]:
# huggingface does not support streaming with its save_to_disk / arrow format
ds.save_to_disk(f'{path}/data/tab/concatenated_mini/concat_train_128')

Saving the dataset (0/4 shards):   0%|          | 0/64 [00:00<?, ? examples/s]

In [None]:
# ds = load_from_disk(f'{path}/data/tab/concatenated_mini/concat_train_128')
ds.to_json(f'{path}/data/tab/concatenated_mini/concat_train_64.json')

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

5381531953

In [None]:
del hidden_states
del ds

In [None]:
# validation
split = 'val'
file_path = f'{path}/data/tab/concatenated_mini/{split}/concat_0.npy'

with fs.open(file_path, 'rb') as f:
  val_hidden_states = np.load(f)

ds_val = select_data(split='val', task='mask', size='mini')
labels = ds_val['train']['labels'][:32]

In [None]:
ds = {'input_ids': val_hidden_states,
      'labels': labels}
ds = Dataset.from_dict(ds)

In [None]:
# huggingface does not support streaming with its save_to_disk / arrow format
ds.save_to_disk(f'{path}/data/tab/concatenated_mini/concat_val_32')

Saving the dataset (0/2 shards):   0%|          | 0/32 [00:00<?, ? examples/s]

In [None]:
# ds = load_from_disk(f'{path}/data/tab/concatenated_mini/concat_val_32')
ds.to_json(f'{path}/data/tab/concatenated_mini/concat_val_32.json')

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

2690127948

In [None]:
del val_hidden_states
del ds

In [None]:
# test
split = 'test'
file_path = f'{path}/data/tab/concatenated_mini/{split}/concat_0.npy'

with fs.open(file_path, 'rb') as f:
  test_hidden_states = np.load(f)

ds_test = select_data(split='test', task='mask', size='mini')
labels = ds_test['train']['labels'][:32]

In [None]:
ds = {'input_ids': test_hidden_states,
      'labels': labels}
ds = Dataset.from_dict(ds)

In [None]:
# huggingface does not support streaming with its save_to_disk / arrow format
ds.save_to_disk(f'{path}/data/tab/concatenated_mini/concat_test_32')
# ds = load_from_disk(f'{path}/data/tab/concatenated_mini/concat_test_32')
ds.to_json(f'{path}/data/tab/concatenated_mini/concat_test_32.json')

Saving the dataset (0/2 shards):   0%|          | 0/32 [00:00<?, ? examples/s]

Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

2690233136

In [None]:
del test_hidden_states
del ds

In [None]:
def generator(n):
    for i in range(n):
        yield {'input_ids': i, 'labels': i}

# Test iterable dataset

In [None]:
my_iterable_dataset = load_dataset("json", data_files=f'{path}/data/tab/concatenated_mini/concat_test_32.json', split='train', streaming=True)
for example in my_iterable_dataset:
  print(example)

Buffered data was truncated after reaching the output size limit.

# Model

In [None]:
# classes
class ConcatTokenClassificationModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(128, num_classes)
        )
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids, labels=None):
        # print(f"Input shape: {input_ids.shape}")
        batch_size, seq_length, input_dim = input_ids.size()
        input_ids = input_ids.view(-1, input_dim)

        logits = self.linear_relu_stack(input_ids)
        # print(f"Logits shape (flattened): {logits.shape}")

        # Reshape back to (batch_size, seq_length, num_classes)
        logits = logits.view(batch_size, seq_length, -1)
        # print(f"Logits reshaped: {logits.shape}")

        if labels is not None:
            # print(f"Labels shape before flattening: {labels.shape}")
            # print(f'Logits view/size shape: {logits.view(-1, logits.size(-1)).shape}')
            # print(f'Logits view/size shape: {labels.view(-1).shape}')
            loss = self.loss_fn(logits.view(-1, logits.size(-1)), labels.view(-1))
            return {"logits": logits, "loss": loss}

        return {"logits": logits}

seq_length = 4096
input_dim = 1536
hidden_dim = 512
num_classes = 7

model = ConcatTokenClassificationModel(input_dim=input_dim,
                                       hidden_dim=hidden_dim,
                                       num_classes=num_classes)

In [None]:
def compute_metrics(p):
    seqeval = evaluate.load('seqeval')

    predictions, labels = p
    # print(f'Initial predictions shape: {predictions.shape}')
    # print(f'Initial labels shape: {labels.shape}')
    # predictions = predictions[0] # outcoming dim is (1, 32, 4096, 7) instead of (32, 4096, 7)

    # labels = labels[0] # outcoming dim is (1, 32, 4096) instead of (32, 4096)
    print(f'New predictions shape: {predictions.shape}')
    print(f'New labels shape: {labels.shape}')

    predictions = np.argmax(predictions, axis=-1)

    label_list = ['O', 'B-NO_MASK', 'I-NO_MASK', 'B-DIRECT', 'I-DIRECT', 'B-QUASI', 'I-QUASI']
    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels, zero_division=1)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "seqeval_acc": results["overall_accuracy"],
    }

def count_trainable_parameters(model):
    # Get the trainable parameters of the model
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return trainable_params

In [None]:
seq_length = 4096
input_dim = 1536
hidden_dim = 512
num_classes = 7

model = ConcatTokenClassificationModel(input_dim=input_dim,
                                       hidden_dim=hidden_dim,
                                       num_classes=num_classes)

In [None]:
print(model)

ConcatTokenClassificationModel(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=1536, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=512, out_features=128, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.1, inplace=False)
    (6): Linear(in_features=128, out_features=7, bias=True)
  )
  (loss_fn): CrossEntropyLoss()
)


In [None]:
f'{path}/data/tab/concatenated_mini/concat_train_128.json'

'gs://w266-project/data/tab/concatenated_mini/concat_train_128.json'

In [None]:
gsutil ls gs://w266-project/data/tab/concatenated_mini/

SyntaxError: invalid syntax (<ipython-input-25-e174b73cc4ec>, line 1)

In [None]:
# Ensure gcsfs is installed
gcs_path = f'{path}/data/tab/concatenated_mini/concat_train_128.json'

try:
    ds_train = load_from_disk(gcs_path)  # Requires gcsfs to support GCS paths
    print(dataset)
except FileNotFoundError as e:
    print(f"Error: {e}")

Error: Directory gs://w266-project/data/tab/concatenated_mini/concat_train_128.json not found


In [None]:
ds_train = load_dataset("json", data_files=f'{path}/data/tab/concatenated_mini/concat_train_64.json', split='train', streaming=True)
ds_val = load_dataset("json", data_files=f'{path}/data/tab/concatenated_mini/concat_val_32.json', split='train', streaming=True)

# ds_train = load_from_disk(f'{path}/data/tab/concatenated_mini/concat_train_128')
# ds_val = load_from_disk(f'{path}/data/tab/concatenated_mini/concat_val_32')

In [None]:
# TrainingArguments w/o eval
model_name = 'concat_base_0.01_test'

batch_size = 8
num_train_epochs = 20
max_steps = (64 // batch_size) * num_train_epochs

training_args = TrainingArguments(
    output_dir=f'{path}/models/{model_name}/results',
    eval_strategy='epoch',
    save_strategy='epoch',
    logging_strategy='epoch',
    save_total_limit=2,
    load_best_model_at_end=True,
    save_only_model=True,
    metric_for_best_model='eval_loss',
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    greater_is_better=False,
    learning_rate=0.01,
    max_steps=max_steps, # overrides num_train_epochs
    # num_train_epochs=num_train_epochs,
    report_to='none')

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds_train,
    eval_dataset=ds_val,
    # train_dataset=ds_train.with_format("torch"),
    # eval_dataset=ds_val.with_format("torch"),
    compute_metrics=compute_metrics
)



max_steps is given, it will override any value given in num_train_epochs


In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Seqeval Acc
0,5.2788,4.655761,1.0,0.0,0.0,0.792309
1,4.5823,3.093016,0.014319,0.009014,0.011064,0.424874
2,3.9457,1.480406,0.025178,0.013376,0.017471,0.755568
3,1.4581,1.19409,0.0,0.0,0.0,0.790364
4,1.1171,0.975374,0.823529,0.032568,0.062657,0.794532
5,0.9447,0.938173,0.684848,0.032858,0.062708,0.794601
6,0.9203,0.898654,0.684848,0.032858,0.062708,0.794485
7,0.8921,0.841101,0.660819,0.032858,0.062604,0.794439
8,0.8455,0.842691,0.801418,0.032858,0.063128,0.794671
9,0.8376,0.831547,0.858268,0.031695,0.061133,0.794532


Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
New predic

TrainOutput(global_step=40, training_loss=1.4383572936058044, metrics={'train_runtime': 22351.5836, 'train_samples_per_second': 0.057, 'train_steps_per_second': 0.002, 'total_flos': 0.0, 'train_loss': 1.4383572936058044, 'epoch': 19.05})

In [None]:
trainer.evaluate(eval_dataset=ds_train)

New predictions shape: (64, 4096, 7)
New labels shape: (64, 4096)


{'eval_loss': 0.7633841037750244,
 'eval_precision': 0.7391304347826086,
 'eval_recall': 0.022363071035637408,
 'eval_f1': 0.04341265235055137,
 'eval_seqeval_acc': 0.8136210482340227,
 'eval_runtime': 746.2693,
 'eval_samples_per_second': 0.086,
 'eval_steps_per_second': 0.003,
 'epoch': 19.05}

In [None]:
trainer.evaluate(eval_dataset=ds_val)

New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)


{'eval_loss': 0.7965885996818542,
 'eval_precision': 0.859375,
 'eval_recall': 0.0319860424542018,
 'eval_f1': 0.06167647883375385,
 'eval_seqeval_acc': 0.7945547992776775,
 'eval_runtime': 366.4312,
 'eval_samples_per_second': 0.087,
 'eval_steps_per_second': 0.003,
 'epoch': 19.05}

In [None]:
trainer.save_model(f'{path}/models/{model_name}/model')

In [None]:
ds_train = load_from_disk(f'{path}/data/tab/concatenated_mini/concat_train_128')

In [None]:
def convert_ids_to_labels(pred, true, task):
    """
    Retrieves label prediction from raw predictions then generates y_pred, y_true for seqeval. Converts
    integers into class labels.

    Input:
        pred = raw predictions from model
        true = original labels from dataset
    Output:
        y_pred
        y_true
    """
    if task == 'ner':
        labels = ['O', 'B-PERSON', 'I-PERSON', 'B-CODE', 'I-CODE', 'B-LOC', 'I-LOC', 'B-ORG', 'I-ORG',
        'B-DEM', 'I-DEM', 'B-DATETIME', 'I-DATETIME', 'B-QUANTITY', 'I-QUANTITY', 'B-MISC', 'I-MISC']
    if task == 'mask':
        labels = ['O', 'B-NO_MASK', 'I-NO_MASK', 'B-DIRECT', 'I-DIRECT', 'B-QUASI', 'I-QUASI']

    # create y_pred
    y_pred = [np.argmax(p, axis=1) for p in pred]
    y_pred = [[labels[x] for x in p] for p in y_pred]

    # create y_true
    y_true = [[0 if x == -100 else x for x in sample] for sample in true]
    y_true = [[labels[x] for x in p] for p in y_true]

    return y_pred, y_true

In [None]:
predictions, labels, metrics = trainer.predict(ds_train)

# predictions = np.load(f'{path_pred}/predictions.npy')
true_labels = ds_train['labels']

y_pred, y_true = convert_ids_to_labels(predictions, true_labels, task='mask')
print('y_pred', [len(i) for i in y_pred])
print('y_true', [len(i) for i in y_true])

New predictions shape: (64, 4096, 7)
New labels shape: (64, 4096)
y_pred [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]
y_true [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]


In [None]:
np.save(f'{path}/models/{model_name}/predictions_train.npy', predictions)
np.save(f'{path}/models/{model_name}/labels_train.npy', labels)

In [None]:
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, zero_division=1)
recall = recall_score(y_true, y_pred, zero_division=1)
f1 = f1_score(y_true, y_pred, zero_division=1)

print('accuracy:', precision)
print('precision:', precision)
print('recall:', recall)
print('f1 score:', f1)

report = classification_report(y_true, y_pred, zero_division=1)
print(report)

accuracy: 0.7391304347826086
precision: 0.7391304347826086
recall: 0.022363071035637408
f1 score: 0.04341265235055137
              precision    recall  f1-score   support

      DIRECT       0.74      0.43      0.55       430
     NO_MASK       1.00      0.00      0.00      2785
       QUASI       1.00      0.00      0.00      5147

   micro avg       0.74      0.02      0.04      8362
   macro avg       0.91      0.14      0.18      8362
weighted avg       0.99      0.02      0.03      8362



In [None]:
trainer.save_model(f'{path}/models/{model_name}/model')

In [None]:
del ds_train

## Evaluation

In [None]:
# ds_test = load_dataset("json", data_files=f'{path}/data/tab/concatenated_mini/concat_test_32.json', split='train', streaming=True)
ds_test = load_from_disk(f'{path}/data/tab/concatenated_mini/concat_test_32')

In [None]:
predictions, labels, metrics = trainer.predict(ds_test)

# predictions = np.load(f'{path_pred}/predictions.npy')
true_labels = ds_test['labels']

y_pred, y_true = convert_ids_to_labels(predictions, true_labels, task='mask')
print('y_pred', [len(i) for i in y_pred])
print('y_true', [len(i) for i in y_true])

New predictions shape: (32, 4096, 7)
New labels shape: (32, 4096)
y_pred [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]
y_true [4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096]


In [None]:
np.save(f'{path}/models/{model_name}/predictions.npy', predictions)
np.save(f'{path}/models/{model_name}/labels.npy', labels)

In [None]:
true_labels = ds_test['labels']

In [None]:
predictions, labels, metrics = trainer.predict(ds_test['train'])
print(f"Metrics: {metrics}")
print(predictions[0])
print(labels[0])

## Seqeval

In [None]:
# predictions = np.load(f'{path_pred}/predictions.npy')
true_labels = ds_test['train']['labels']

y_pred, y_true = convert_ids_to_labels(predictions, true_labels, task='mask')
print('y_pred', [len(i) for i in y_pred])
print('y_true', [len(i) for i in y_true])

In [None]:
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, zero_division=1)
recall = recall_score(y_true, y_pred, zero_division=1)
f1 = f1_score(y_true, y_pred, zero_division=1)

print('accuracy:', accuracy)
print('precision:', precision)
print('recall:', recall)
print('f1 score:', f1)

report = classification_report(y_true, y_pred, zero_division=1)
print(report)

accuracy: 0.9392166137695312
precision: 0.9765625
recall: 0.03721345638582912
f1 score: 0.07169486664754804
              precision    recall  f1-score   support

      DIRECT       0.98      0.46      0.63       270
     NO_MASK       1.00      0.00      0.00       798
       QUASI       1.00      0.00      0.00      2291

   micro avg       0.98      0.04      0.07      3359
   macro avg       0.99      0.15      0.21      3359
weighted avg       1.00      0.04      0.05      3359

