# Text Mining - Assignment 2: Sequence Labelling
## Group 58: Vasiliki Gkika, Pelagia Kalpakidou

In [1]:
# libraries
!pip install datasets evaluate transformers[sentencepiece]
!apt install git-lfs
import re
import datasets
from datasets import DatasetDict
import tensorflow as tf

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git-lfs is already the newest version (3.0.2-1ubuntu0.2).
0 upgraded, 0 newly installed, 0 to remove and 19 not upgraded.


### Load data

In [2]:
# load data and convert IOB file to correct data structure
def read_datasets(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        raw_text = file.read().strip()

    raw_docs = re.split(r'\n\t?\n', raw_text)
    token_docs = []
    tag_docs = []

    for doc in raw_docs:
        tokens = []
        tags = []
        for line in doc.split('\n'):
            if len(line.split('\t')) < 2:
                continue
            token, tag = line.split('\t')
            tokens.append(token)
            tags.append(tag)
        token_docs.append(tokens)
        tag_docs.append(tags)

    return token_docs, tag_docs

tokens_train, tag_train = read_datasets('wnut17train.conll')
tokens_dev, tag_dev = read_datasets('emerging.dev.conll')
tokens_test, tag_test = read_datasets('emerging.test.annotated')

### Pre-processing

In [3]:
# map IOB tags to NER tags
mapping = {
        'O': 0,
        'B-corporation': 1,
        'I-corporation': 2,
        'B-creative-work': 3,
        'I-creative-work': 4,
        'B-group': 5,
        'I-group': 6,
        'B-location': 7,
        'I-location': 8,
        'B-person': 9,
        'I-person': 10,
        'B-product': 11,
        'I-product': 12,
    }

def IOB_to_NER (tokens, iob_tags):
    ner_tags = []
    for iob in iob_tags:
        ner_tags.append([mapping[tag] for tag in iob])
    return ner_tags

ner_train = IOB_to_NER(tokens_train, tag_train)
ner_dev = IOB_to_NER(tokens_dev, tag_dev)
ner_test = IOB_to_NER(tokens_test, tag_test)


In [4]:
train_dataset = datasets.Dataset.from_dict({"id": range(len(tokens_train)), "tokens": tokens_train, "iob_tags": tag_train, "ner_tags": ner_train})
validation_dataset = datasets.Dataset.from_dict({"id": range(len(tokens_dev)), "tokens": tokens_dev, "iob_tags": tag_dev, "ner_tags": ner_dev})
test_dataset = datasets.Dataset.from_dict({"id": range(len(tokens_test)), "tokens": tokens_test, "iob_tags": tag_test, "ner_tags": ner_test})

# from torch.utils.data import DataLoader

combined_datasets = DatasetDict({
    "train": train_dataset,
    "validation": validation_dataset,
    "test": test_dataset
})

combined_datasets
len(combined_datasets["train"]["tokens"])

3394

In [5]:
# decoding and displaying the NER tags in a human-readable format
words = combined_datasets["train"][0]["tokens"]
labels = combined_datasets["train"][0]["iob_tags"]
line1 = ""
line2 = ""

for word, labels in zip(words, labels):
    max_length = max(len(word), max(len(label) for label in labels))
    line1 += word + " " * (max_length - len(word) + 1)
    line2 += " ".join(labels) + " " * (max_length - max(len(label) for label in labels) + 1)

print(line1)
print(line2)

@paulwalk It 's the view from where I 'm living for two weeks . Empire State Building = ESB . Pretty bad storm here last evening . 
O         O  O  O   O    O    O     O O  O      O   O   O     O B - l o c a t i o n      I - l o c a t i o n     I - l o c a t i o n        O B - l o c a t i o n   O O      O   O     O    O    O       O 


#### Align labels with tokens

In [6]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# tokenize a pre-tokenized input
inputs = tokenizer(combined_datasets["train"][0]["tokens"], is_split_into_words=True)
print(inputs.tokens(), )
print(inputs.word_ids(), )

['[CLS]', '@', 'p', '##aul', '##walk', 'It', "'", 's', 'the', 'view', 'from', 'where', 'I', "'", 'm', 'living', 'for', 'two', 'weeks', '.', 'Empire', 'State', 'Building', '=', 'E', '##SB', '.', 'Pretty', 'bad', 'storm', 'here', 'last', 'evening', '.', '[SEP]']
[None, 0, 0, 0, 0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26, None]


In [7]:

def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    current_word = None
    for word_id in word_ids:
        if word_id != current_word:
            # Start of a new word!
            current_word = word_id
            label = -100 if word_id is None else labels[word_id]
            new_labels.append(label)
        elif word_id is None:
            # Special token
            new_labels.append(-100)
        else:
            # Same word as previous token
            label = labels[word_id]
            # # If the label is B-XXX we change it to I-XXX
            if label % 2 == 1:
                label += 1
            new_labels.append(label)
    return new_labels


labels = combined_datasets["train"][0]["ner_tags"]
word_ids = inputs.word_ids()
print(labels)
print(align_labels_with_tokens(labels, word_ids))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 8, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0]
[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 8, 0, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, -100]


In [8]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"], truncation=True, is_split_into_words=True
    )
    all_labels = examples["ner_tags"]
    new_labels = []
    for i, labels in enumerate(all_labels):
        word_ids = tokenized_inputs.word_ids(i)
        new_labels.append(align_labels_with_tokens(labels, word_ids))

    tokenized_inputs["labels"] = new_labels
    return tokenized_inputs



tokenized_datasets = combined_datasets.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=combined_datasets["train"].column_names,
)


tokenized_datasets

Map:   0%|          | 0/3394 [00:00<?, ? examples/s]

Map:   0%|          | 0/1009 [00:00<?, ? examples/s]

Map:   0%|          | 0/1287 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 3394
    })
    validation: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 1009
    })
    test: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 1287
    })
})

### Fine-tuning the model

In [9]:
# from transformers import DataCollatorForTokenClassification

# data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(
    tokenizer=tokenizer, return_tensors="tf"
)

In [10]:
label_names = ['O', 'B-corporation', 'I-corporation', 'B-creative-work', 'I-creative-work', 'B-group', 'I-group', 'B-location', 'I-location', 'B-person', 'I-person', 'B-product', 'I-product']
print(label_names, )

['O', 'B-corporation', 'I-corporation', 'B-creative-work', 'I-creative-work', 'B-group', 'I-group', 'B-location', 'I-location', 'B-person', 'I-person', 'B-product', 'I-product']


#### Metrics

In [11]:
!pip install seqeval



In [12]:
import evaluate
metric = evaluate.load("seqeval")

In [13]:
import numpy as np

def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # Remove ignored index (special tokens) and convert to labels
    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]
    true_predictions = [
        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
    }

In [14]:
tf_train_dataset = tokenized_datasets["train"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=True,
    batch_size=16,
)

tf_eval_dataset = tokenized_datasets["validation"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=False,
    batch_size=16,
)

tf_test_dataset = tokenized_datasets["test"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=False,
    batch_size=16,
)

tf_train_dataset32 = tokenized_datasets["train"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=True,
    batch_size=32,
)

tf_eval_dataset32 = tokenized_datasets["validation"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=False,
    batch_size=32,
)

tf_test_dataset32 = tokenized_datasets["test"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=False,
    batch_size=32,
)

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [15]:
id2label = {i: label for i, label in enumerate(label_names)}
label2id = {v: k for k, v in id2label.items()}

In [16]:
from transformers import TFAutoModelForTokenClassification

model = TFAutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

All PyTorch model weights were used when initializing TFBertForTokenClassification.

Some weights or buffers of the TF 2.0 model TFBertForTokenClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


#### Train the model

In [None]:
from huggingface_hub import notebook_login

notebook_login()

# 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

In [18]:
from transformers import create_optimizer
import tensorflow as tf

# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
tf.keras.mixed_precision.set_global_policy("mixed_float16")

# The number of training steps is the number of samples in the dataset, divided by the batch size then multiplied
# by the total number of epochs. Note that the tf_train_dataset here is a batched tf.data.Dataset,
# not the original Hugging Face Dataset, so its len() is already num_samples // batch_size.
num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=2e-5,
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)

In [19]:
from transformers.keras_callbacks import PushToHubCallback

callback = PushToHubCallback(output_dir="bert-finetuned-ner", tokenizer=tokenizer)

model.fit(
    tf_train_dataset,
    # validation_data=tf_eval_dataset,
    callbacks=[callback],
    epochs=num_epochs,
)

Cloning https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner into local empty directory.


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7c1ee6f4de10>

In [20]:
import numpy as np

all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

{'corporation': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 66},
 'creative-work': {'precision': 0.2,
  'recall': 0.07746478873239436,
  'f1': 0.11167512690355329,
  'number': 142},
 'group': {'precision': 0.2391304347826087,
  'recall': 0.06666666666666667,
  'f1': 0.10426540284360189,
  'number': 165},
 'location': {'precision': 0.3551912568306011,
  'recall': 0.43333333333333335,
  'f1': 0.3903903903903904,
  'number': 150},
 'person': {'precision': 0.6295081967213115,
  'recall': 0.44755244755244755,
  'f1': 0.5231607629427792,
  'number': 429},
 'product': {'precision': 0.047058823529411764,
  'recall': 0.031496062992125984,
  'f1': 0.03773584905660377,
  'number': 127},
 'overall_precision': 0.41253644314868804,
 'overall_recall': 0.2622798887859129,
 'overall_f1': 0.3206798866855524,
 'overall_accuracy': 0.9319597989949748}

In [21]:
# import numpy as np
# from sklearn.model_selection import ParameterGrid
# from transformers import TFAutoModelForTokenClassification, create_optimizer
# from transformers.keras_callbacks import PushToHubCallback
# import tensorflow as tf
# import evaluate

# metric = evaluate.load("seqeval")

# # Define your tokenized_datasets, data_collator, label_names, etc.

# learning_rates = [1e-5, 1e-5, 5e-6]
# batch_sizes = [16, 32]

# # Instantiate the metric (e.g., seqeval) if not already done
# # metric = evaluate.load("seqeval")

# results = []

# for batch_size in batch_sizes:
#     for learning_rate in learning_rates:
#         # Train in mixed-precision float16
#         # Comment this line out if you're using a GPU that will not benefit from this
#         tf.keras.mixed_precision.set_global_policy("mixed_float16")

#         num_epochs = 3

#         # Create the TF datasets for the given batch size
#         tf_train_dataset = tokenized_datasets["train"].to_tf_dataset(
#             columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
#             collate_fn=data_collator,
#             shuffle=True,
#             batch_size=batch_size,
#         )

#         tf_eval_dataset = tokenized_datasets["validation"].to_tf_dataset(
#             columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
#             collate_fn=data_collator,
#             shuffle=False,
#             batch_size=batch_size,
#         )

#         tf_test_dataset = tokenized_datasets["test"].to_tf_dataset(
#             columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
#             collate_fn=data_collator,
#             shuffle=False,
#             batch_size=batch_size,
#         )

#         model = TFAutoModelForTokenClassification.from_pretrained(
#             model_checkpoint,
#             id2label=id2label,
#             label2id=label2id,
#         )

#         # optimizer with AdamW
#         num_train_steps = len(tf_train_dataset) * num_epochs
#         optimizer, schedule = create_optimizer(
#             init_lr=learning_rate,
#             num_warmup_steps=0,
#             num_train_steps=num_train_steps,
#             weight_decay_rate=0.01,
#         )

#         model.compile(optimizer=optimizer)

#         callback = PushToHubCallback(output_dir=f"bert-finetuned-ner_lr{learning_rate}_bs{batch_size}", tokenizer=tokenizer)

#         history = model.fit(
#             tf_train_dataset,
#             validation_data=tf_eval_dataset,  # Use the dev set for validation
#             callbacks=[callback],
#             epochs=num_epochs,
#         )

#         all_predictions = []
#         all_labels = []

#         for batch in tf_test_dataset:
#             logits = model.predict_on_batch(batch)["logits"]
#             labels = batch["labels"]
#             predictions = np.argmax(logits, axis=-1)
#             for prediction, label in zip(predictions, labels):
#                 for predicted_idx, label_idx in zip(prediction, label):
#                     if label_idx != -100:
#                         all_predictions.append(predicted_idx)
#                         all_labels.append(label_idx)

#         # Use metric.compute() for NER evaluation
#         report = metric.compute(predictions=[all_predictions], references=[all_labels])

#         print(f"Learning Rate: {learning_rate}, Batch Size: {batch_size}")
#         print(report)

#         results.append({
#             'learning_rate': learning_rate,
#             'batch_size': batch_size,
#             'classification_report': report,
#         })


In [22]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[0],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-1e-5-b16", tokenizer=tokenizer)

model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])



/content/bert-finetuned-ner-1e-5-b16 is already a clone of https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-1e-5-b16. Make sure you pull the latest changes with `repo.git_pull()`.


Epoch 1/3
Epoch 2/3

Several commits (2) will be pushed upstream.


Epoch 3/3

Several commits (3) will be pushed upstream.




{'corporation': {'precision': 0.16666666666666666,
  'recall': 0.24242424242424243,
  'f1': 0.19753086419753085,
  'number': 66},
 'creative-work': {'precision': 0.2653061224489796,
  'recall': 0.18309859154929578,
  'f1': 0.21666666666666667,
  'number': 142},
 'group': {'precision': 0.39344262295081966,
  'recall': 0.14545454545454545,
  'f1': 0.21238938053097342,
  'number': 165},
 'location': {'precision': 0.5470085470085471,
  'recall': 0.4266666666666667,
  'f1': 0.4794007490636704,
  'number': 150},
 'person': {'precision': 0.7014925373134329,
  'recall': 0.4382284382284382,
  'f1': 0.539454806312769,
  'number': 429},
 'product': {'precision': 0.10666666666666667,
  'recall': 0.06299212598425197,
  'f1': 0.0792079207920792,
  'number': 127},
 'overall_precision': 0.45594405594405596,
 'overall_recall': 0.30213160333642264,
 'overall_f1': 0.3634336677814939,
 'overall_accuracy': 0.9345979899497487}

In [23]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[1],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-3e-5-b16", tokenizer=tokenizer)

model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

/content/bert-finetuned-ner-3e-5-b16 is already a clone of https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-3e-5-b16. Make sure you pull the latest changes with `repo.git_pull()`.


Epoch 1/3
Epoch 2/3

Several commits (2) will be pushed upstream.


Epoch 3/3

Several commits (3) will be pushed upstream.




{'corporation': {'precision': 0.20833333333333334,
  'recall': 0.22727272727272727,
  'f1': 0.21739130434782608,
  'number': 66},
 'creative-work': {'precision': 0.32978723404255317,
  'recall': 0.21830985915492956,
  'f1': 0.2627118644067797,
  'number': 142},
 'group': {'precision': 0.5,
  'recall': 0.19393939393939394,
  'f1': 0.2794759825327511,
  'number': 165},
 'location': {'precision': 0.5636363636363636,
  'recall': 0.41333333333333333,
  'f1': 0.47692307692307695,
  'number': 150},
 'person': {'precision': 0.7529411764705882,
  'recall': 0.44755244755244755,
  'f1': 0.5614035087719298,
  'number': 429},
 'product': {'precision': 0.2,
  'recall': 0.14173228346456693,
  'f1': 0.1658986175115207,
  'number': 127},
 'overall_precision': 0.5109489051094891,
 'overall_recall': 0.3243744207599629,
 'overall_f1': 0.39682539682539686,
 'overall_accuracy': 0.9351005025125628}

In [24]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[2],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-5e-5-b16", tokenizer=tokenizer)

model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

/content/bert-finetuned-ner-5e-5-b16 is already a clone of https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-5e-5-b16. Make sure you pull the latest changes with `repo.git_pull()`.


Epoch 1/3
Epoch 2/3

Several commits (2) will be pushed upstream.


Epoch 3/3

Several commits (3) will be pushed upstream.




{'corporation': {'precision': 0.20238095238095238,
  'recall': 0.25757575757575757,
  'f1': 0.22666666666666666,
  'number': 66},
 'creative-work': {'precision': 0.3563218390804598,
  'recall': 0.21830985915492956,
  'f1': 0.2707423580786026,
  'number': 142},
 'group': {'precision': 0.5166666666666667,
  'recall': 0.18787878787878787,
  'f1': 0.27555555555555555,
  'number': 165},
 'location': {'precision': 0.5344827586206896,
  'recall': 0.41333333333333333,
  'f1': 0.46616541353383456,
  'number': 150},
 'person': {'precision': 0.7642276422764228,
  'recall': 0.4382284382284382,
  'f1': 0.557037037037037,
  'number': 429},
 'product': {'precision': 0.17391304347826086,
  'recall': 0.12598425196850394,
  'f1': 0.1461187214611872,
  'number': 127},
 'overall_precision': 0.5036496350364964,
 'overall_recall': 0.31974050046339203,
 'overall_f1': 0.391156462585034,
 'overall_accuracy': 0.9343718592964824}

In [25]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[0],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-1e-5-b32", tokenizer=tokenizer)

model.fit(
    tf_train_dataset32,
    validation_data=tf_eval_dataset32,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset32:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

/content/bert-finetuned-ner-1e-5-b32 is already a clone of https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-1e-5-b32. Make sure you pull the latest changes with `repo.git_pull()`.


Epoch 1/3
Epoch 2/3

Several commits (2) will be pushed upstream.


Epoch 3/3

Several commits (3) will be pushed upstream.




{'corporation': {'precision': 0.18478260869565216,
  'recall': 0.25757575757575757,
  'f1': 0.21518987341772153,
  'number': 66},
 'creative-work': {'precision': 0.4230769230769231,
  'recall': 0.2323943661971831,
  'f1': 0.3,
  'number': 142},
 'group': {'precision': 0.4696969696969697,
  'recall': 0.18787878787878787,
  'f1': 0.2683982683982684,
  'number': 165},
 'location': {'precision': 0.5803571428571429,
  'recall': 0.43333333333333335,
  'f1': 0.4961832061068702,
  'number': 150},
 'person': {'precision': 0.7649572649572649,
  'recall': 0.4172494172494173,
  'f1': 0.5399698340874812,
  'number': 429},
 'product': {'precision': 0.18823529411764706,
  'recall': 0.12598425196850394,
  'f1': 0.1509433962264151,
  'number': 127},
 'overall_precision': 0.5112443778110944,
 'overall_recall': 0.3160333642261353,
 'overall_f1': 0.3906071019473082,
 'overall_accuracy': 0.9342462311557789}

In [26]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[1],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-3e-5-32", tokenizer=tokenizer)

model.fit(
    tf_train_dataset32,
    validation_data=tf_eval_dataset32,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset32:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

/content/bert-finetuned-ner-3e-5-32 is already a clone of https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-3e-5-32. Make sure you pull the latest changes with `repo.git_pull()`.


Epoch 1/3
Epoch 2/3

Several commits (2) will be pushed upstream.


Epoch 3/3

Several commits (3) will be pushed upstream.




{'corporation': {'precision': 0.25,
  'recall': 0.22727272727272727,
  'f1': 0.23809523809523808,
  'number': 66},
 'creative-work': {'precision': 0.3684210526315789,
  'recall': 0.19718309859154928,
  'f1': 0.2568807339449541,
  'number': 142},
 'group': {'precision': 0.5625,
  'recall': 0.16363636363636364,
  'f1': 0.2535211267605634,
  'number': 165},
 'location': {'precision': 0.5666666666666667,
  'recall': 0.4533333333333333,
  'f1': 0.5037037037037037,
  'number': 150},
 'person': {'precision': 0.73828125,
  'recall': 0.4405594405594406,
  'f1': 0.5518248175182482,
  'number': 429},
 'product': {'precision': 0.171875,
  'recall': 0.1732283464566929,
  'f1': 0.1725490196078431,
  'number': 127},
 'overall_precision': 0.5072674418604651,
 'overall_recall': 0.32344763670064874,
 'overall_f1': 0.39501980758347477,
 'overall_accuracy': 0.9351005025125628}

In [27]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[2],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-5e-5-32", tokenizer=tokenizer)

model.fit(
    tf_train_dataset32,
    validation_data=tf_eval_dataset32,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset32:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

/content/bert-finetuned-ner-5e-5-32 is already a clone of https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-5e-5-32. Make sure you pull the latest changes with `repo.git_pull()`.


Epoch 1/3
Epoch 2/3

Several commits (2) will be pushed upstream.


Epoch 3/3

Several commits (3) will be pushed upstream.




{'corporation': {'precision': 0.2112676056338028,
  'recall': 0.22727272727272727,
  'f1': 0.21897810218978103,
  'number': 66},
 'creative-work': {'precision': 0.40963855421686746,
  'recall': 0.23943661971830985,
  'f1': 0.3022222222222222,
  'number': 142},
 'group': {'precision': 0.4714285714285714,
  'recall': 0.2,
  'f1': 0.28085106382978725,
  'number': 165},
 'location': {'precision': 0.5739130434782609,
  'recall': 0.44,
  'f1': 0.49811320754716987,
  'number': 150},
 'person': {'precision': 0.758893280632411,
  'recall': 0.44755244755244755,
  'f1': 0.563049853372434,
  'number': 429},
 'product': {'precision': 0.19101123595505617,
  'recall': 0.13385826771653545,
  'f1': 0.1574074074074074,
  'number': 127},
 'overall_precision': 0.5242290748898678,
 'overall_recall': 0.33086190917516217,
 'overall_f1': 0.40568181818181814,
 'overall_accuracy': 0.9350502512562814}

In [28]:
# from transformers import TFAutoModelForTokenClassification
# from sklearn.metrics import classification_report, f1_score

# model_checkpoint2 = "PelagiaKalpakidou/bert-finetuned-ner-5e-5-32"
# tokenizer = AutoTokenizer.from_pretrained(model_checkpoint2)

# tf_train_dataset32 = tokenized_datasets["train"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
#     collate_fn=data_collator,
#     shuffle=True,
#     batch_size=32,
# )

# tf_eval_dataset32 = tokenized_datasets["validation"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
#     collate_fn=data_collator,
#     shuffle=False,
#     batch_size=32,
# )

# tf_test_dataset32 = tokenized_datasets["test"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
#     collate_fn=data_collator,
#     shuffle=False,
#     batch_size=32,
# )

# model = TFAutoModelForTokenClassification.from_pretrained(
#     model_checkpoint2,
#     id2label=id2label,
#     label2id=label2id,
# )

# all_predictions = []
# all_labels = []
# for batch in tf_test_dataset32:
#     logits = model.predict_on_batch(batch)["logits"]
#     labels = batch["labels"]
#     predictions = np.argmax(logits, axis=-1)
#     for prediction, label in zip(predictions, labels):
#         for predicted_idx, label_idx in zip(prediction, label):
#             if label_idx == -100:
#                 continue
#             all_predictions.append(label_names[predicted_idx])
#             all_labels.append(label_names[label_idx])

# # Compute classification report (precision, recall, F1-score)
# report = classification_report(all_labels, all_predictions)
# print(report)

# micro_avg_f1 = f1_score(all_labels, all_predictions, average='micro')
# print("Micro-average F1 score:", micro_avg_f1)

# # Calculate macro-average F1 score
# macro_avg_f1 = f1_score(all_labels, all_predictions, average='macro')
# print("Macro-average F1 score:", macro_avg_f1)

OSError: ignored