# Text Mining - Assignment 2: Sequence Labelling
## Group 58: Vasiliki Gkika, Pelagia Kalpakidou

In [1]:
# libraries
!pip install datasets evaluate transformers[sentencepiece]
!apt install git-lfs
import re
import datasets
from datasets import DatasetDict
import tensorflow as tf

Collecting datasets
  Downloading datasets-2.14.6-py3-none-any.whl (493 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/493.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.4/493.7 kB[0m [31m4.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.7/493.7 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers[sentencepiece]
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m95.3 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━

### Load data

In [2]:
# load data and convert IOB file to correct data structure
def read_datasets(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        raw_text = file.read().strip()

    raw_docs = re.split(r'\n\t?\n', raw_text)
    token_docs = []
    tag_docs = []

    for doc in raw_docs:
        tokens = []
        tags = []
        for line in doc.split('\n'):
            if len(line.split('\t')) < 2:
                continue
            token, tag = line.split('\t')
            tokens.append(token)
            tags.append(tag)
        token_docs.append(tokens)
        tag_docs.append(tags)

    return token_docs, tag_docs

tokens_train, tag_train = read_datasets('wnut17train.conll')
tokens_dev, tag_dev = read_datasets('emerging.dev.conll')
tokens_test, tag_test = read_datasets('emerging.test.annotated')

### Pre-processing

In [3]:
# map IOB tags to NER tags
mapping = {
        'O': 0,
        'B-corporation': 1,
        'I-corporation': 2,
        'B-creative-work': 3,
        'I-creative-work': 4,
        'B-group': 5,
        'I-group': 6,
        'B-location': 7,
        'I-location': 8,
        'B-person': 9,
        'I-person': 10,
        'B-product': 11,
        'I-product': 12,
    }

def IOB_to_NER (tokens, iob_tags):
    ner_tags = []
    for iob in iob_tags:
        ner_tags.append([mapping[tag] for tag in iob])
    return ner_tags

ner_train = IOB_to_NER(tokens_train, tag_train)
ner_dev = IOB_to_NER(tokens_dev, tag_dev)
ner_test = IOB_to_NER(tokens_test, tag_test)


In [4]:
train_dataset = datasets.Dataset.from_dict({"id": range(len(tokens_train)), "tokens": tokens_train, "iob_tags": tag_train, "ner_tags": ner_train})
validation_dataset = datasets.Dataset.from_dict({"id": range(len(tokens_dev)), "tokens": tokens_dev, "iob_tags": tag_dev, "ner_tags": ner_dev})
test_dataset = datasets.Dataset.from_dict({"id": range(len(tokens_test)), "tokens": tokens_test, "iob_tags": tag_test, "ner_tags": ner_test})

# from torch.utils.data import DataLoader

combined_datasets = DatasetDict({
    "train": train_dataset,
    "validation": validation_dataset,
    "test": test_dataset
})

combined_datasets
len(combined_datasets["train"]["tokens"])

3394

In [5]:
# decoding and displaying the NER tags in a human-readable format
words = combined_datasets["train"][0]["tokens"]
labels = combined_datasets["train"][0]["iob_tags"]
line1 = ""
line2 = ""

for word, labels in zip(words, labels):
    max_length = max(len(word), max(len(label) for label in labels))
    line1 += word + " " * (max_length - len(word) + 1)
    line2 += " ".join(labels) + " " * (max_length - max(len(label) for label in labels) + 1)

print(line1)
print(line2)

@paulwalk It 's the view from where I 'm living for two weeks . Empire State Building = ESB . Pretty bad storm here last evening . 
O         O  O  O   O    O    O     O O  O      O   O   O     O B - l o c a t i o n      I - l o c a t i o n     I - l o c a t i o n        O B - l o c a t i o n   O O      O   O     O    O    O       O 


#### Align labels with tokens

In [6]:
from transformers import AutoTokenizer

model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# tokenize a pre-tokenized input
inputs = tokenizer(combined_datasets["train"][0]["tokens"], is_split_into_words=True)
print(inputs.tokens(), )
print(inputs.word_ids(), )

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

['[CLS]', '@', 'p', '##aul', '##walk', 'It', "'", 's', 'the', 'view', 'from', 'where', 'I', "'", 'm', 'living', 'for', 'two', 'weeks', '.', 'Empire', 'State', 'Building', '=', 'E', '##SB', '.', 'Pretty', 'bad', 'storm', 'here', 'last', 'evening', '.', '[SEP]']
[None, 0, 0, 0, 0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26, None]


In [7]:

def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    current_word = None
    for word_id in word_ids:
        if word_id != current_word:
            # Start of a new word!
            current_word = word_id
            label = -100 if word_id is None else labels[word_id]
            new_labels.append(label)
        elif word_id is None:
            # Special token
            new_labels.append(-100)
        else:
            # Same word as previous token
            label = labels[word_id]
            # # If the label is B-XXX we change it to I-XXX
            if label % 2 == 1:
                label += 1
            new_labels.append(label)
    return new_labels


labels = combined_datasets["train"][0]["ner_tags"]
word_ids = inputs.word_ids()
print(labels)
print(align_labels_with_tokens(labels, word_ids))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 8, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0]
[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 8, 0, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0, -100]


In [8]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"], truncation=True, is_split_into_words=True
    )
    all_labels = examples["ner_tags"]
    new_labels = []
    for i, labels in enumerate(all_labels):
        word_ids = tokenized_inputs.word_ids(i)
        new_labels.append(align_labels_with_tokens(labels, word_ids))

    tokenized_inputs["labels"] = new_labels
    return tokenized_inputs



tokenized_datasets = combined_datasets.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=combined_datasets["train"].column_names,
)


tokenized_datasets

Map:   0%|          | 0/3394 [00:00<?, ? examples/s]

Map:   0%|          | 0/1009 [00:00<?, ? examples/s]

Map:   0%|          | 0/1287 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 3394
    })
    validation: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 1009
    })
    test: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 1287
    })
})

### Fine-tuning the model

In [9]:
# from transformers import DataCollatorForTokenClassification

# data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(
    tokenizer=tokenizer, return_tensors="tf"
)

In [14]:
label_names = ['O', 'B-corporation', 'I-corporation', 'B-creative-work', 'I-creative-work', 'B-group', 'I-group', 'B-location', 'I-location', 'B-person', 'I-person', 'B-product', 'I-product']
print(label_names, )

['O', 'B-corporation', 'I-corporation', 'B-creative-work', 'I-creative-work', 'B-group', 'I-group', 'B-location', 'I-location', 'B-person', 'I-person', 'B-product', 'I-product']


#### Metrics

In [10]:
!pip install seqeval

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=ee9846a3b62b832f703e7c00619fdc7029b14dae10a9ecc7b2f5f62364e64bef
  Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


In [11]:
import evaluate
metric = evaluate.load("seqeval")

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

In [19]:
import numpy as np

def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # Remove ignored index (special tokens) and convert to labels
    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]
    true_predictions = [
        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
    }

In [35]:
tf_train_dataset = tokenized_datasets["train"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=True,
    batch_size=16,
)

tf_eval_dataset = tokenized_datasets["validation"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=False,
    batch_size=16,
)

tf_test_dataset = tokenized_datasets["test"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=False,
    batch_size=16,
)

tf_train_dataset32 = tokenized_datasets["train"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=True,
    batch_size=32,
)

tf_eval_dataset32 = tokenized_datasets["validation"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=False,
    batch_size=32,
)

tf_test_dataset32 = tokenized_datasets["test"].to_tf_dataset(
    columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
    collate_fn=data_collator,
    shuffle=False,
    batch_size=32,
)

In [15]:
id2label = {i: label for i, label in enumerate(label_names)}
label2id = {v: k for k, v in id2label.items()}

In [22]:
from transformers import TFAutoModelForTokenClassification

model = TFAutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

Downloading model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertForTokenClassification.

Some weights or buffers of the TF 2.0 model TFBertForTokenClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


#### Train the model

In [16]:
from huggingface_hub import notebook_login

notebook_login()

# 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

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [39]:
from transformers import create_optimizer
import tensorflow as tf

# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
tf.keras.mixed_precision.set_global_policy("mixed_float16")

# The number of training steps is the number of samples in the dataset, divided by the batch size then multiplied
# by the total number of epochs. Note that the tf_train_dataset here is a batched tf.data.Dataset,
# not the original Hugging Face Dataset, so its len() is already num_samples // batch_size.
num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=2e-5,
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)

In [28]:
from transformers.keras_callbacks import PushToHubCallback

callback = PushToHubCallback(output_dir="bert-finetuned-ner", tokenizer=tokenizer)

model.fit(
    tf_train_dataset,
    # validation_data=tf_eval_dataset,
    callbacks=[callback],
    epochs=num_epochs,
)

Cloning https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner into local empty directory.


Download file tf_model.h5:   0%|          | 1.45k/411M [00:00<?, ?B/s]

Download file runs/Nov07_00-16-52_DESKTOP-AQ9QUUF/events.out.tfevents.1699312615.DESKTOP-AQ9QUUF.23288.4: 100%…

Download file runs/Nov07_10-46-52_DESKTOP-AQ9QUUF/events.out.tfevents.1699350412.DESKTOP-AQ9QUUF.15484.2: 100%…

Download file runs/Nov07_00-14-24_DESKTOP-AQ9QUUF/events.out.tfevents.1699312466.DESKTOP-AQ9QUUF.23288.3: 100%…

Clean file runs/Nov07_00-16-52_DESKTOP-AQ9QUUF/events.out.tfevents.1699312615.DESKTOP-AQ9QUUF.23288.4:  21%|##…

Download file runs/Nov07_00-17-19_DESKTOP-AQ9QUUF/events.out.tfevents.1699312641.DESKTOP-AQ9QUUF.23288.5: 100%…

Clean file runs/Nov07_00-14-24_DESKTOP-AQ9QUUF/events.out.tfevents.1699312466.DESKTOP-AQ9QUUF.23288.3:  21%|##…

Clean file runs/Nov07_10-46-52_DESKTOP-AQ9QUUF/events.out.tfevents.1699350412.DESKTOP-AQ9QUUF.15484.2:  22%|##…

Clean file runs/Nov07_00-17-19_DESKTOP-AQ9QUUF/events.out.tfevents.1699312641.DESKTOP-AQ9QUUF.23288.5:  21%|##…

Clean file tf_model.h5:   0%|          | 1.00k/411M [00:00<?, ?B/s]

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7cbb398e9f30>

In [29]:
import numpy as np

all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

{'corporation': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 66},
 'creative-work': {'precision': 0.2804878048780488,
  'recall': 0.1619718309859155,
  'f1': 0.20535714285714288,
  'number': 142},
 'group': {'precision': 0.34285714285714286,
  'recall': 0.07272727272727272,
  'f1': 0.12,
  'number': 165},
 'location': {'precision': 0.3916083916083916,
  'recall': 0.37333333333333335,
  'f1': 0.3822525597269624,
  'number': 150},
 'person': {'precision': 0.7261410788381742,
  'recall': 0.40792540792540793,
  'f1': 0.5223880597014926,
  'number': 429},
 'product': {'precision': 0.13157894736842105,
  'recall': 0.07874015748031496,
  'f1': 0.09852216748768472,
  'number': 127},
 'overall_precision': 0.4734133790737564,
 'overall_recall': 0.2557924003707136,
 'overall_f1': 0.33212996389891697,
 'overall_accuracy': 0.9292713567839196}

In [None]:
# import numpy as np
# from sklearn.model_selection import ParameterGrid
# from transformers import TFAutoModelForTokenClassification, create_optimizer
# from transformers.keras_callbacks import PushToHubCallback
# import tensorflow as tf
# import evaluate

# metric = evaluate.load("seqeval")

# # Define your tokenized_datasets, data_collator, label_names, etc.

# learning_rates = [1e-5, 3e-5, 5e-6]
# batch_sizes = [16, 32]

# # Instantiate the metric (e.g., seqeval) if not already done
# # metric = evaluate.load("seqeval")

# results = []

# for batch_size in batch_sizes:
#     for learning_rate in learning_rates:
#         # Train in mixed-precision float16
#         # Comment this line out if you're using a GPU that will not benefit from this
#         tf.keras.mixed_precision.set_global_policy("mixed_float16")

#         num_epochs = 3

#         # Create the TF datasets for the given batch size
#         tf_train_dataset = tokenized_datasets["train"].to_tf_dataset(
#             columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
#             collate_fn=data_collator,
#             shuffle=True,
#             batch_size=batch_size,
#         )

#         tf_eval_dataset = tokenized_datasets["validation"].to_tf_dataset(
#             columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
#             collate_fn=data_collator,
#             shuffle=False,
#             batch_size=batch_size,
#         )

#         tf_test_dataset = tokenized_datasets["test"].to_tf_dataset(
#             columns=["attention_mask", "input_ids", "labels", "token_type_ids"],
#             collate_fn=data_collator,
#             shuffle=False,
#             batch_size=batch_size,
#         )

#         model = TFAutoModelForTokenClassification.from_pretrained(
#             model_checkpoint,
#             id2label=id2label,
#             label2id=label2id,
#         )

#         # optimizer with AdamW
#         num_train_steps = len(tf_train_dataset) * num_epochs
#         optimizer, schedule = create_optimizer(
#             init_lr=learning_rate,
#             num_warmup_steps=0,
#             num_train_steps=num_train_steps,
#             weight_decay_rate=0.01,
#         )

#         model.compile(optimizer=optimizer)

#         callback = PushToHubCallback(output_dir=f"bert-finetuned-ner_lr{learning_rate}_bs{batch_size}", tokenizer=tokenizer)

#         history = model.fit(
#             tf_train_dataset,
#             validation_data=tf_eval_dataset,  # Use the dev set for validation
#             callbacks=[callback],
#             epochs=num_epochs,
#         )

#         all_predictions = []
#         all_labels = []

#         for batch in tf_test_dataset:
#             logits = model.predict_on_batch(batch)["logits"]
#             labels = batch["labels"]
#             predictions = np.argmax(logits, axis=-1)
#             for prediction, label in zip(predictions, labels):
#                 for predicted_idx, label_idx in zip(prediction, label):
#                     if label_idx != -100:
#                         all_predictions.append(predicted_idx)
#                         all_labels.append(label_idx)

#         # Use metric.compute() for NER evaluation
#         report = metric.compute(predictions=[all_predictions], references=[all_labels])

#         print(f"Learning Rate: {learning_rate}, Batch Size: {batch_size}")
#         print(report)

#         results.append({
#             'learning_rate': learning_rate,
#             'batch_size': batch_size,
#             'classification_report': report,
#         })


In [31]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[0],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-1e-5-b16", tokenizer=tokenizer)

model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

Cloning https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-1e-5-b16 into local empty directory.


Epoch 1/3
Epoch 2/3
Epoch 3/3


{'corporation': {'precision': 0.23376623376623376,
  'recall': 0.2727272727272727,
  'f1': 0.2517482517482518,
  'number': 66},
 'creative-work': {'precision': 0.4125,
  'recall': 0.2323943661971831,
  'f1': 0.2972972972972973,
  'number': 142},
 'group': {'precision': 0.4153846153846154,
  'recall': 0.16363636363636364,
  'f1': 0.23478260869565218,
  'number': 165},
 'location': {'precision': 0.5545454545454546,
  'recall': 0.4066666666666667,
  'f1': 0.4692307692307693,
  'number': 150},
 'person': {'precision': 0.7392996108949417,
  'recall': 0.4428904428904429,
  'f1': 0.5539358600583092,
  'number': 429},
 'product': {'precision': 0.14285714285714285,
  'recall': 0.10236220472440945,
  'f1': 0.11926605504587157,
  'number': 127},
 'overall_precision': 0.5029411764705882,
 'overall_recall': 0.3169601482854495,
 'overall_f1': 0.3888573052870949,
 'overall_accuracy': 0.9346231155778895}

In [32]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[1],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-3e-5-b16", tokenizer=tokenizer)

model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

Cloning https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-3e-5-b16 into local empty directory.


Epoch 1/3
Epoch 2/3
Epoch 3/3


{'corporation': {'precision': 0.234375,
  'recall': 0.22727272727272727,
  'f1': 0.23076923076923075,
  'number': 66},
 'creative-work': {'precision': 0.37362637362637363,
  'recall': 0.23943661971830985,
  'f1': 0.2918454935622317,
  'number': 142},
 'group': {'precision': 0.48,
  'recall': 0.21818181818181817,
  'f1': 0.3,
  'number': 165},
 'location': {'precision': 0.5789473684210527,
  'recall': 0.44,
  'f1': 0.5,
  'number': 150},
 'person': {'precision': 0.6962962962962963,
  'recall': 0.4382284382284382,
  'f1': 0.5379113018597997,
  'number': 429},
 'product': {'precision': 0.1827956989247312,
  'recall': 0.13385826771653545,
  'f1': 0.15454545454545454,
  'number': 127},
 'overall_precision': 0.5035360678925035,
 'overall_recall': 0.329935125115848,
 'overall_f1': 0.39865621500559906,
 'overall_accuracy': 0.9358542713567839}

In [33]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[2],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-5e-5-b16", tokenizer=tokenizer)

model.fit(
    tf_train_dataset,
    validation_data=tf_eval_dataset,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

Cloning https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-5e-5-b16 into local empty directory.


Epoch 1/3
Epoch 2/3
Epoch 3/3


{'corporation': {'precision': 0.20689655172413793,
  'recall': 0.2727272727272727,
  'f1': 0.23529411764705882,
  'number': 66},
 'creative-work': {'precision': 0.41975308641975306,
  'recall': 0.23943661971830985,
  'f1': 0.30493273542600896,
  'number': 142},
 'group': {'precision': 0.4520547945205479,
  'recall': 0.2,
  'f1': 0.2773109243697479,
  'number': 165},
 'location': {'precision': 0.5775862068965517,
  'recall': 0.44666666666666666,
  'f1': 0.5037593984962406,
  'number': 150},
 'person': {'precision': 0.7195571955719557,
  'recall': 0.45454545454545453,
  'f1': 0.557142857142857,
  'number': 429},
 'product': {'precision': 0.19047619047619047,
  'recall': 0.12598425196850394,
  'f1': 0.15165876777251183,
  'number': 127},
 'overall_precision': 0.5098314606741573,
 'overall_recall': 0.33642261353104724,
 'overall_f1': 0.4053601340033501,
 'overall_accuracy': 0.9359296482412061}

In [36]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[0],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-1e-5-b32", tokenizer=tokenizer)

model.fit(
    tf_train_dataset32,
    validation_data=tf_eval_dataset32,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

/content/bert-finetuned-ner-1e-5-b32 is already a clone of https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-1e-5-b32. Make sure you pull the latest changes with `repo.git_pull()`.


Epoch 1/3
Epoch 2/3
Epoch 3/3


{'corporation': {'precision': 0.2236842105263158,
  'recall': 0.25757575757575757,
  'f1': 0.23943661971830985,
  'number': 66},
 'creative-work': {'precision': 0.4225352112676056,
  'recall': 0.2112676056338028,
  'f1': 0.2816901408450704,
  'number': 142},
 'group': {'precision': 0.4533333333333333,
  'recall': 0.20606060606060606,
  'f1': 0.2833333333333333,
  'number': 165},
 'location': {'precision': 0.5726495726495726,
  'recall': 0.44666666666666666,
  'f1': 0.50187265917603,
  'number': 150},
 'person': {'precision': 0.7401574803149606,
  'recall': 0.4382284382284382,
  'f1': 0.5505124450951684,
  'number': 429},
 'product': {'precision': 0.17391304347826086,
  'recall': 0.12598425196850394,
  'f1': 0.1461187214611872,
  'number': 127},
 'overall_precision': 0.5138686131386861,
 'overall_recall': 0.3262279888785913,
 'overall_f1': 0.39909297052154197,
 'overall_accuracy': 0.935502512562814}

In [37]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[1],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-3e-5-32", tokenizer=tokenizer)

model.fit(
    tf_train_dataset32,
    validation_data=tf_eval_dataset32,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

Cloning https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-3e-5-32 into local empty directory.


Epoch 1/3
Epoch 2/3
Epoch 3/3


{'corporation': {'precision': 0.2236842105263158,
  'recall': 0.25757575757575757,
  'f1': 0.23943661971830985,
  'number': 66},
 'creative-work': {'precision': 0.32038834951456313,
  'recall': 0.2323943661971831,
  'f1': 0.2693877551020408,
  'number': 142},
 'group': {'precision': 0.3627450980392157,
  'recall': 0.22424242424242424,
  'f1': 0.27715355805243447,
  'number': 165},
 'location': {'precision': 0.648936170212766,
  'recall': 0.4066666666666667,
  'f1': 0.5,
  'number': 150},
 'person': {'precision': 0.7401574803149606,
  'recall': 0.4382284382284382,
  'f1': 0.5505124450951684,
  'number': 429},
 'product': {'precision': 0.1554054054054054,
  'recall': 0.18110236220472442,
  'f1': 0.1672727272727273,
  'number': 127},
 'overall_precision': 0.462033462033462,
 'overall_recall': 0.33271547729379053,
 'overall_f1': 0.38685344827586204,
 'overall_accuracy': 0.9343718592964824}

In [38]:
from transformers import create_optimizer
import tensorflow as tf
from transformers.keras_callbacks import PushToHubCallback
import numpy as np
# Train in mixed-precision float16
# Comment this line out if you're using a GPU that will not benefit from this
# tf.keras.mixed_precision.set_global_policy("mixed_float16")

learning_rates = [1e-5, 3e-5, 5e-6]

num_epochs = 3
num_train_steps = len(tf_train_dataset) * num_epochs

optimizer, schedule = create_optimizer(
    init_lr=learning_rates[2],
    num_warmup_steps=0,
    num_train_steps=num_train_steps,
    weight_decay_rate=0.01,
)
model.compile(optimizer=optimizer)


callback = PushToHubCallback(output_dir="bert-finetuned-ner-5e-5-32", tokenizer=tokenizer)

model.fit(
    tf_train_dataset32,
    validation_data=tf_eval_dataset32,
    callbacks=[callback],
    epochs=num_epochs,
)


all_predictions = []
all_labels = []
for batch in tf_test_dataset:
    logits = model.predict_on_batch(batch)["logits"]
    labels = batch["labels"]
    predictions = np.argmax(logits, axis=-1)
    for prediction, label in zip(predictions, labels):
        for predicted_idx, label_idx in zip(prediction, label):
            if label_idx == -100:
                continue
            all_predictions.append(label_names[predicted_idx])
            all_labels.append(label_names[label_idx])
metric.compute(predictions=[all_predictions], references=[all_labels])

Cloning https://huggingface.co/PelagiaKalpakidou/bert-finetuned-ner-5e-5-32 into local empty directory.


Epoch 1/3
Epoch 2/3
Epoch 3/3


{'corporation': {'precision': 0.25,
  'recall': 0.2878787878787879,
  'f1': 0.26760563380281693,
  'number': 66},
 'creative-work': {'precision': 0.34375,
  'recall': 0.2323943661971831,
  'f1': 0.27731092436974786,
  'number': 142},
 'group': {'precision': 0.5068493150684932,
  'recall': 0.22424242424242424,
  'f1': 0.31092436974789917,
  'number': 165},
 'location': {'precision': 0.5982142857142857,
  'recall': 0.44666666666666666,
  'f1': 0.5114503816793893,
  'number': 150},
 'person': {'precision': 0.7335907335907336,
  'recall': 0.4428904428904429,
  'f1': 0.5523255813953488,
  'number': 429},
 'product': {'precision': 0.1827956989247312,
  'recall': 0.13385826771653545,
  'f1': 0.15454545454545454,
  'number': 127},
 'overall_precision': 0.5119887165021156,
 'overall_recall': 0.33642261353104724,
 'overall_f1': 0.40604026845637586,
 'overall_accuracy': 0.9358040201005026}