In [1]:
!pip install datasets

Defaulting to user installation because normal site-packages is not writeable


In [3]:
from datasets import load_dataset
from sklearn.preprocessing import MultiLabelBinarizer
import tensorflow as tf
from transformers import AlbertTokenizer, TFAlbertModel
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score

# Load dataset
dataset = load_dataset("surrey-nlp/PLOD-CW")

# Extract features and labels
train_data = dataset["train"]
val_data = dataset["validation"]
test_data = dataset["test"]

X_train = train_data["tokens"]
y_train = train_data["ner_tags"]
X_val = val_data["tokens"]
y_val = val_data["ner_tags"]
X_test = test_data["tokens"]
y_test = test_data["ner_tags"]

X_train = [" ".join(tokens) for tokens in train_data["tokens"]]
X_val = [" ".join(tokens) for tokens in val_data["tokens"]]
X_test = [" ".join(tokens) for tokens in test_data["tokens"]]

# Convert multi-labels to binary format
mlb = MultiLabelBinarizer()
y_train_binarized = mlb.fit_transform(y_train)
y_val_binarized = mlb.transform(y_val)
y_test_binarized = mlb.transform(y_test)

# Initialize the ALBERT tokenizer and model
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
albert_model = TFAlbertModel.from_pretrained("albert-base-v2")

def tokenize_and_encode(sentences):
    # Ensure that sentences is a list of strings
    if not all(isinstance(sentence, str) for sentence in sentences):
        raise ValueError("All items in 'sentences' must be of type str.")
    # Tokenization and encoding
    tokenized = tokenizer(sentences, padding='max_length', truncation=True, max_length=128, return_tensors="tf")
    return tokenized

# Tokenize text data
train_tokenized = tokenize_and_encode(X_train)
val_tokenized = tokenize_and_encode(X_val)
test_tokenized = tokenize_and_encode(X_test)

# Define the neural network model (ALBERT + RNN)
input_ids = tf.keras.layers.Input(shape=(128,), dtype=tf.int32, name="input_ids")
attention_mask = tf.keras.layers.Input(shape=(128,), dtype=tf.int32, name="attention_mask")

# Get the embeddings from ALBERT
albert_output = albert_model(input_ids=input_ids, attention_mask=attention_mask)[0]  

# Add an RNN layer on top of the ALBERT model
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=False))(albert_output)
outputs = tf.keras.layers.Dense(y_train_binarized.shape[1], activation='sigmoid')(x)

model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=outputs)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    {'input_ids': train_tokenized['input_ids'], 'attention_mask': train_tokenized['attention_mask']},
    y_train_binarized,
    validation_data=(
        {'input_ids': val_tokenized['input_ids'], 'attention_mask': val_tokenized['attention_mask']},
        y_val_binarized),
    epochs=10,
    batch_size=32,
    verbose=1
)

# Evaluate the model on the test set
test_scores = model.evaluate({'input_ids': test_tokenized['input_ids'], 'attention_mask': test_tokenized['attention_mask']}, y_test_binarized, verbose=0)
print("Test Loss:", test_scores[0])
print("Test Accuracy:", test_scores[1])

# Predict labels for the test set
y_pred_proba = model.predict({'input_ids': test_tokenized['input_ids'], 'attention_mask': test_tokenized['attention_mask']})
y_pred_binary = (y_pred_proba > 0.5).astype(int)

# Calculate and print metrics
conf_matrix = confusion_matrix(y_test_binarized.argmax(axis=1), y_pred_binary.argmax(axis=1))
print("Confusion Matrix:")
print(conf_matrix)

print("Classification Report:")
print(classification_report(y_test_binarized, y_pred_binary, target_names=mlb.classes_))

accuracy = accuracy_score(y_test_binarized, y_pred_binary)
print("Accuracy:", accuracy)


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFAlbertModel: ['predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.decoder.bias', 'predictions.bias', 'predictions.dense.bias', 'predictions.dense.weight']
- This IS expected if you are initializing TFAlbertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFAlbertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFAlbertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFAlbertModel for predictions without further training.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.42485710978507996
Test Accuracy: 0.1437908560037613
Confusion Matrix:
[[131   0]
 [ 22   0]]
Classification Report:
              precision    recall  f1-score   support

        B-AC       0.86      1.00      0.92       131
        B-LF       0.74      1.00      0.85       113
         B-O       1.00      1.00      1.00       153
        I-LF       0.65      1.00      0.79       100

   micro avg       0.81      1.00      0.90       497
   macro avg       0.81      1.00      0.89       497
weighted avg       0.83      1.00      0.90       497
 samples avg       0.81      1.00      0.86       497

Accuracy: 0.6535947712418301


In [5]:
from sklearn.metrics import f1_score

# Calculate F1 Score
f1 = f1_score(y_test_binarized, y_pred_binary, average='weighted')  # You can change to 'micro', 'macro', or 'weighted'
print("F1 Score:", f1)


F1 Score: 0.9032414786162138
