In [1]:
# Installing the required libraries
!pip install datasets
!pip install transformers

Collecting datasets
  Downloading datasets-2.2.1-py3-none-any.whl (342 kB)
[?25l[K     |█                               | 10 kB 17.6 MB/s eta 0:00:01[K     |██                              | 20 kB 10.2 MB/s eta 0:00:01[K     |██▉                             | 30 kB 8.1 MB/s eta 0:00:01[K     |███▉                            | 40 kB 7.6 MB/s eta 0:00:01[K     |████▉                           | 51 kB 4.3 MB/s eta 0:00:01[K     |█████▊                          | 61 kB 5.0 MB/s eta 0:00:01[K     |██████▊                         | 71 kB 5.3 MB/s eta 0:00:01[K     |███████▋                        | 81 kB 4.2 MB/s eta 0:00:01[K     |████████▋                       | 92 kB 4.6 MB/s eta 0:00:01[K     |█████████▋                      | 102 kB 5.0 MB/s eta 0:00:01[K     |██████████▌                     | 112 kB 5.0 MB/s eta 0:00:01[K     |███████████▌                    | 122 kB 5.0 MB/s eta 0:00:01[K     |████████████▌                   | 133 kB 5.0 MB/s eta 0:00:01[

In [2]:
# Loading in the required libraries
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
import numpy as np

In [3]:
# Pull the IMDb and AG News datasets from Hugging Face
imdb_dataset, ag_dataset = load_dataset("imdb"), load_dataset("ag_news")

# Remove when ready for full training
# Fixed-seed subsets keep experiments quick and repeatable. The "validation" samples
# are drawn from each dataset's "test" split.
imdb_train_samp, imdb_validation_samp = [
    imdb_dataset[split_name].shuffle(seed=123).select(range(2000))
    for split_name in ("train", "test")
]

ag_train_samp, ag_validation_samp = [
    ag_dataset[split_name].shuffle(seed=123).select(range(4000))
    for split_name in ("train", "test")
]

# Exploring the datasets
# imdb_dataset["train"]["text"]
# imdb_dataset["train"]["label"]
# imdb_dataset["train"][0:5]
# ag_dataset["train"]["text"]
# ag_dataset["train"]["label"]
# ag_dataset["train"][0:5]

# Checkpoint identifiers for the three pretrained encoders under comparison
bert_checkpoint = "bert-base-uncased"
dibert_checkpoint = "distilbert-base-uncased"
roberta_checkpoint = "roberta-base"

# Fetch the tokenizer that belongs to each checkpoint (BERT, DistilBERT, RoBERTa, in that order)
bert_tokenizer, dibert_tokenizer, roberta_tokenizer = [
    AutoTokenizer.from_pretrained(checkpoint_name)
    for checkpoint_name in (bert_checkpoint, dibert_checkpoint, roberta_checkpoint)
]

def bert_tokenize_function(example):
    """Tokenise ``example["text"]`` with the BERT tokenizer, truncation enabled."""
    texts = example["text"]
    return bert_tokenizer(texts, truncation=True)

def dibert_tokenize_function(example):
    """Tokenise ``example["text"]`` with the DistilBERT tokenizer, truncation enabled."""
    texts = example["text"]
    return dibert_tokenizer(texts, truncation=True)

def roberta_tokenize_function(example):
    """Tokenise ``example["text"]`` with the RoBERTa tokenizer, truncation enabled."""
    texts = example["text"]
    return roberta_tokenizer(texts, truncation=True)

# Remove when ready for full training
# Every sampled split is tokenised once per model family, in batched mode.
# Each pair below is (train, validation) for one dataset/tokenizer combination.
imdb_train_tokenized_bert, imdb_validation_tokenized_bert = [
    split.map(bert_tokenize_function, batched=True)
    for split in (imdb_train_samp, imdb_validation_samp)
]

imdb_train_tokenized_dibert, imdb_validation_tokenized_dibert = [
    split.map(dibert_tokenize_function, batched=True)
    for split in (imdb_train_samp, imdb_validation_samp)
]

imdb_train_tokenized_roberta, imdb_validation_tokenized_roberta = [
    split.map(roberta_tokenize_function, batched=True)
    for split in (imdb_train_samp, imdb_validation_samp)
]

ag_train_tokenized_bert, ag_validation_tokenized_bert = [
    split.map(bert_tokenize_function, batched=True)
    for split in (ag_train_samp, ag_validation_samp)
]

ag_train_tokenized_dibert, ag_validation_tokenized_dibert = [
    split.map(dibert_tokenize_function, batched=True)
    for split in (ag_train_samp, ag_validation_samp)
]

ag_train_tokenized_roberta, ag_validation_tokenized_roberta = [
    split.map(roberta_tokenize_function, batched=True)
    for split in (ag_train_samp, ag_validation_samp)
]

# Uncomment when ready for full training
# imdb_tokenized_bert = imdb_dataset.map(bert_tokenize_function, batched=True)
# imdb_tokenized_dibert = imdb_dataset.map(dibert_tokenize_function, batched=True)
# imdb_tokenized_roberta = imdb_dataset.map(roberta_tokenize_function, batched=True)
#
# ag_tokenized_bert = ag_dataset.map(bert_tokenize_function, batched=True)
# ag_tokenized_dibert = ag_dataset.map(dibert_tokenize_function, batched=True)
# ag_tokenized_roberta = ag_dataset.map(roberta_tokenize_function, batched=True)

# One padding collator per tokenizer, each configured to return TensorFlow tensors
data_collator_bert, data_collator_dibert, data_collator_roberta = [
    DataCollatorWithPadding(tokenizer=tok, return_tensors="tf")
    for tok in (bert_tokenizer, dibert_tokenizer, roberta_tokenizer)
]

# Remove when ready for full training
def _to_tf_batches(tokenized_split, collator, shuffle, batch_size=8):
    """Convert a tokenised split into a batched TensorFlow dataset via ``to_tf_dataset``.

    Only feature columns that actually exist in ``tokenized_split`` are requested.
    The DistilBERT and RoBERTa tokenizers are not expected to emit ``token_type_ids``,
    and asking ``to_tf_dataset`` for a column the split lacks is fragile across
    ``datasets`` releases, so the column list is filtered rather than hard-coded.

    Args:
        tokenized_split: a split already mapped through one of the ``*_tokenize_function``s.
        collator: the ``DataCollatorWithPadding`` built from the matching tokenizer.
        shuffle: forwarded to ``to_tf_dataset``; True for training, False for validation
            so that predictions stay aligned with the split's label order.
        batch_size: examples per batch (defaults to the 8 used throughout this notebook).

    Returns:
        Whatever ``to_tf_dataset`` returns for the selected columns and the ``label`` column.
    """
    candidate_columns = ("attention_mask", "input_ids", "token_type_ids")
    # A fresh list is built on every call, so the caller's data is never mutated downstream.
    feature_columns = [name for name in candidate_columns if name in tokenized_split.column_names]
    return tokenized_split.to_tf_dataset(
        columns=feature_columns,
        label_cols=["label"],
        shuffle=shuffle,
        collate_fn=collator,
        batch_size=batch_size,
    )


# IMDb
imdb_train_bert = _to_tf_batches(imdb_train_tokenized_bert, data_collator_bert, shuffle=True)
imdb_validation_bert = _to_tf_batches(imdb_validation_tokenized_bert, data_collator_bert, shuffle=False)

imdb_train_dibert = _to_tf_batches(imdb_train_tokenized_dibert, data_collator_dibert, shuffle=True)
imdb_validation_dibert = _to_tf_batches(imdb_validation_tokenized_dibert, data_collator_dibert, shuffle=False)

imdb_train_roberta = _to_tf_batches(imdb_train_tokenized_roberta, data_collator_roberta, shuffle=True)
imdb_validation_roberta = _to_tf_batches(imdb_validation_tokenized_roberta, data_collator_roberta, shuffle=False)

# AG News
ag_train_bert = _to_tf_batches(ag_train_tokenized_bert, data_collator_bert, shuffle=True)
ag_validation_bert = _to_tf_batches(ag_validation_tokenized_bert, data_collator_bert, shuffle=False)

ag_train_dibert = _to_tf_batches(ag_train_tokenized_dibert, data_collator_dibert, shuffle=True)
ag_validation_dibert = _to_tf_batches(ag_validation_tokenized_dibert, data_collator_dibert, shuffle=False)

ag_train_roberta = _to_tf_batches(ag_train_tokenized_roberta, data_collator_roberta, shuffle=True)
ag_validation_roberta = _to_tf_batches(ag_validation_tokenized_roberta, data_collator_roberta, shuffle=False)

# Uncomment when ready for full training
# imdb_train_bert = imdb_tokenized_bert["train"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=True,
#     collate_fn=data_collator_bert,
#     batch_size=8,
# )

# imdb_validation_bert = imdb_tokenized_bert["test"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=False,
#     collate_fn=data_collator_bert,
#     batch_size=8,
# )

# imdb_train_dibert = imdb_tokenized_dibert["train"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=True,
#     collate_fn=data_collator_dibert,
#     batch_size=8,
# )

# imdb_validation_dibert = imdb_tokenized_dibert["test"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=False,
#     collate_fn=data_collator_dibert,
#     batch_size=8,
# )

# imdb_train_roberta = imdb_tokenized_roberta["train"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=True,
#     collate_fn=data_collator_roberta,
#     batch_size=8,
# )

# imdb_validation_roberta = imdb_tokenized_roberta["test"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=False,
#     collate_fn=data_collator_roberta,
#     batch_size=8,
# )

# ag_train_bert = ag_tokenized_bert["train"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=True,
#     collate_fn=data_collator_bert,
#     batch_size=8,
# )

# ag_validation_bert = ag_tokenized_bert["test"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=False,
#     collate_fn=data_collator_bert,
#     batch_size=8,
# )

# ag_train_dibert = ag_tokenized_dibert["train"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=True,
#     collate_fn=data_collator_dibert,
#     batch_size=8,
# )

# ag_validation_dibert = ag_tokenized_dibert["test"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=False,
#     collate_fn=data_collator_dibert,
#     batch_size=8,
# )

# ag_train_roberta = ag_tokenized_roberta["train"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=True,
#     collate_fn=data_collator_roberta,
#     batch_size=8,
# )

# ag_validation_roberta = ag_tokenized_roberta["test"].to_tf_dataset(
#     columns=["attention_mask", "input_ids", "token_type_ids"],
#     label_cols=["label"],
#     shuffle=False,
#     collate_fn=data_collator_roberta,
#     batch_size=8,
# )

Downloading builder script:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Downloading and preparing dataset imdb/plain_text (download: 80.23 MiB, generated: 127.02 MiB, post-processed: Unknown size, total: 207.25 MiB) to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1...


Downloading data:   0%|          | 0.00/84.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Dataset imdb downloaded and prepared to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

Downloading builder script:   0%|          | 0.00/1.83k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

Using custom data configuration default


Downloading and preparing dataset ag_news/default (download: 29.88 MiB, generated: 30.23 MiB, post-processed: Unknown size, total: 60.10 MiB) to /root/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548...


Downloading data:   0%|          | 0.00/11.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/751k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

Dataset ag_news downloaded and prepared to /root/.cache/huggingface/datasets/ag_news/default/0.0.0/bc2bcb40336ace1a0374767fc29bb0296cdaf8a6da7298436239c54d79180548. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

SUPERVISED LEARNING, MACHINE LEARNING TECHNIQUES

In [22]:
# Importing the libraries required to fit ML and DL models on our datasets
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

# Remove when ready for full training
# Target labels for the sampled splits (the "test" names hold the validation-sample labels)
imdb_train_y, imdb_test_y = imdb_train_samp["label"], imdb_validation_samp["label"]

ag_train_y, ag_test_y = ag_train_samp["label"], ag_validation_samp["label"]

# Creating the document-term matrices
def _counts_to_frame(counts, vectoriser):
    """Densify a count matrix and label its columns with the vectoriser's feature names."""
    # NOTE(review): .toarray() materialises the full dense matrix; this may not scale
    # to the full datasets - confirm before removing the sampling above.
    return pd.DataFrame(counts.toarray(), columns=vectoriser.get_feature_names_out())


imdb_vectoriser, ag_vectoriser = CountVectorizer(), CountVectorizer()

# IMDb: the vocabulary is fitted on the training texts only; validation texts reuse it
imdb_train_docs = imdb_train_samp["text"]
imdb_validation_docs = imdb_validation_samp["text"]
imdb_train_dtm = _counts_to_frame(imdb_vectoriser.fit_transform(imdb_train_docs), imdb_vectoriser)
imdb_validation_dtm = _counts_to_frame(imdb_vectoriser.transform(imdb_validation_docs), imdb_vectoriser)

# AG News: same procedure with its own vectoriser
ag_train_docs = ag_train_samp["text"]
ag_validation_docs = ag_validation_samp["text"]
ag_train_dtm = _counts_to_frame(ag_vectoriser.fit_transform(ag_train_docs), ag_vectoriser)
ag_validation_dtm = _counts_to_frame(ag_vectoriser.transform(ag_validation_docs), ag_vectoriser)

# Each classifier is instantiated once per dataset and fitted on that dataset's
# training document-term matrix; the *_fit names hold the value returned by .fit().

# Naive Bayes
# NOTE(review): GaussianNB treats the word counts as Gaussian features; MultinomialNB is
# the more usual choice for count data - confirm this is intentional.
imdb_nb, ag_nb = GaussianNB(), GaussianNB()
imdb_nb_fit = imdb_nb.fit(imdb_train_dtm, imdb_train_y)
ag_nb_fit = ag_nb.fit(ag_train_dtm, ag_train_y)

# Logistic Regression
imdb_logreg, ag_logreg = LogisticRegression(solver='liblinear'), LogisticRegression(solver='liblinear')
imdb_logreg_fit = imdb_logreg.fit(imdb_train_dtm, imdb_train_y)
ag_logreg_fit = ag_logreg.fit(ag_train_dtm, ag_train_y)

# SVM (SGDClassifier with its default settings, seeded for repeatability)
imdb_svm, ag_svm = SGDClassifier(random_state=123), SGDClassifier(random_state=123)
imdb_svm_fit = imdb_svm.fit(imdb_train_dtm, imdb_train_y)
ag_svm_fit = ag_svm.fit(ag_train_dtm, ag_train_y)

# Random Forests (seeded for repeatability)
imdb_rf, ag_rf = RandomForestClassifier(random_state=123), RandomForestClassifier(random_state=123)
imdb_rf_fit = imdb_rf.fit(imdb_train_dtm, imdb_train_y)
ag_rf_fit = ag_rf.fit(ag_train_dtm, ag_train_y)

In [24]:
# Using our models to obtain predictions
def _report_accuracy(dataset_name, model_name, preds, labels):
    """Print and return the fraction of ``preds`` that match ``labels``.

    Args:
        dataset_name: label used in the printed message (e.g. "IMDb").
        model_name: label used in the printed message (e.g. "Naive Bayes").
        preds: array of predicted class ids returned by a fitted model's ``predict``.
        labels: true class ids in the same order as ``preds``.

    Returns:
        The accuracy as computed by ``np.mean`` over the element-wise comparison.
    """
    # np.asarray makes the element-wise comparison independent of the container
    # type that the datasets library returns for a column.
    accuracy = np.mean(preds == np.asarray(labels))
    print(f"Accuracy for {dataset_name} dataset with {model_name} = ", accuracy)
    return accuracy


# IMDb (the printed label previously misspelled the dataset name as "IMDd")
imdb_nb_preds = imdb_nb_fit.predict(imdb_validation_dtm)
_report_accuracy("IMDb", "Naive Bayes", imdb_nb_preds, imdb_test_y)

imdb_logreg_preds = imdb_logreg_fit.predict(imdb_validation_dtm)
_report_accuracy("IMDb", "Logistic Regression", imdb_logreg_preds, imdb_test_y)

imdb_svm_preds = imdb_svm_fit.predict(imdb_validation_dtm)
_report_accuracy("IMDb", "SVM", imdb_svm_preds, imdb_test_y)

imdb_rf_preds = imdb_rf_fit.predict(imdb_validation_dtm)
_report_accuracy("IMDb", "Random Forests", imdb_rf_preds, imdb_test_y)

# AG News
ag_nb_preds = ag_nb_fit.predict(ag_validation_dtm)
_report_accuracy("AG News", "Naive Bayes", ag_nb_preds, ag_test_y)

ag_logreg_preds = ag_logreg_fit.predict(ag_validation_dtm)
_report_accuracy("AG News", "Logistic Regression", ag_logreg_preds, ag_test_y)

ag_svm_preds = ag_svm_fit.predict(ag_validation_dtm)
_report_accuracy("AG News", "SVM", ag_svm_preds, ag_test_y)

ag_rf_preds = ag_rf_fit.predict(ag_validation_dtm)
_report_accuracy("AG News", "Random Forests", ag_rf_preds, ag_test_y);

Accuracy for IMDd dataset with Naive Bayes =  0.599
Accuracy for IMDd dataset with Logistic Regression =  0.8255
Accuracy for IMDd dataset with SVM =  0.808
Accuracy for IMDd dataset with Random Forests =  0.8095
Accuracy for AG News dataset with Naive Bayes =  0.80025
Accuracy for AG News dataset with Logistic Regression =  0.8605
Accuracy for AG News dataset with SVM =  0.84825
Accuracy for AG News dataset with Random Forests =  0.821


SUPERVISED LEARNING, DEEP LEARNING TECHNIQUES

TRANSFER LEARNING, DEEP LEARNING TECHNIQUES

In [4]:
# Loading in the libraries required to fine-tune BERT, DistilBERT and RoBERTa on our dataset
from transformers import TFAutoModelForSequenceClassification
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers.schedules import PolynomialDecay
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# Defining our learning rate scheduler and optimiser
batch_size = 8
num_epochs = 3


def _decaying_adam(total_steps):
    """Build a PolynomialDecay schedule (5e-5 down to 0.0 over ``total_steps``) and an Adam optimiser using it.

    Returns:
        A ``(schedule, optimiser)`` pair.
    """
    schedule = PolynomialDecay(
        initial_learning_rate=5e-5, end_learning_rate=0.0, decay_steps=total_steps
    )
    return schedule, Adam(learning_rate=schedule)


# Total optimisation steps = batches per epoch * number of epochs
imdb_num_train_bert = num_epochs * len(imdb_train_bert)
imdb_num_train_dibert = num_epochs * len(imdb_train_dibert)
imdb_num_train_roberta = num_epochs * len(imdb_train_roberta)

ag_num_train_bert = num_epochs * len(ag_train_bert)
ag_num_train_dibert = num_epochs * len(ag_train_dibert)
ag_num_train_roberta = num_epochs * len(ag_train_roberta)

# Every model gets its own schedule and optimiser instance
imdb_lr_scheduler_bert, imdb_optimiser_bert = _decaying_adam(imdb_num_train_bert)
imdb_lr_scheduler_dibert, imdb_optimiser_dibert = _decaying_adam(imdb_num_train_dibert)
imdb_lr_scheduler_roberta, imdb_optimiser_roberta = _decaying_adam(imdb_num_train_roberta)

ag_lr_scheduler_bert, ag_optimiser_bert = _decaying_adam(ag_num_train_bert)
ag_lr_scheduler_dibert, ag_optimiser_dibert = _decaying_adam(ag_num_train_dibert)
ag_lr_scheduler_roberta, ag_optimiser_roberta = _decaying_adam(ag_num_train_roberta)

# Fitting the models to our dataset
# A single loss object is shared by all six models; from_logits=True because the
# loss is applied to the models' raw outputs.
loss = SparseCategoricalCrossentropy(from_logits=True)


def _compile_and_fit(model, optimiser, train_batches, validation_batches):
    """Compile ``model`` with the shared loss and accuracy metric, then train it for ``num_epochs``."""
    model.compile(optimizer=optimiser, loss=loss, metrics=["accuracy"])
    model.fit(train_batches, validation_data=validation_batches, epochs=num_epochs)


# Each model is bound to its name before training starts, so it remains available
# even if the (long-running) fit is interrupted.

# IMDb: two output labels
imdb_model_bert = TFAutoModelForSequenceClassification.from_pretrained(bert_checkpoint, num_labels=2)
_compile_and_fit(imdb_model_bert, imdb_optimiser_bert, imdb_train_bert, imdb_validation_bert)

imdb_model_dibert = TFAutoModelForSequenceClassification.from_pretrained(dibert_checkpoint, num_labels=2)
_compile_and_fit(imdb_model_dibert, imdb_optimiser_dibert, imdb_train_dibert, imdb_validation_dibert)

imdb_model_roberta = TFAutoModelForSequenceClassification.from_pretrained(roberta_checkpoint, num_labels=2)
_compile_and_fit(imdb_model_roberta, imdb_optimiser_roberta, imdb_train_roberta, imdb_validation_roberta)

# AG News: four output labels
ag_model_bert = TFAutoModelForSequenceClassification.from_pretrained(bert_checkpoint, num_labels=4)
_compile_and_fit(ag_model_bert, ag_optimiser_bert, ag_train_bert, ag_validation_bert)

ag_model_dibert = TFAutoModelForSequenceClassification.from_pretrained(dibert_checkpoint, num_labels=4)
_compile_and_fit(ag_model_dibert, ag_optimiser_dibert, ag_train_dibert, ag_validation_dibert)

ag_model_roberta = TFAutoModelForSequenceClassification.from_pretrained(roberta_checkpoint, num_labels=4)
_compile_and_fit(ag_model_roberta, ag_optimiser_roberta, ag_train_roberta, ag_validation_roberta)

Downloading:   0%|          | 0.00/511M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3
 10/125 [=>............................] - ETA: 1:23:14 - loss: 0.7226 - accuracy: 0.4500

KeyboardInterrupt: ignored

In [None]:
# Using our models to obtain predictions
# NOTE(review): `load_metric` is deprecated in later `datasets` releases in favour of the
# separate `evaluate` package - confirm against the installed version.
from datasets import load_metric

metric = load_metric("accuracy")

# For every model: take the logits from predict(), pick the highest-scoring class per
# example, and score against the labels of the sampled validation split. The validation
# tf datasets were built with shuffle=False, so predictions line up with those labels.
# (The printed IMDb labels previously misspelled the dataset name as "IMDd".)

# IMDb
imdb_bert_preds = imdb_model_bert.predict(imdb_validation_bert)["logits"]
imdb_bert_class_preds = np.argmax(imdb_bert_preds, axis=1)
print("Accuracy for IMDb dataset with BERT = ", metric.compute(predictions=imdb_bert_class_preds, references=imdb_validation_samp["label"]))

imdb_dibert_preds = imdb_model_dibert.predict(imdb_validation_dibert)["logits"]
imdb_dibert_class_preds = np.argmax(imdb_dibert_preds, axis=1)
print("Accuracy for IMDb dataset with DistilBERT = ", metric.compute(predictions=imdb_dibert_class_preds, references=imdb_validation_samp["label"]))

imdb_roberta_preds = imdb_model_roberta.predict(imdb_validation_roberta)["logits"]
imdb_roberta_class_preds = np.argmax(imdb_roberta_preds, axis=1)
print("Accuracy for IMDb dataset with RoBERTa = ", metric.compute(predictions=imdb_roberta_class_preds, references=imdb_validation_samp["label"]))

# AG News
ag_bert_preds = ag_model_bert.predict(ag_validation_bert)["logits"]
ag_bert_class_preds = np.argmax(ag_bert_preds, axis=1)
print("Accuracy for AG News dataset with BERT = ", metric.compute(predictions=ag_bert_class_preds, references=ag_validation_samp["label"]))

ag_dibert_preds = ag_model_dibert.predict(ag_validation_dibert)["logits"]
ag_dibert_class_preds = np.argmax(ag_dibert_preds, axis=1)
print("Accuracy for AG News dataset with DistilBERT = ", metric.compute(predictions=ag_dibert_class_preds, references=ag_validation_samp["label"]))

ag_roberta_preds = ag_model_roberta.predict(ag_validation_roberta)["logits"]
ag_roberta_class_preds = np.argmax(ag_roberta_preds, axis=1)
print("Accuracy for AG News dataset with RoBERTa = ", metric.compute(predictions=ag_roberta_class_preds, references=ag_validation_samp["label"]))