This is an example of fine-tuning a BERT model (`bert-base-uncased`) on the GLUE MRPC dataset. Our sarcasm datasets are not used here.

In [None]:
pip install transformers

In [None]:
pip install datasets

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
import numpy as np

# Name of the pretrained checkpoint; the tokenizer below and the model created
# later in the notebook are both loaded from it.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Load the "mrpc" configuration of the "glue" dataset (all available splits).
raw_datasets = load_dataset(path="glue", name="mrpc")


def tokenize_function(example):
    """Tokenize the ``sentence1``/``sentence2`` fields of ``example`` as a pair.

    Args:
        example: Mapping with ``"sentence1"`` and ``"sentence2"`` entries. It is
            applied through ``Dataset.map(..., batched=True)``, so each entry is
            presumably a list of strings -- confirm against the caller.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the pair, with
        truncation enabled.
    """
    first_sentence = example["sentence1"]
    second_sentence = example["sentence2"]
    return tokenizer(first_sentence, second_sentence, truncation=True)


# Run the tokenizer over every split, processing examples in batches.
tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

# Collates tokenized examples into padded batches returned as TensorFlow tensors.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")


def _as_tf_dataset(split_name, shuffle):
    """Convert one tokenized split into a batched ``tf.data.Dataset``.

    Both splits use the same feature columns, label column, collator and
    batch size; only the split name and the shuffle flag differ.
    """
    return tokenized_datasets[split_name].to_tf_dataset(
        columns=["attention_mask", "input_ids", "token_type_ids"],
        label_cols=["labels"],
        shuffle=shuffle,
        collate_fn=data_collator,
        batch_size=8,
    )


# Training data is shuffled; validation data keeps the split's original order.
tf_train_dataset = _as_tf_dataset("train", shuffle=True)
tf_validation_dataset = _as_tf_dataset("validation", shuffle=False)

In [None]:
from transformers import TFAutoModelForSequenceClassification
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers.schedules import PolynomialDecay
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# NOTE: batch_size is not read in this cell; the tf.data datasets were already
# batched (with a batch size of 8) when they were created.
batch_size = 8
num_epochs = 3
# One optimizer step is taken per batch. len(tf_train_dataset) is the number of
# batches in one epoch, so the total number of training steps is that count
# multiplied by the number of epochs.
num_train_steps = len(tf_train_dataset) * num_epochs
# Decay the learning rate from 5e-5 down to 0 over the whole training run.
lr_scheduler = PolynomialDecay(initial_learning_rate=5e-5, end_learning_rate=0.0, decay_steps=num_train_steps)
opt = Adam(learning_rate=lr_scheduler)


# Two output labels for the binary sentence-pair task.
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
# The loss is applied to the model's raw logits, hence from_logits=True.
loss = SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=opt, loss=loss, metrics=["accuracy"])

# Train for num_epochs so the run covers exactly the num_train_steps the decay
# schedule was built for (Keras would otherwise default to a single epoch and the
# learning rate would only decay a third of the way).
model.fit(tf_train_dataset, validation_data=tf_validation_dataset, epochs=num_epochs)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.




<keras.callbacks.History at 0x7f38010c0e50>

In [None]:

from datasets import load_metric

# Logits for the whole validation split. The validation dataset is built with
# shuffle=False, so the rows are expected to follow the order of the raw split.
preds = model.predict(tf_validation_dataset)["logits"]
class_preds = np.argmax(preds, axis=1)
print(preds.shape, class_preds.shape)

# Show up to the first 50 rows of raw logits. Slicing (instead of indexing with
# range(50)) cannot run past the end of a split with fewer than 50 rows.
for row in preds[:50]:
  print(row)

# Score the predictions against the gold labels of the validation split.
labels = np.array(raw_datasets["validation"]["label"])
accuracy = float(np.mean(class_preds == labels))
print("accuracy:", accuracy)

(408, 2) (408,)
[-0.20577943  0.5039454 ]
[-0.20558232  0.50362176]
[-0.20539574  0.5037148 ]
[-0.20537688  0.50398105]
[-0.20531353  0.50380045]
[-0.20580117  0.5039538 ]
[-0.20533745  0.5039904 ]
[-0.20557937  0.5036268 ]
[-0.20527922  0.50353825]
[-0.20540503  0.5036581 ]
[-0.20560683  0.50390506]
[-0.20536211  0.50375956]
[-0.2052948   0.50373054]
[-0.20552957  0.50374174]
[-0.20547883  0.50409275]
[-0.2058427   0.50335675]
[-0.20554043  0.5039207 ]
[-0.20533559  0.5037407 ]
[-0.2056292  0.5041536]
[-0.20531698  0.5037804 ]
[-0.20527633  0.5035466 ]
[-0.20537162  0.5036404 ]
[-0.20535259  0.5041412 ]
[-0.20566209  0.5037403 ]
[-0.20582181  0.5037507 ]
[-0.20542145  0.50328606]
[-0.205647   0.5035273]
[-0.2057134   0.50385886]
[-0.20546806  0.5040977 ]
[-0.20511107  0.50390106]
[-0.20552187  0.50359166]
[-0.20567954  0.50405705]
[-0.20574899  0.50381356]
[-0.20527397  0.5037373 ]
[-0.20539422  0.50401306]
[-0.20568775  0.50373113]
[-0.20572823  0.50342655]
[-0.20539354  0.5036909 ]


In [None]:
# The validation dataset was created with label_cols, so every element is a
# (features, labels) pair; only the features are fed to the model.
for features, _labels in tf_validation_dataset:
  # verbose=0 keeps the per-call progress bar from flooding the cell output.
  preds = model.predict(features, verbose=0)["logits"]
  class_preds = np.argmax(preds, axis=1)
  print(class_preds)

In [None]:
# Encode a single sentence pair as TensorFlow tensors.
t = tokenizer(
    "how are you going to do it",
    "crying is good",
    padding=True,
    return_tensors="tf",
)

# Forward pass through the fine-tuned model, keeping only the logits.
preds = model(t)["logits"]

# Turn the logits into class probabilities over the last axis and display them.
prob = tf.nn.softmax(preds, axis=-1)
print(prob)

# Index of the most probable class for each row of the batch.
class_preds = np.argmax(prob, axis=-1)
print(class_preds)

tf.Tensor([[0.32989815 0.6701018 ]], shape=(1, 2), dtype=float32)
[1]
