# In [1]:
import tensorflow as tf
from transformers import TFAutoModel, AutoTokenizer
from datasets import load_dataset


def modeldefine():
    """Fine-tune ``bert-base-uncased`` on the ``SetFit/emotion`` dataset.

    The function downloads the pretrained model, tokenizer and dataset,
    tokenizes every split, builds batched ``tf.data`` pipelines for the
    ``train`` and ``test`` splits, trains a small classification head on top
    of BERT for 3 epochs, and prints the accuracy measured on both splits
    together with a model summary.

    Returns:
        The value returned by ``classifier.predict`` on the test dataset
        (one row of class probabilities per test example, since the head
        ends in a softmax ``Dense`` layer).
    """
    model = TFAutoModel.from_pretrained("bert-base-uncased")
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    emotions = load_dataset('SetFit/emotion')

    def tokenize(batch):
        """Tokenize the ``text`` column of a batch with padding/truncation."""
        return tokenizer(batch["text"], padding=True, truncation=True)

    # batch_size=None hands each split to `tokenize` in a single call, so
    # padding=True pads all of its examples to one common length; equal
    # lengths are required by `from_tensor_slices` below.
    emotions_encoded = emotions.map(tokenize, batched=True, batch_size=None)
    emotions_encoded.set_format(
        'tf',
        columns=['input_ids', 'attention_mask', 'token_type_ids', 'label'],
    )

    BATCH_SIZE = 64
    SHUFFLE_BUFFER = 1000
    feature_keys = ('input_ids', 'attention_mask', 'token_type_ids')

    def order(inp):
        """Split a dataset element into ``(bert_inputs, labels)``.

        Columns are looked up by name rather than by position: the order of
        ``inp.values()`` follows the dataset's column order, which is not
        guaranteed to match a hard-coded index layout.
        """
        features = {key: inp[key] for key in feature_keys}
        return features, inp['label']

    def make_dataset(split, shuffle=False):
        """Build the batched ``tf.data.Dataset`` for one dataset split."""
        dataset = tf.data.Dataset.from_tensor_slices(emotions_encoded[split][:])
        if shuffle:
            # Shuffle individual examples *before* batching; shuffling after
            # `batch` would only reorder whole batches whose contents stay
            # the same in every epoch.
            dataset = dataset.shuffle(SHUFFLE_BUFFER)
        dataset = dataset.batch(BATCH_SIZE)
        return dataset.map(order, num_parallel_calls=tf.data.AUTOTUNE)

    train_dataset = make_dataset('train', shuffle=True)
    test_dataset = make_dataset('test')

    class BERTForClassification(tf.keras.Model):
        """BERT encoder followed by a softmax classification layer."""

        def __init__(self, bert_model, num_classes):
            super().__init__()
            self.bert = bert_model
            self.fc = tf.keras.layers.Dense(num_classes, activation='softmax')

        def call(self, inputs):
            # Element 1 of the encoder output is fed to the classifier
            # (presumably the pooled [CLS] representation - confirm against
            # the transformers BERT output documentation).
            x = self.bert(inputs)[1]
            return self.fc(x)

    # Show one batch so the input structure can be inspected.
    inp, out = next(iter(train_dataset))
    print(inp, '\n\n', out)

    classifier = BERTForClassification(model, num_classes=6)
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        # The head already applies softmax, so the loss keeps its default
        # of expecting probabilities rather than logits.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    classifier.fit(train_dataset, epochs=3)

    # `evaluate` returns [loss, accuracy]; index 1 is the accuracy metric.
    train_pred = classifier.evaluate(train_dataset)[1]
    print("the training acc prediction value is", float(train_pred * 100))
    classifier.summary()
    pred = classifier.evaluate(test_dataset)[1]
    print("the final prediction value is", float(pred * 100))

    # `predict` requires input data; the original call passed none and
    # raised a TypeError after training had already finished.
    return classifier.predict(test_dataset)


