In [None]:
pip install transformers

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the dataset paths used by the
# loading cell ('drive/MyDrive/...') are resolved underneath this mount.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import TFAutoModel, AutoTokenizer


In [None]:
# Column layout shared by all three LIAR split files; the files are read with
# header=None, so the names are supplied here once instead of per call.
LIAR_COLUMNS = [
    'id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state',
    'party_affiliation', 'barely_true_counts', 'false_counts',
    'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context',
]


def _read_liar_split(path):
    """Read one tab-separated, header-less LIAR split into a DataFrame."""
    # NOTE(review): pandas' default quoting is in effect. If statements contain
    # unbalanced double quotes, rows may be merged -- confirm the row counts
    # against the dataset README and consider quoting=csv.QUOTE_NONE.
    return pd.read_csv(path, sep='\t', header=None, names=LIAR_COLUMNS)


train_df = _read_liar_split('drive/MyDrive/liar_dataset/train.tsv')
test_df = _read_liar_split('drive/MyDrive/liar_dataset/test.tsv')
val_df = _read_liar_split('drive/MyDrive/liar_dataset/valid.tsv')

In [None]:
# Fixed sequence length fed to BERT; the tokenizer output and the model's
# Input layers below must agree on this value.
MAX_LEN = 128

# Train and test are stacked so that both splits go through one tokenizer call
# and share one one-hot label layout; they are split again by row position.
df = pd.concat([train_df, test_df], axis=0)
n_train = train_df.shape[0]

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
X = df['statement'].values
# padding='max_length' pads every row to exactly MAX_LEN tokens. The previous
# padding=True only padded to the longest statement present, which does not
# match the fixed-width Input layers whenever that statement is shorter than
# MAX_LEN tokens.
X = tokenizer(X.tolist(), padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors='tf')
# Request a float dtype explicitly: recent pandas versions return boolean
# dummy columns by default.
y = pd.get_dummies(df['label'], dtype='float32').values

# Convert once to NumPy, then slice the two splits apart by position.
all_input_ids = X['input_ids'].numpy()
all_attention_masks = X['attention_mask'].numpy()

X_train = [all_input_ids[:n_train], all_attention_masks[:n_train]]
y_train = y[:n_train]
X_test = [all_input_ids[n_train:], all_attention_masks[n_train:]]
y_test = y[n_train:]

bert_model = TFAutoModel.from_pretrained('bert-base-uncased')

input_ids = tf.keras.layers.Input(shape=(MAX_LEN,), dtype='int32')
input_masks = tf.keras.layers.Input(shape=(MAX_LEN,), dtype='int32')
# Element [0] of the encoder output is the per-token hidden-state tensor.
bert_output = bert_model({'input_ids': input_ids, 'attention_mask': input_masks})[0]
# Classify from the hidden state at sequence position 0; the head width
# follows the number of one-hot label columns.
output_layer = tf.keras.layers.Dense(y.shape[1], activation='softmax')(bert_output[:, 0, :])
model = tf.keras.models.Model(inputs=[input_ids, input_masks], outputs=output_layer)

# Freeze the pretrained encoder by name rather than by position in
# model.layers, so only the Dense classification head is trained.
bert_model.trainable = False


In [None]:
import keras.backend as K
def f1_metric(y_true, y_pred):
    """F1 score pooled over every element of the given label/prediction tensors.

    Predictions are turned into hard 0/1 decisions by rounding (values above
    0.5 count as positive). True positives, predicted positives and actual
    positives are summed over all elements of the tensors, and a single F1
    value is computed from those totals. When used as a Keras metric the
    function is called on each batch, so the reported number is an average of
    per-batch values rather than a dataset-level F1.

    Args:
        y_true: one-hot (0/1) ground-truth labels.
        y_pred: predicted scores in [0, 1], same shape as ``y_true``.

    Returns:
        Scalar tensor with the F1 value; the epsilon terms make it 0 instead
        of NaN when there are no positives.
    """
    # Uses tf ops rather than the standalone `keras.backend` helpers so the
    # metric stays on the same Keras implementation as the tf.keras model.
    eps = tf.keras.backend.epsilon()
    y_pred = tf.convert_to_tensor(y_pred)
    # Labels may arrive as bool/int arrays; align them with the predictions.
    y_true = tf.cast(y_true, y_pred.dtype)

    true_positives = tf.reduce_sum(tf.round(tf.clip_by_value(y_true * y_pred, 0, 1)))
    possible_positives = tf.reduce_sum(tf.round(tf.clip_by_value(y_true, 0, 1)))
    predicted_positives = tf.reduce_sum(tf.round(tf.clip_by_value(y_pred, 0, 1)))

    precision = true_positives / (predicted_positives + eps)
    recall = true_positives / (possible_positives + eps)
    return 2 * (precision * recall) / (precision + recall + eps)

In [None]:
# Categorical cross-entropy matches the one-hot label encoding and the softmax
# output; accuracy and the custom F1 are tracked alongside the loss.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy', f1_metric])

# NOTE(review): no validation_data is passed here, and val_df is loaded but
# not used in the visible cells -- consider tokenizing it and passing it to
# fit() so over-fitting can be monitored.
history = model.fit(X_train, y_train, epochs=3, batch_size=32)

# evaluate() returns the loss followed by the metrics in compile() order.
score = model.evaluate(X_test, y_test, batch_size=32)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# The F1 metric was being computed but never reported.
print('Test F1:', score[2])