In [4]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch import nn


In [2]:
# Read the article table from disk. Later cells use its Headline, Body and
# Label columns.
DATA_CSV = '../datasets/data.csv'
data = pd.read_csv(DATA_CSV)


In [4]:
# Discard every row that has at least one missing value. The result is rebound
# to `data` rather than mutating the loaded frame in place, so the cell stays a
# plain "input -> output" step and chains cleanly with further pandas calls.
data = data.dropna()

In [5]:
# One text per article: headline and body glued together with a two-space
# separator. The first entry is displayed as a quick sanity check.
to_test = [
    '  '.join(pair)
    for pair in zip(data.Headline.values, data.Body.values)
]
to_test[0]

'Four ways Bob Corker skewered Donald Trump  Image copyright Getty Images\nOn Sunday morning, Donald Trump went off on a Twitter tirade against a member of his own party.\nThis, in itself, isn\'t exactly huge news. It\'s far from the first time the president has turned his rhetorical cannons on his own ranks.\nThis time, however, his attacks were particularly biting and personal. He essentially called Tennessee Senator Bob Corker, the chair of the powerful Senate Foreign Relations Committee, a coward for not running for re-election.\nHe said Mr Corker "begged" for the president\'s endorsement, which he refused to give. He wrongly claimed that Mr Corker\'s support of the Iranian nuclear agreement was his only political accomplishment.\nUnlike some of his colleagues, Mr Corker - free from having to worry about his immediate political future - didn\'t hold his tongue.\nSkip Twitter post by @SenBobCorker It\'s a shame the White House has become an adult day care center. Someone obviously m

In [6]:
import fake_news_utils as fnu

BATCH_SIZE = 4

# Build the evaluation dataset from the joined texts and their labels.
# (Presumably this helper tokenizes the texts; confirm in fake_news_utils.)
test_dataset = fnu.dataset_tokenized_sentences(to_test, data.Label.values)

# A sequential sampler keeps batches in dataset order, which matters because
# the predictions are later compared positionally against data.Label.values.
test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    sampler=test_sampler,
)

In [7]:
class BertBinaryClassifier(nn.Module):
    """Binary classifier: a BERT encoder with a dropout + linear + sigmoid head.

    NOTE(review): `BertModel` is not imported anywhere in the visible part of
    this notebook, so constructing a fresh instance would raise NameError.
    It presumably comes from the `transformers` package; confirm and import it.
    """

    def __init__(self, dropout=0.1):
        """Create the encoder and the classification head.

        Args:
            dropout: Drop probability passed to ``nn.Dropout``, applied to the
                encoder's pooled output before the linear layer.
        """
        super().__init__()

        self.bert = BertModel.from_pretrained('bert-base-uncased', return_dict=False)

        # Head: 768 input features -> a single output squashed into (0, 1).
        self.dropout = nn.Dropout(p=dropout)
        self.linear = nn.Linear(in_features=768, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, tokens, masks=None):
        """Return the sigmoid output of the head for a batch of inputs.

        Args:
            tokens: Passed to the encoder as its first positional argument.
            masks: Passed to the encoder as ``attention_mask`` (may be None).

        Returns:
            The sigmoid of the linear layer's output; its last dimension is 1.
        """
        encoder_outputs = self.bert(tokens, attention_mask=masks, output_hidden_states=False)
        # With return_dict=False the encoder returns a tuple; the second item
        # is used as the pooled representation.
        pooled = encoder_outputs[1]
        return self.sigmoid(self.linear(self.dropout(pooled)))

In [8]:
# Load the trained classifier from disk.
# NOTE(review): this is an absolute, machine-specific path, unlike the relative
# '../datasets/data.csv' used for the data; consider pointing it at the same
# relative datasets directory once the layout is confirmed.
MODEL_PATH = '/asd/CSML/Distributed Network Systems/fake-news-project/datasets/bert_clf.pth'

# torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location places the tensors on the GPU when one is available and on the
# CPU otherwise, so a checkpoint saved on a GPU machine can still be loaded on
# a CPU-only one instead of failing during deserialization.
bert_clf = torch.load(
    MODEL_PATH,
    map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
)

In [9]:
# Evaluate the model: run every batch through the classifier, threshold the
# outputs at 0.5 and print a per-class report against the dataset labels.
from sklearn.metrics import classification_report

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# also runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# The batches are moved to `device` below, so the model must live there too.
bert_clf = bert_clf.to(device)
bert_clf.eval()

# Built once: the loss module carries no per-batch state.
loss_func = nn.BCELoss()
total_loss = 0.0

bert_predicted = []  # boolean predictions (True when the output is > 0.5)
# Raw model outputs, column 0. Despite the name these are probabilities if the
# loaded model is the BertBinaryClassifier above (its forward ends in a sigmoid).
all_logits = []
with torch.no_grad():
    for batch_data in test_dataloader:

        token_ids, masks, labels = tuple(t.to(device) for t in batch_data)

        logits = bert_clf(token_ids, masks)
        loss = loss_func(logits, labels)
        # BCELoss averages over the batch; weight by batch size so a smaller
        # final batch does not skew the overall mean.
        total_loss += loss.item() * len(labels)
        numpy_logits = logits.detach().cpu().numpy()

        bert_predicted += list(numpy_logits[:, 0] > 0.5)
        all_logits += list(numpy_logits[:, 0])

# max(..., 1) guards against division by zero when the dataloader is empty.
mean_loss = total_loss / max(len(bert_predicted), 1)
print(f'Mean BCE loss: {mean_loss:.4f}')

# The sequential sampler keeps predictions in the same order as data.Label.
print(classification_report(data.Label.values, bert_predicted))

              precision    recall  f1-score   support

           0       0.67      0.97      0.80      2120
           1       0.93      0.47      0.62      1868

    accuracy                           0.73      3988
   macro avg       0.80      0.72      0.71      3988
weighted avg       0.79      0.73      0.71      3988



In [10]:
# Number of labels after cleaning; it should equal the total support shown in
# the classification report.
data['Label'].values.shape

(3988,)