In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import re

In [3]:
import nltk

In [4]:
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist

In [5]:
import random

In [6]:
CUR_DATASET = "LIAR-PLUS"

In [7]:
test_dataset = pd.read_csv(f'../data/{CUR_DATASET}/test2.tsv', sep='\t', header = None)

In [9]:
test_dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,0,11972.json,true,Building a wall on the U.S.-Mexico border will...,immigration,rick-perry,Governor,Texas,republican,30,30,42,23,18,Radio interview,"Meantime, engineering experts agree the wall w..."
1,1,11685.json,false,Wisconsin is on pace to double the number of l...,jobs,katrina-shankland,State representative,Wisconsin,democrat,2,1,0,0,0,a news conference,She cited layoff notices received by the state...
2,2,11096.json,false,Says John McCain has done nothing to help the ...,"military,veterans,voting-record",donald-trump,President-Elect,New York,republican,63,114,51,37,61,comments on ABC's This Week.,"Trump said that McCain ""has done nothing to he..."
3,3,5209.json,half-true,Suzanne Bonamici supports a plan that will cut...,"medicare,message-machine-2012,campaign-adverti...",rob-cornilles,consultant,Oregon,republican,1,1,3,1,1,a radio show,"But spending still goes up. In addition, many ..."
4,4,9524.json,pants-fire,When asked by a reporter whether hes at the ce...,"campaign-finance,legal-issues,campaign-adverti...",state-democratic-party-wisconsin,,Wisconsin,democrat,5,7,2,2,7,a web video,Our rating A Democratic Party web video making...


In [30]:
test = test_dataset.iloc[:, [2, 3, 15]]
test = test.rename(columns = {2: 'label', 3: 'statements', 15: 'justification'})

In [31]:
for dataset in [test]:
    dataset['label'] = dataset['label'].replace({
        'false' : 0,
        'barely-true' : 1,
        'pants-fire' : 2,
        'half-true' : 3,
        'mostly-true' : 4,
        'true' : 5
    })

In [32]:
for dataset in [test]:
    dataset = dataset.dropna(inplace=True)

In [33]:
test_labels = test["label"].values.astype(int)

## Data Preprocessing and Preparation

In [34]:
if_stopwords = True
if_lemmatize = True


### Regular Expressions

In [35]:
preprocessing_text_fn = {
    "no_punctuation": lambda txt: re.sub(r'[^\w\s]',' ', txt),
    "no_special_symbols": lambda txt: re.sub('[$,#,&]', '', txt),
    # "no_digits": lambda txt: re.sub('\d*', '', txt),
    "no_www": lambda txt: re.sub('w{3}', '', txt),
    "no_urls": lambda txt: re.sub('http\S+', '', txt),
    "no_spaces": lambda txt: re.sub('\s+', ' ', txt),
    "no_single_chars": lambda txt: re.sub(r'\s+[a-zA-Z]\s+', ' ', txt)
}


In [36]:
def preprocess_text(text, pipeline = preprocessing_text_fn):
    text = str(text)
    for fn in pipeline.keys():
        text = pipeline[fn](text)
        
    return text

In [37]:
for dataset in [test]:
    dataset["statements"] = dataset["statements"].apply(preprocess_text)
    dataset["justification"] = dataset["justification"].apply(preprocess_text)
test.head(5)

Unnamed: 0,label,statements,justification
0,5,Building wall on the S Mexico border will take...,Meantime engineering experts agree the wall wo...
1,0,Wisconsin is on pace to double the number of l...,She cited layoff notices received by the state...
2,0,Says John McCain has done nothing to help the ...,Trump said that McCain has done nothing to hel...
3,3,Suzanne Bonamici supports plan that will cut c...,But spending still goes up In addition many ou...
4,2,When asked by reporter whether hes at the cent...,Our rating Democratic Party web video making t...


In [39]:
stop_words = stopwords.words('english')
if if_stopwords:
    for dataset in [test]:
        for col in ["statements", "justification"]:
            dataset[col] = dataset[col].str.lower().str.replace("’", "'")
            dataset[col] = dataset[col].apply(lambda x: ' '.join([word for word in x.split() if word not in stop_words]))

### Lemmatization

In [40]:
if if_lemmatize:
    
    import nltk
    nltk.download('wordnet')
    nltk.download('omw-1.4')
    from nltk.stem import WordNetLemmatizer
    
    wnl = WordNetLemmatizer()
    
    for dataset in [test]:
        for col in ["statements", "justification"]:
            dataset[col] = dataset[col].str.lower().str.replace("’", "'")
            dataset[col] = dataset[col].apply(lambda x: ' '.join([wnl.lemmatize(word) for word in word_tokenize(x)]))

[nltk_data] Downloading package wordnet to /home/marneusz/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/marneusz/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [41]:
test_text = (test["statements"] + " " + test["justification"]).values

# Model Loading

In [42]:
from tqdm import tqdm

In [22]:
import torch
if torch.cuda.is_available():    
    device = torch.device('cuda')    
print(device)

cuda


  from .autonotebook import tqdm as notebook_tqdm


In [23]:
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig, get_linear_schedule_with_warmup
from torch.optim import AdamW
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler

In [24]:
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, precision_score, recall_score

In [25]:
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased', do_lower_case=True)

In [26]:
model_name = "bert_regexp_stopwords_lemmatization"

In [27]:
model = BertForSequenceClassification.from_pretrained(
    f"./models/{model_name}/"
)
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [28]:
from bert_utils import tokenize_map

In [29]:
input_ids, attention_masks, labels = tokenize_map(test_text, test_labels)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44898/44898 [01:43<00:00, 433.65it/s]


In [30]:
import transformers

In [31]:
batch_size = 16
seed = 10
transformers.set_seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f6fcd936f70>

In [32]:
test_dataset = TensorDataset(input_ids, attention_masks, labels)

In [33]:
test_dataloader = DataLoader(
            test_dataset, 
            sampler = SequentialSampler(test_dataset), 
            batch_size = batch_size 
)

### Metrics

In [43]:
def accuracy(predictions, labels):
    pred_flat = np.argmax(predictions, axis=1).flatten()
    labels_flat = labels.flatten()
    
    return accuracy_score(labels_flat, pred_flat)

def flat_f1_score(predictions, labels):
    pred_flat = np.argmax(predictions, axis=1).flatten()
    labels_flat = labels.flatten()
    

    return f1_score(labels_flat, pred_flat, zero_division=0)

def mae(predictions, labels):
    pred_flat = np.argmax(predictions, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.mean(np.abs(pred_flat - labels_flat))

def mse(predictions, labels):
    pred_flat = np.argmax(predictions, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.mean((pred_flat - labels_flat) ** 2)

### Evaluation

In [35]:
import time

In [36]:
t0 = time.time()

model.eval()

total_eval_accuracy = 0
total_eval_loss = 0
total_eval_f1 = 0
total_eval_mae = 0
total_eval_mse = 0
nb_eval_steps = 0

for batch in tqdm(test_dataloader):

    b_input_ids = batch[0].to(device)
    b_input_mask = batch[1].to(device)
    b_labels = batch[2].to(device)

    with torch.no_grad():        

        loss = model(b_input_ids, 
                     token_type_ids=None, 
                     attention_mask=b_input_mask,
                     labels=b_labels)[0]

        logits = model(b_input_ids, 
                       token_type_ids=None, 
                       attention_mask=b_input_mask,
                       labels=b_labels)[1]

    # Accumulate the validation loss.

    total_eval_loss += loss.item()

    # Move logits and labels to CPU:

    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # Calculate the accuracy for this batch of test sentences, and accumulate it over all batches:

    total_eval_accuracy += accuracy(logits, label_ids)
    total_eval_f1 += flat_f1_score(logits, label_ids)
    total_eval_mae += mae(logits, label_ids)
    total_eval_mse += mse(logits, label_ids)


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2807/2807 [26:44<00:00,  1.75it/s]


In [37]:
# Report the final accuracy for this validation run.

avg_val_accuracy = total_eval_accuracy / len(test_dataloader)
print('  Accuracy: {0:.5f}'.format(avg_val_accuracy))

# Report the final f1 score for this validation run.

avg_val_f1 = total_eval_f1 / len(test_dataloader)
print('  F1: {0:.5f}'.format(avg_val_f1))

avg_val_mae = total_eval_mae / len(test_dataloader)
print('  MAE: {0:.5f}'.format(avg_val_mae))

avg_val_mse = total_eval_mse / len(test_dataloader)
print('  MSE: {0:.5f}'.format(avg_val_mse))

# Calculate the average loss over all of the batches.

avg_val_loss = total_eval_loss / len(test_dataloader)



# Measure how long the validation run took:

test_time = time.time() - t0

print('  Test Loss: {0:.5f}'.format(avg_val_loss))
print('  Test took: {:}'.format(test_time))

  Accuracy: 0.41517
  F1: 0.57178
  Validation Loss: 5.92430
  Validation took: 1604.4437456130981
