In [1]:
import sys
if ".." not in sys.path:
    #sys.path.insert(0, "..")
    sys.path.append('../')
import logging
logging.basicConfig(level='ERROR')
from transformers import logging
logging.set_verbosity_error()
import time
import datetime
import os
import wandb
import wget
import pandas as pd
import numpy as np
import random
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import (AutoModel, AutoTokenizer, AutoModelForSequenceClassification, 
    get_linear_schedule_with_warmup, AutoConfig)
from sklearn import metrics
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
f1_score, cohen_kappa_score, roc_auc_score, confusion_matrix, log_loss,
matthews_corrcoef, average_precision_score)

#Custom modules
import utils
from utils import custom_models, early_stopping, worthiness_checker, constants

In [2]:
os.environ["WANDB_SILENT"] = "true"
os.environ["WANDB_NOTEBOOK_NAME"] = 'TransformersForClaimWorthiness.ipynb'

# Constants
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
parent_dir = os.path.dirname(os.path.abspath(os.getcwd()))
seed_list = [7, 42] # seed_list = [7, 42, 127]
fold_count = 3 #5
patience=5
loss_function = nn.BCELoss()

metric_types = np.dtype(
    [
        ("mAP", float),
        ("auc", float),
        ("accuracy", float),
        ("precision", float),
        ("recall", float),
        ("f1", float),
        ("mcc", float),
        ("log_loss", float),
        ("loss", float)
    ])

In [3]:
constants = constants.Constants()
constants.device = device
constants.parent_dir = parent_dir
constants.seed_list = seed_list
constants.fold_count = fold_count
constants.patience = patience
constants.loss_function = loss_function
constants.metric_types = metric_types

In [11]:
data_dir = os.path.join(parent_dir, 'Data')

### Training Model

In [5]:
def get_data_from_file(data_version):
    train_df = pd.read_csv(os.path.join(parent_dir, 'Data','train_english_{}.tsv'.format(data_version)), delimiter='\t')
    test_df = pd.read_csv(os.path.join(parent_dir, 'Data','test_english_{}.tsv'.format(data_version)), delimiter='\t')
    return train_df, test_df

In [6]:
def get_fold_list(df: pd.DataFrame, fold_count, random_state):
    # Group positive and negative samples for stratified sampling
    sampling_df = df.sample(frac=1, replace=False, random_state=random_state)
    sampling_negative_df = sampling_df[sampling_df['check_worthiness']==0]
    sampling_positive_df = sampling_df[sampling_df['check_worthiness']==1]

    #determine fold length for both classes
    fold_size_for_negatives = sampling_negative_df.shape[0]//fold_count
    fold_size_for_positives = sampling_positive_df.shape[0]//fold_count

    fold_list = []
    for i in range(fold_count):
        fold_for_negatives = sampling_positive_df.iloc[fold_size_for_positives*i : fold_size_for_positives * (i+1), :]
        fold_for_positives = sampling_negative_df.iloc[fold_size_for_negatives*i : fold_size_for_negatives * (i+1), :]
        fold_df = pd.concat([fold_for_negatives, fold_for_positives]).sample(frac=1, replace=False, random_state=random_state)

        fold_list.append(fold_df)

    return fold_list

In [7]:


def create_dataset(df, tokenizer, config):
    max_token_length = config.max_token_length

    sentences = df.tweet_text.values
    labels = df.check_worthiness.values

    # Tokenize all of the sentences and map the tokens to thier word IDs.
    input_ids = []

    for sent in sentences:
        encoded_dict = tokenizer.encode_plus(
                            sent,                      # Sentence to encode.
                            add_special_tokens = True, # Add '[CLS]' and '[SEP]' or equivalent
                            max_length = max_token_length,           # 64? 4-128? Pad & truncate all sentences.
                            truncation=True,
                            padding = 'max_length',
                            return_attention_mask = False,   # Do not Construct attn. masks.
                            return_tensors = 'pt',     # Return pytorch tensors.
                       )

        # Add the encoded sentence to the list.    
        input_ids.append(encoded_dict['input_ids'])

    # Convert the lists into tensors.
    input_ids = torch.cat(input_ids, dim=0)
    labels = torch.tensor(labels).float()
    dataset = TensorDataset(input_ids, labels)
    return dataset

In [8]:
sweep_config = {
    'name': 'Bert_hyperparameters',
    'method': 'bayes', #grid, random
    'program': 'TransformersForClaimWorthiness.ipynb',
    'early_terminate': {
      'type': 'hyperband',
      'eta': 3,
      's': 2,
      'max_iter': 27   
    },
    'metric': {
      'name': 'avg_val_mAP',
      'goal': 'maximize'   
    },
    'parameters': {
        'data_version': {
          'values': ['raw', 'cleaned_with_mentions', 'cleaned_without_mentions'],
          'distribution': 'categorical'  
        },
        'max_token_length': {
           'min': 4,
           'max': 80,
           'distribution': 'int_uniform'
        },
        'model_name': {
          'values': ['bert-base-uncased'],
          'distribution': 'categorical'  
        },
        'hidden_act': {
          'values': ['relu', 'gelu', 'gelu_new', 'silu'],
          'distribution': 'categorical'  
        },
        'position_embedding_type': {
          'values': ['absolute', 'relative_key', 'relative_key_query'],
          'distribution': 'categorical'  
        },
        'attention_dropout': {
            'min': 0.001,
            'max': 0.2
        },
        'transformer_dropout': {
            'min': 0.001,
            'max': 0.2
        },
        'classifier_dropout': {
            'min': 0.001,
            'max': 0.2
        },
        'layer_norm_eps': {
            'min': 1e-14,
            'max': 1e-10
        },
        'batch_size': {
           'min': 2,
           'max': 80,
           'distribution': 'int_uniform'
        },
        'learning_rate': {
            'min': 0.0000005,
            'max': 0.00025
        },
        'epochs':{
           'min': 1,
           'max': 80,
           'distribution': 'int_uniform'
        }
    }
}


check_points = ['vinai/bertweet-covid19-base-uncased', 'roberta-base', 'bert-base-uncased']

In [9]:
sweep_defaults = {
    'data_version': 'cleaned_with_mentions',
    'max_token_length': 46,
    'model_name': 'bert-base-uncased',
    'hidden_act': 'gelu',
    'position_embedding_type': 'absolute',
    'layer_norm_eps': 5.4225686692811365e-11,
    'learning_rate': 0.000028734737822604655,
    'transformer_dropout': 0.03873251195245608,
    'attention_dropout': 0.015328152075297112,
    'classifier_dropout': 0.10850207289443518,
    'batch_size': 53,
    'epochs':25
}

In [10]:
# WANDB PARAMETER
def ret_dataloader(batch_size, train_dataset, validation_dataset):

    train_dataloader = DataLoader(
                train_dataset,  # The training samples.
                sampler = RandomSampler(train_dataset), # Select batches randomly
                batch_size = batch_size # Trains with this batch size.
            )

    validation_dataloader = DataLoader(
                validation_dataset, # The validation samples.
                sampler = SequentialSampler(validation_dataset), # Pull out batches sequentially.
                batch_size = batch_size # Evaluate with this batch size.
            )
    return train_dataloader, validation_dataloader

def ret_optim(model, config):
    #print('Learning_rate = ',wandb.config.learning_rate )
    optimizer = torch.optim.AdamW(model.parameters(),
                      lr = config.learning_rate, 
                      eps = 1e-8 
                    )
    return optimizer
    
def ret_scheduler(train_dataloader,optimizer, config):
    epochs = config.epochs
    total_steps = len(train_dataloader) * epochs

    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                                num_warmup_steps = 0, # Default value in run_glue.py
                                                num_training_steps = total_steps)
    return scheduler

def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [11]:
def create_dataloaders(fold_list, fold_index, tokenizer, config):
    temp_list = fold_list.copy()

    trial_validation_df = temp_list.pop(fold_index)
    trial_train_df = pd.concat(temp_list)

    train_dataset = create_dataset(trial_train_df, tokenizer, config)
    validation_dataset = create_dataset(trial_validation_df, tokenizer, config)

    return ret_dataloader(config.batch_size, train_dataset, validation_dataset)

In [12]:

def get_metrics(probability, label_list):
    metrics_dictionary = {}
    # predictions = np.argmax(probability.detach().cpu().numpy(), axis=0)
    predictions =  [int(i > .5) for i in probability]

    accuracy = accuracy_score(label_list, predictions)
    precision = precision_score(label_list, predictions, zero_division=0)
    recall = recall_score(label_list, predictions, zero_division=0)
    f1 = f1_score(label_list, predictions, zero_division=0)
    log_loss = metrics.log_loss(label_list, predictions)
    mcc = matthews_corrcoef(label_list, predictions)
    auc = roc_auc_score(label_list, probability)
    
    mAP = average_precision_score(label_list, probability)

    metric_df = pd.DataFrame(np.empty(0, dtype=metric_types))
    metric_df.loc[0] = [mAP, auc, accuracy, precision, recall, f1, mcc, log_loss, 0]

    return metric_df

In [13]:
def log_metrics(metric_df, prefix):
    wandb.log({prefix + 'mAP':metric_df.loc[0, 'mAP'],
            prefix + 'auc':metric_df.loc[0, 'auc'],
            prefix + 'accuracy':metric_df.loc[0, 'accuracy'],
            prefix + 'precision':metric_df.loc[0, 'precision'],
            prefix + 'recall':metric_df.loc[0, 'recall'],
            prefix + 'f1':metric_df.loc[0, 'f1'],
            prefix + 'mcc':metric_df.loc[0, 'mcc'],
            prefix + 'log_loss':metric_df.loc[0, 'log_loss'],
            prefix + 'eval_loss':metric_df.loc[0, 'loss']
            })

In [14]:
def evaluate_one_epoch(model, device, dataloader):
        # Put the model in evaluation mode--the dropout layers behave differently
        # during evaluation.
        model.eval()

        # Tracking variables 
        total_eval_loss = 0
        nb_eval_steps = 0

        probability_list = torch.Tensor(0)
        label_list = np.empty(0)

        # Evaluate data for one epoch
        for batch in dataloader:
            
            b_input_ids = batch[0].to(device)
            b_labels = batch[1].to(device)

            with torch.no_grad():        
                probability = model(b_input_ids).flatten()
                loss = loss_function(probability, b_labels)

            # Accumulate the validation loss, probability and labels.
            total_eval_loss += loss.item()
            probability_list = torch.cat((probability_list, probability.detach().cpu()), axis=0)
            label_list = np.concatenate((label_list, batch[1]), axis=0)

            # Move probability and labels to CPU
            probability = probability.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            # del b_input_ids
            # del b_labels

        # Calculate and log metrics and loss.
        metrics_df = get_metrics(probability_list, label_list)
        metrics_df.loc[0,'loss'] = total_eval_loss / len(dataloader)

        return metrics_df

In [15]:
def train_one_epoch(model, device, dataloader, loss_function, optimizer, scheduler):
    total_train_loss = 0
    model.train()

    for step, batch in enumerate(dataloader):
        b_input_ids = batch[0].to(device)
        b_labels = batch[1].to(device)

        model.zero_grad()

        probability = model(b_input_ids)
        loss = loss_function(probability.flatten(), b_labels)
        total_train_loss += loss.item()
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        # del b_input_ids
        # del b_labels

    return total_train_loss / len(dataloader)

In [16]:

def cross_validate():
    # clean gpu memory in any case if previous wandb run was crashed.
    torch.cuda.empty_cache()
    run = wandb.init(config=sweep_defaults)
    run_start_time = time.time()
    # print(wandb.config.items())
    epochs = wandb.config.epochs

    model_name = wandb.config.model_name
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

    train_df, test_df = get_data_from_file(wandb.config.data_version)

    run_train_metrics_list = []
    run_val_metrics_list = []

    for seed_index, seed in enumerate(seed_list):

        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        fold_list = get_fold_list(train_df, 3, random_state=seed)



        for fold_index in range(len(fold_list)): 

            train_dataloader, validation_dataloader = create_dataloaders(fold_list, fold_index, tokenizer, wandb.config)
            model = custom_models.TransformerClassifier(wandb.config).to(device)

            optimizer = ret_optim(model, wandb.config)
            scheduler = ret_scheduler(train_dataloader, optimizer, wandb.config)

            # Creating class that checks early stopping condition
            early_stopping = utils.early_stopping.EarlyStopping(patience=patience)

            epoch_train_metrics_list = []
            epoch_val_metrics_list = []

            for epoch_i in range(0, epochs):

                # ========================================
                #               Training
                # ========================================

                # print("")
                print('======== Seed {:} - Fold {:} - Epoch {:} / {:} ========'.format(seed_index+1, fold_index+1, epoch_i + 1, epochs))
                # print('Training...')
                training_start_time = time.time()

                epoch_train_loss = train_one_epoch(model, device, train_dataloader, loss_function, optimizer, scheduler)

                training_time = format_time(time.time() - training_start_time)
                wandb.log({'train_loss_':epoch_train_loss})

                # print("  Average training loss: {0:.2f}".format(epoch_train_loss))
                #print("  Training epcoh took: {:}".format(training_time))

                # ========================================
                #               Evaluation
                # ========================================

                # print("Running Evaluation...")
                evaluation_start_time = time.time()

                epoch_train_metrics = evaluate_one_epoch(model, device, train_dataloader)
                log_metrics(epoch_train_metrics, 'train_')
                epoch_train_metrics_list.append(epoch_train_metrics)

                epoch_val_metrics = evaluate_one_epoch(model, device, validation_dataloader)
                log_metrics(epoch_val_metrics, 'val_')
                epoch_val_metrics_list.append(epoch_val_metrics)

                val_mAP = epoch_val_metrics['mAP'].loc[0]
                train_mAP = epoch_train_metrics['mAP'].loc[0]

                print("  Training mAP: {:.3f} - Validation mAP: {:.3f}".format(train_mAP,val_mAP ))

                evaluation_time = format_time(time.time() - evaluation_start_time)
                #print("  Evaluation took: {:}".format(evaluation_time))
                if early_stopping.should_stop(val_mAP):
                    # print('terminating because of early stopping!')
                    wandb.log({'early_stopped_at': seed_index*len(fold_list)*epochs+fold_index*epochs+epoch_i + 1})
                    break

            # at the end of each fold, after every epoch finished, 
            # get last epoch`s metrics as final metrics of current fold
            #  # for  df in epoch_val_metrics_list:
            #  #     log_metrics(epoch_val_metrics, 'val_{:}_{:}_'.format(seed_index+1, fold_index+1))

            fold_train_metrics = epoch_train_metrics_list[-1]
            fold_val_metrics = epoch_val_metrics_list[-1]

            run_train_metrics_list.append(fold_train_metrics)
            run_val_metrics_list.append(fold_val_metrics)

            # at the end of every fold, 
            # calculate average metrics as final metrics of current run
            run_train_metrics = pd.concat(run_train_metrics_list)
            run_val_metrics = pd.concat(run_val_metrics_list)

            log_metrics(pd.DataFrame([run_train_metrics.mean()]), 'avg_train_')
            log_metrics(pd.DataFrame([run_val_metrics.mean()]), 'avg_val_')

            # del model
            # torch.cuda.empty_cache()

    # print("")
    # print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-run_start_time)))



In [17]:
# cross_validate()

In [18]:
project="Transformers_For_ClaimWorthiness"
entity="cemulu"

In [18]:
# sweep_id = wandb.sweep(sweep_config, project=project)
sweep_id = 'nbovtee3'

In [23]:
wandb.agent(sweep_id, project=project,function=cross_validate, count=1)

  Average training loss: 0.65
Running Evaluation...
  Training mAP: 0.545 - Validation mAP: 0.469
  Evaluation took: 0:00:03
  Average training loss: 0.63
Running Evaluation...
  Training mAP: 0.579 - Validation mAP: 0.538
  Evaluation took: 0:00:03
  Average training loss: 0.59
Running Evaluation...
  Training mAP: 0.658 - Validation mAP: 0.593
  Evaluation took: 0:00:03
  Average training loss: 0.54
Running Evaluation...
  Training mAP: 0.745 - Validation mAP: 0.616
  Evaluation took: 0:00:03
  Average training loss: 0.52
Running Evaluation...
  Training mAP: 0.770 - Validation mAP: 0.601
  Evaluation took: 0:00:03
  Average training loss: 0.46
Running Evaluation...
  Training mAP: 0.867 - Validation mAP: 0.613
  Evaluation took: 0:00:03
  Average training loss: 0.33
Running Evaluation...
  Training mAP: 0.908 - Validation mAP: 0.668
  Evaluation took: 0:00:04
  Average training loss: 0.32
Running Evaluation...
  Training mAP: 0.975 - Validation mAP: 0.679
  Evaluation took: 0:00:03


### Training Model with best config and whole training data

In [4]:
api = wandb.Api()
# best_sweep = 'nbovtee3' #bert
# best_sweep = 'embywnlj' #roberta
best_sweep = '2afv0m0i' #bertweet
sweep = api.sweep("cemulu/Transformers_For_ClaimWorthiness/" + best_sweep)

In [5]:
best_run = sweep.best_run()
best_run.summary.get("avg_val_mAP")

[34m[1mwandb[0m: Sorting runs by -summary_metrics.avg_val_mAP


0.7651173954688729

In [6]:
worthiness_checker = utils.worthiness_checker.WorthinessChecker(best_run, constants)

Epoch configuration of the best run:
36
Early stopped at:
                   36    66    123    156    192    240
fold_index         1.0   2.0   3.0    4.0    5.0    6.0
cumulative_epoch  12.0  45.0  90.0  118.0  155.0  195.0
epoch_of_fold     12.0   9.0  18.0   10.0   11.0   15.0

Average epoch used as a reference for early stopping:  8


In [7]:
# optimized_model = worthiness_checker.train_full_model()

# model_file_name = 'bert-base-uncased_0.7332254639950794.pt' # bert
# model_file_name = 'roberta-base_0.7551132534277406.pt' # roberta
model_file_name = 'vinai_bertweet-covid19-base-uncased_0.7651173954688729.pt' # bertweet
PATH = os.path.join(parent_dir, 'Model', model_file_name)
worthiness_checker.load_model(PATH)

In [8]:
model_name = worthiness_checker.config.model_name.replace('/','_')
mAP = best_run.summary.get("avg_val_mAP")

PATH = os.path.join(parent_dir, 'Model','{}_{}.pt'.format(model_name, mAP))
# torch.save(optimized_model.state_dict(), PATH)

### Single Tests

In [69]:
tweet = "UK Health Minister Nadine Dorries has tested positive for COVID-19."

In [10]:
tweet = "i am positive"

In [9]:
tweet = "sheep is black"

In [79]:
tweet = "Recent research suggests that 15 percent of abortions are the result of coercion."

In [23]:
tweet = '''A Democratic bill negotiated between Sens. Joe Manchin and Chuck Schumer would "increase taxes on millions of Americans across every income bracket."'''


In [77]:
tweet = '''Nancy Pelosi and Democrats "want to turn 150 million Americans into felons overnight" with HR 1808.'''

In [32]:
tweet = '''John Fetterman wants to “eliminate life sentences for murderers."'''

In [75]:
tweet = '''"The Sun is out of place, the Moon is out of place and the stars are out of place. The compasses are off" because of a shift in the Earth’s poles.'''

In [73]:
tweet = "China threatens to shoot Nancy Pelosi’s plane down if she visits Taiwan."

In [16]:
tweet = "In Virginia, we actually do protect same-sex marriage."

In [80]:
probability = worthiness_checker.predict(tweet)

Recent research suggests that 15 percent of abortions are the result of coercion.
This expression contains a check-worthy claim with a 85.39% conficency 


In [33]:
probability

0.9899057745933533

### Batch Testing

In [104]:
# _, test_df = worthiness_checker.get_data_from_file(worthiness_checker.config.data_version)
_, test_df = worthiness_checker.get_data_from_file()

In [119]:
test_df

Unnamed: 0,topic_id,tweet_id,tweet_url,tweet_text,claim,claim_worthiness
0,covid-19,1237160250513522688,https://twitter.com/user/status/12371602505135...,POTUS wanted everyone to know he was in close ...,0,1
1,covid-19,1237125962871037953,https://twitter.com/user/status/12371259628710...,Who would you prefer to lead our nation’s resp...,0,0
2,covid-19,1237207721604235264,https://twitter.com/user/status/12372077216042...,It was a really really really really really re...,0,0
3,covid-19,1237178597024108552,https://twitter.com/user/status/12371785970241...,Bald-faced #LIE. @replouiegohmert did self-qua...,0,1
4,covid-19,1237049051058561024,https://twitter.com/user/status/12370490510585...,LIVE: Daily media briefing on #COVID19 with @D...,0,0
...,...,...,...,...,...,...
135,covid-19,1237094362544209920,https://twitter.com/user/status/12370943625442...,This is how the Corona Virus is spreading and ...,0,0
136,covid-19,1237063585580318720,https://twitter.com/user/status/12370635855803...,Markets are crashing. Tourism is dying. Travel...,0,0
137,covid-19,1237159267070664705,https://twitter.com/user/status/12371592670706...,Pray for Palestine. State of Emergency. - 27 c...,0,0
138,covid-19,1236972990492897281,https://twitter.com/user/status/12369729904928...,ADVISORY TO STUDENTS: Class suspensions were m...,0,0


In [122]:
test_df["tweet_url"]

0      https://twitter.com/user/status/12371602505135...
1      https://twitter.com/user/status/12371259628710...
2      https://twitter.com/user/status/12372077216042...
3      https://twitter.com/user/status/12371785970241...
4      https://twitter.com/user/status/12370490510585...
                             ...                        
135    https://twitter.com/user/status/12370943625442...
136    https://twitter.com/user/status/12370635855803...
137    https://twitter.com/user/status/12371592670706...
138    https://twitter.com/user/status/12369729904928...
139    https://twitter.com/user/status/12371776752126...
Name: tweet_url, Length: 140, dtype: object

In [107]:
test_dataset = worthiness_checker.create_dataset(test_df)
_, test_dataloader = ret_dataloader(worthiness_checker.config.batch_size, _, test_dataset)

In [108]:
def evaluate_one_batch(batch, self):
    b_input_ids = batch[0].to(self.constants.device)
    b_labels = batch[1].to(self.constants.device)
    with torch.no_grad():        
        probability = self.model(b_input_ids).flatten()
        loss = self.constants.loss_function(probability, b_labels)
    probability = probability.detach().cpu()
    return probability, loss

In [109]:
probability_list = torch.Tensor(0)
for batch in test_dataloader:
    probability, _ = evaluate_one_batch( batch, worthiness_checker)
    probability_list = torch.cat((probability_list, probability), axis=0)


In [110]:
probability_list

tensor([0.9831, 0.0064, 0.0049, 0.9904, 0.0054, 0.0061, 0.0183, 0.9927, 0.0044,
        0.9852, 0.9887, 0.9920, 0.9891, 0.9934, 0.9899, 0.0127, 0.0035, 0.9874,
        0.0046, 0.9915, 0.0063, 0.9077, 0.0062, 0.9913, 0.0075, 0.9903, 0.1583,
        0.9932, 0.9856, 0.5857, 0.0050, 0.7502, 0.9834, 0.0082, 0.9934, 0.8563,
        0.0082, 0.0246, 0.0106, 0.0056, 0.0529, 0.9910, 0.9647, 0.8628, 0.9782,
        0.9343, 0.0064, 0.9899, 0.9913, 0.0097, 0.3014, 0.9915, 0.0052, 0.9913,
        0.0081, 0.9796, 0.9159, 0.9915, 0.0063, 0.9866, 0.9816, 0.0053, 0.0071,
        0.0058, 0.0093, 0.9859, 0.0043, 0.9893, 0.0125, 0.0075, 0.9776, 0.9291,
        0.3711, 0.9876, 0.9915, 0.4031, 0.0157, 0.9886, 0.0054, 0.9927, 0.0061,
        0.9842, 0.7175, 0.4366, 0.0051, 0.0091, 0.1827, 0.0075, 0.5625, 0.0156,
        0.9870, 0.9893, 0.9455, 0.6578, 0.9918, 0.0061, 0.0369, 0.9877, 0.0251,
        0.9883, 0.9844, 0.0188, 0.0058, 0.0079, 0.1959, 0.0039, 0.2818, 0.0076,
        0.9371, 0.9891, 0.0085, 0.0048, 

In [111]:
predictions =  [int(i > .5) for i in probability_list]


In [112]:
predictions

[1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1]

In [113]:
len(predictions)

140

In [105]:
pd.set_option("display.max_colwidth", 200)

In [114]:
# model_prefix = 'bert_'
model_prefix = 'roberta_'
# model_prefix = 'bertweet_'

In [12]:
# eval_df = test_df.copy()
eval_df = pd.read_csv(os.path.join(data_dir,"eval_df.csv"))

In [115]:
eval_df[model_prefix + 'predictions'] = predictions
eval_df[model_prefix + 'probability'] = probability_list
eval_df

Unnamed: 0,tweet_text,check_worthiness,bert_predictions,bert_probability,roberta_predictions,roberta_probability,bertweet_predictions,bertweet_probability
0,POTUS wanted everyone to know he was in close ...,1,1,0.993852,1,0.983054,1,0.988866
1,Who would you prefer to lead our nation’s resp...,0,0,0.000108,0,0.006362,0,0.006661
2,It was a really really really really really re...,0,0,0.000569,0,0.004905,0,0.007607
3,Bald-faced LIE. did self-quarantine until CDC ...,1,1,0.999796,1,0.990378,1,0.990838
4,LIVE: Daily media briefing on COVID-19 with CO...,0,0,0.000080,0,0.005420,0,0.007499
...,...,...,...,...,...,...,...,...
135,This is how the COVID-19 is spreading and this...,0,1,0.999796,1,0.987439,0,0.009059
136,Markets are crashing. Tourism is dying. Travel...,0,1,0.992674,1,0.986089,0,0.143284
137,Pray for Palestine. State of Emergency. - 27 c...,0,0,0.001381,1,0.992275,0,0.314902
138,ADVISORY TO STUDENTS: Class suspensions were m...,0,0,0.000124,0,0.005456,0,0.006276


In [13]:
eval_df

Unnamed: 0,tweet_id,tweet_url,tweet_text,check_worthiness,bert_predictions,bert_probability,roberta_predictions,roberta_probability,bertweet_predictions,bertweet_probability,gpt3_predictions,gpt3_probability,embeddings
0,1237160250513522688,https://twitter.com/user/status/12371602505135...,POTUS wanted everyone to know he was in close ...,1,1,0.993853,1,0.983054,1,0.988866,1,0.890893,"[0.16674243, 0.3065092, -0.112421855, 0.048177..."
1,1237125962871037953,https://twitter.com/user/status/12371259628710...,Who would you prefer to lead our nation’s resp...,0,0,0.000108,0,0.006362,0,0.006661,0,0.999922,"[0.22938012, 0.054673575, -0.0858, -0.07526214..."
2,1237207721604235264,https://twitter.com/user/status/12372077216042...,It was a really really really really really re...,0,0,0.000569,0,0.004905,0,0.007607,0,0.959031,"[0.03409792, 0.45846257, -0.015111784, 0.25196..."
3,1237178597024108552,https://twitter.com/user/status/12371785970241...,Bald-faced LIE. did self-quarantine until CDC ...,1,1,0.999796,1,0.990378,1,0.990838,1,0.971630,"[0.008557552, -0.16238855, -0.34488454, 0.0608..."
4,1237049051058561024,https://twitter.com/user/status/12370490510585...,LIVE: Daily media briefing on COVID-19 with CO...,0,0,0.000080,0,0.005420,0,0.007499,0,0.999801,"[-0.4223225, 0.272865, -0.1823175, -0.44944727..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,1237094362544209920,https://twitter.com/user/status/12370943625442...,This is how the COVID-19 is spreading and this...,0,1,0.999796,1,0.987439,0,0.009059,0,0.760160,"[-0.010497776, -0.110912405, -0.053065408, -0...."
136,1237063585580318720,https://twitter.com/user/status/12370635855803...,Markets are crashing. Tourism is dying. Travel...,0,1,0.992674,1,0.986089,0,0.143284,0,0.597120,"[-0.0055288733, 0.30287853, -0.058681164, -0.0..."
137,1237159267070664705,https://twitter.com/user/status/12371592670706...,Pray for Palestine. State of Emergency. - 27 c...,0,0,0.001381,1,0.992275,0,0.314902,1,0.737024,"[-0.108859845, 0.18808445, 0.0026538572, 0.043..."
138,1236972990492897281,https://twitter.com/user/status/12369729904928...,ADVISORY TO STUDENTS: Class suspensions were m...,0,0,0.000124,0,0.005456,0,0.006276,0,0.999680,"[0.105307795, 0.12508944, 0.03175255, -0.10886..."


In [126]:
eval_df.insert(0, 'tweet_url', test_df["tweet_url"])

In [128]:
eval_df.insert(0, 'tweet_id', test_df["tweet_id"])

In [130]:
eval_df.to_csv("eval_df.csv", encoding='utf-8', index=False)

False Positives:

* (output = 0.986627) the number of COVID-19 cases in the US surpasses 1,000 with 1,004 people in 37 states and DC testing positive for COVID-19, plus 31 deaths. This is just beginning the acceleration phase of the Cor...

* (output = 0.987125) Italy's Prime Minister Giuseppe Conte has announced that the whole of the country is being put on lockdown in an attempt to contain the COVID-19 outbreak. For the latest on COVID-19, click here:

***

* (output = 0.986261) The empire is striking back. The COVID-19 is now being used as a weapon to destabilize the US economy because that the powers that be feel that’s the only way they can get rid of Trump and regain ...

* (output = 0.857124) As two epidemics - COVID-19 and Brexit - hit us, don’t let them make you forget: Priti Patel scandal and investigations of Johnson’s Arcuri Russia report referendum crimes lies incompetence etc et...

False Negatives

* (output = 0.006674) Democrats and the Media need to stop using the COVID-19 to politicize things and scare people. It's irresponsible. This is not the time to try and gain political points or headlines from scaring p...

* (output = 0.006982) คำขวัญ Thailand 2020 No privacy No security No democracy No hope No future No mask But we had COVID-19 and stupid government Thank you ธนาธร ไม่เอารัฐประหาร

***

* (output = 0.006344) Italian doctor facing COVID-19 tsunami publishes long, moving thread that culminates with, ‘Is panic really worse than neglect and carelessness during an epidemic of this sort?’ Read the whole fri...

* (output = 0.006693) This thread needs to fly. It shows how the legacy media is USING COVID-19 as a political weapon and even how the SAME reporters are contradicting themselves. This. Is. SICK.

Ommitted Link: THREAD: Fri Jan 31, 2020, a few weeks before #Coronavirus has officially spread to other countries (which led to the bad stock market week Feb 24-Feb 28), the Trump Admin announced travel restrictions on China. Here is some of the reporting it generated. Take Politico of 2/4/20.

Tweet: ""private ny colleges: *closed college for the week because of the COVID-19* CUNY: *installed two new hand sanitizer dispensers*""
Label: 0
Prediction: 1 with 98.75% probability

Zero-shot GPT-3 response: "The sentiment of this tweet is that private colleges are not doing enough to prevent the spread of COVID-19, while CUNY is taking steps to protect its students. This is not a check-worthy claim."

In [108]:
test_df[test_df['predictions'] != test_df['check_worthiness']]

Unnamed: 0,tweet_text,check_worthiness,predictions,probability
15,"Trust Merkel to be the first world leader to say openly what's what: the main point of social distancing measures (quarantines, closures, lockdowns) is to slow the spread of the COVID-19 so that h...",1,0,0.009174
19,The empire is striking back. The COVID-19 is now being used as a weapon to destabilize the US economy because that the powers that be feel that’s the only way they can get rid of Trump and regain ...,0,1,0.986261
20,private ny colleges: *closed college for the week because of the COVID-19* CUNY: *installed two new hand sanitizer dispensers*,0,1,0.98755
22,This thread needs to fly. It shows how the legacy media is USING COVID-19 as a political weapon and even how the SAME reporters are contradicting themselves. This. Is. SICK.,1,0,0.006693
23,"DEVELOPING: confirms directly to that he is self-quarantined after testing negative COVID-19. Meadows has not slowed down, with conference calls preparing for transition WH Chief of Staff. Meadows...",0,1,0.990744
26,"Older adults and people of all ages with severe chronic medical conditions are more likely to develop serious outcomes, including death, if infected with COVID-19. See CDC guidance for people who ...",0,1,0.601119
29,"This was sent to me by a friend from Naples, Italy. The “protezione civile” (civil protection service) has been sending cars (even late at night) with loud speakers urging residents to stay indoor...",0,1,0.986594
31,"Desperate Trump supporters are trying to label COVID-19 the Wuhan virus to distract from the fact that the Trump administration's failures have increased the danger here in the U.S. It's childish,...",1,0,0.356949
32,The COVID-19 has been around forever. The COVID-19 is a different strain just like there’s different strains of the flu. STOP the panic. Take precaution and follow the guidelines for prevention! m...,1,0,0.007784
36,"On COVID2019, said: “One of the theories is perhaps you could take it on the chin, take it all in one go and allow coronvirus to move through the population without really taking as many draconian...",1,0,0.364582


### Creating Contextual Embeddings

In [None]:
embeddings_list = []
for index, row in eval_df.iterrows():
    tweet_embedding = worthiness_checker.get_embedding(tweet)
    tweet_embedding = tweet_embedding.detach().cpu().numpy().tolist()

In [21]:
with torch.no_grad:
    tweet_embedding = worthiness_checker.get_embedding(tweet)

AttributeError: __enter__

In [17]:
type(tweet_embedding)

torch.Tensor

In [22]:
tweet_embedding.detach().cpu().numpy().tolist()

[-0.0495423786342144,
 -0.2645258903503418,
 -0.06946999579668045,
 -0.06895913928747177,
 -0.0009717809152789414,
 0.24937814474105835,
 -0.19349314272403717,
 0.436924546957016,
 0.5833499431610107,
 -0.03563217446208,
 0.20537656545639038,
 0.02478991262614727,
 -0.11125557869672775,
 0.5068895220756531,
 0.5516275763511658,
 -0.17296750843524933,
 -0.2981228232383728,
 0.2281217724084854,
 -0.1513673961162567,
 -0.1396811306476593,
 0.13323469460010529,
 0.2011302262544632,
 0.1468837559223175,
 0.11683841049671173,
 0.1654742807149887,
 0.10736075788736343,
 0.02250497415661812,
 0.021840842440724373,
 0.2533334195613861,
 0.17998504638671875,
 0.4966568648815155,
 0.25112390518188477,
 -0.009767972864210606,
 0.47279277443885803,
 -0.16214367747306824,
 -0.19828131794929504,
 0.2445499151945114,
 -0.1412193477153778,
 0.1275726854801178,
 -0.03928835317492485,
 0.1320190578699112,
 -0.2511498034000397,
 -0.42060813307762146,
 -0.06611617654561996,
 0.5653879046440125,
 0.28243696