In [1]:
import os
import json
import numpy as np
import pandas as pd


In [2]:
from cleanlab.classification import CleanLearning
from cleanlab.filter import find_label_issues

In [3]:
# Derive the project directories from the notebook's current working directory:
# parent_dir is the directory one level above it, data_dir its "Data" sub-folder.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_dir = os.path.join(parent_dir, "Data")

### Feeding Cleanlab with previously predicted test data

In [4]:
# Load eval_df.csv from the Data directory. Later cells read its tweet_text,
# check_worthiness, gpt3_predictions and gpt3_probability columns.
eval_df = pd.read_csv(os.path.join(data_dir,"eval_df.csv"))

In [5]:
def prob2probArray(binary_label, label_prob):
    """Expand a single probability into a two-element list.

    Args:
        binary_label: Value tested for truthiness to pick the slot order.
        label_prob: Probability to place in the list.

    Returns:
        ``[1 - label_prob, label_prob]`` when ``binary_label`` is truthy,
        otherwise ``[label_prob, 1 - label_prob]`` (a plain Python list).
    """
    complement = 1 - label_prob
    if binary_label:
        return [complement, label_prob]
    return [label_prob, complement]

In [6]:
# Build one two-element probability list per row of eval_df.
pred_probs = eval_df.apply(
    lambda row: prob2probArray(row.gpt3_predictions, row.gpt3_probability),
    axis=1,
)

In [7]:
# Stack the per-row probability lists into one NumPy array, then show its shape.
gpt3_prediction_prob = np.array(pred_probs.values.tolist())
gpt3_prediction_prob.shape

(140, 2)

In [8]:
# Ask cleanlab which eval rows look mislabeled, given the dataset labels and the
# GPT3 probability array; indices come back ranked by 'self_confidence'.
ordered_label_issues = find_label_issues(
    labels=eval_df["check_worthiness"],
    pred_probs=gpt3_prediction_prob,
    return_indices_ranked_by="self_confidence",
)

In [9]:
# Display the ranked indices returned by find_label_issues for the eval set.
ordered_label_issues 

array([120,  38,  59,  76,  51,  63,  42,  15,  23,  36,  55, 121, 119,
       104,  88,  91,  54,  14, 101,  29,  56, 110,  22,  72, 134, 107,
       137,  82,  90])

In [10]:
# Number of eval rows flagged as potential label issues.
ordered_label_issues.shape

(29,)

In [None]:
# Re-display the flagged eval indices for reference before printing row details.
ordered_label_issues

array([120,  38,  59,  76,  51,  63,  42,  15,  23,  36,  55, 121, 119,
       104,  88,  91,  54,  14, 101,  29,  56, 110,  22,  72, 134, 107,
       137,  82,  90])

In [34]:
def print_row_details(_index):
    """Print the tweet, dataset label and GPT3 output for one row of ``eval_df``.

    Args:
        _index: Positional (``iloc``) row selector into the global ``eval_df``.
    """
    row = eval_df.iloc[_index]
    print(row.tweet_text)
    print(f"Dataset label: {row.check_worthiness}")
    print(f"GPT3 Predictions: {row.gpt3_predictions}")
    print(f"GPT3 probability: {row.gpt3_probability}")
    print("***")

In [None]:
# Print the details of every flagged eval row, in the ranked order.
for _index in ordered_label_issues:
    print_row_details(_index)

In [None]:
# Keep only the eval rows whose GPT3 prediction differs from the dataset label.
false_predictions = eval_df.loc[
    eval_df["check_worthiness"].ne(eval_df["gpt3_predictions"])
]

In [None]:
# Order the mismatching rows from highest to lowest gpt3_probability.
false_predictions = false_predictions.sort_values(
    by='gpt3_probability',
    ascending=False,
)

In [None]:
# Index labels of the mismatching rows, in sorted order (to compare with cleanlab's list).
false_predictions.index

In [None]:
# Indices flagged by cleanlab, shown next to the mismatching-row index above.
ordered_label_issues

In [None]:
# Widen the column display so tweet texts are not cut short, then preview the
# first three mismatching rows restricted to the columns of interest.
pd.set_option("display.max_colwidth", 300)
false_predictions.head(3)[
    ["tweet_text", "check_worthiness", "gpt3_predictions", "gpt3_probability"]
]

### Feeding Cleanlab with training data previously predicted by cross-validation

In [18]:
# Load train_predictions_df.csv from the Data directory. Later cells read its
# tweet_text, check_worthiness, bertweet_predictions and bertweet_probability columns.
train_df = pd.read_csv(os.path.join(data_dir,"train_predictions_df.csv"))

In [39]:
# For every training row build the pair [1 - bertweet_probability, bertweet_probability].
train_pred_probs = train_df.apply(
    lambda row: [1 - row.bertweet_probability, row.bertweet_probability],
    axis=1,
)

In [41]:
# Convert the per-row probability lists into a single NumPy array.
# `.tolist()` is available on both a pandas Series and a NumPy array, so this
# cell stays re-runnable after `train_pred_probs` has already been converted;
# going through `.values` would fail on the second run because an ndarray has
# no `.values` attribute.
train_pred_probs = np.array(train_pred_probs.tolist())

In [42]:
# Rank the training rows cleanlab flags as likely label issues.
# pred_probs: out-of-sample predicted probabilities from any model.
ordered_train_label_issues = find_label_issues(
    labels=train_df["check_worthiness"],
    pred_probs=train_pred_probs,
    return_indices_ranked_by="self_confidence",
)

In [52]:
def print_row_details(_index):
    """Print the tweet, dataset label and Bertweet output for one row of ``train_df``.

    Note: this redefines the earlier helper of the same name, which read from
    ``eval_df``; after this cell runs, the name refers to the ``train_df`` version.

    Args:
        _index: Positional (``iloc``) row selector into the global ``train_df``.
    """
    row = train_df.iloc[_index]
    print(row.tweet_text)
    print(f"Dataset label: {row.check_worthiness}")
    print(f"Bertweet Predictions: {row.bertweet_predictions}")
    print(f"Bertweet probability: {row.bertweet_probability}")
    print("\n***\n")

In [None]:
# Print the details of every flagged training row, in the ranked order.
for _index in ordered_train_label_issues:
    print_row_details(_index)

In [56]:
# Same cleanlab call without a ranking argument; the result is stored per row
# as the `label_issue` column in a later cell.
# pred_probs: out-of-sample predicted probabilities from any model.
label_issues_list = find_label_issues(
    labels=train_df["check_worthiness"],
    pred_probs=train_pred_probs,
)

In [59]:
# Attach cleanlab's per-row result to train_df as a new column (mutates train_df in place).
train_df["label_issue"] = label_issues_list

In [77]:
# Keep only the training rows whose label_issue value equals False.
filtered_train_df = train_df.loc[train_df["label_issue"].eq(False)]

In [80]:
# Write the filtered training set as a tab-separated file (path is relative to
# the current working directory); the index is not written.
filtered_train_df.to_csv(
    "train_english_filtered.tsv",
    sep='\t',
    index=False,
    encoding='utf-8',
)

In [72]:
# Training rows where the Bertweet prediction agrees with the dataset label.
train_df.loc[train_df["check_worthiness"].eq(train_df["bertweet_predictions"])]

Unnamed: 0,tweet_text,check_worthiness,bertweet_predictions,bertweet_probability,label_issue
0,Since this will never get reported by the medi...,1,1,0.635311,False
1,"Thanks, MichaelBloomberg. Here’s a handy littl...",0,0,0.010500,False
2,"Folks, when you say ""The COVID-19 isn't a big ...",0,0,0.024375,False
5,"I live in Seattle, I have all symptoms of COVI...",0,0,0.039284,False
6,"my dad said ""why don't they just cure COVID-19...",0,0,0.032007,False
...,...,...,...,...,...
817,Stop spreading fake news COVID-19,0,0,0.102101,False
818,It's fake! It's fake!' shout residents of a co...,1,1,0.983913,False
819,Be Smart about COVID-19: 1⃣ follow accurate pu...,0,0,0.006635,False
820,"On the left: , a Qatari puppet, attacks Saudi ...",1,1,0.988063,False


In [75]:
# Training rows where the prediction disagrees with the dataset label but the
# row's label_issue value is False.
train_df.loc[
    train_df["label_issue"].eq(False)
    & train_df["check_worthiness"].ne(train_df["bertweet_predictions"])
]

Unnamed: 0,tweet_text,check_worthiness,bertweet_predictions,bertweet_probability,label_issue
291,I tweeted a link to an COVID-19 tracker and so...,0,1,0.571549,False
398,COVID-19 cases increase exponentially. China 1...,1,0,0.294081,False
401,Reaction from various teams after hearing IPL ...,0,1,0.567785,False
420,With recorded COVID-19 cases (outside china) s...,0,1,0.527964,False
435,Soap kills COVID-19 - the molecular chemistry ...,1,0,0.490705,False
454,When the Seattle researchers found COVID-19 in...,1,0,0.332599,False
628,Hungarian Justice Min. sends bill to Parliamen...,1,0,0.42544,False
690,There's not a single confirmed case of an Asia...,0,1,0.54906,False
772,"Just like all the other fake stuff they do, th...",1,0,0.445672,False
774,hakyeon: ive donated money toward overcoming t...,1,0,0.396126,False


In [45]:
# Inspect training row 654, the first index in the recorded ordered_train_label_issues output.
train_df.iloc[654]

tweet_text              Garlic may be tasty, but it WON'T protect you ...
check_worthiness                                                        1
bertweet_predictions                                                    0
bertweet_probability                                             0.006102
Name: 654, dtype: object

In [44]:
# Display the ranked indices of the flagged training rows.
ordered_train_label_issues

array([654,  98, 170, 258, 378, 217, 356, 471, 719, 283, 226, 586, 102,
       123, 555,  65, 514, 190, 494, 457, 737,  83, 455,  86, 446, 186,
       445, 377, 643, 254, 627, 776, 757, 787, 599, 205, 251, 600, 475,
       796, 162, 572, 206, 280, 327, 428, 333, 790, 248,  16, 209, 461,
       559, 759, 681, 328, 672, 487, 365, 314, 185, 353, 611, 175, 118,
        14, 663, 478, 212,  96, 517, 811, 706, 335, 622, 336, 752,  39,
       281, 659,  31, 624, 126, 239, 608,  18, 367, 739, 709, 641, 786,
       781,  77, 585, 490, 325, 778,  29, 379, 779, 173, 749, 506, 789,
       149,  28, 637, 290,  55, 357, 651, 322, 619, 697, 730, 688, 234,
       682, 271,   3, 355, 125, 617, 203, 145, 178, 660, 812, 284, 237,
       289, 397,  22, 626,  82, 689, 636, 166, 771, 158, 802, 341,   4,
       207, 272, 364, 633, 459, 297,  93,  80, 529])

In [24]:
# Display the training probability container.
# NOTE(review): the output recorded under this cell is a pandas Series from an
# earlier execution and does not appear to match the current [1 - p, p]
# definition or the later array conversion — re-run to refresh.
train_pred_probs

0               [0.36468893, 0.63531107]
1             [0.010499758, 0.989500242]
2             [0.024375355, 0.975624645]
3      [0.15086275000000005, 0.84913725]
4               [0.25631145, 0.74368855]
                     ...                
817           [0.102100775, 0.897899225]
818     [0.016086999999999962, 0.983913]
819           [0.006634871, 0.993365129]
820    [0.011936700000000022, 0.9880633]
821     [0.06201509999999999, 0.9379849]
Length: 822, dtype: object

In [19]:
# Display train_df (the recorded output was captured before the label_issue column was added).
train_df

Unnamed: 0,tweet_text,check_worthiness,bertweet_predictions,bertweet_probability
0,Since this will never get reported by the medi...,1,1,0.635311
1,"Thanks, MichaelBloomberg. Here’s a handy littl...",0,0,0.010500
2,"Folks, when you say ""The COVID-19 isn't a big ...",0,0,0.024375
3,Just 1 case of COVID-19 in India and people ar...,0,1,0.849137
4,President made a commitment to donate his sala...,1,0,0.256311
...,...,...,...,...
817,Stop spreading fake news COVID-19,0,0,0.102101
818,It's fake! It's fake!' shout residents of a co...,1,1,0.983913
819,Be Smart about COVID-19: 1⃣ follow accurate pu...,0,0,0.006635
820,"On the left: , a Qatari puppet, attacks Saudi ...",1,1,0.988063


### Feeding Cleanlab with raw classifier and training data

In [4]:
from skorch import NeuralNetClassifier
from sklearn.model_selection import cross_val_predict
import torch
from torch import nn
import wandb

#Custom modules
import sys
if ".." not in sys.path:
    sys.path.append('../')
import utils
from utils import custom_models, early_stopping, worthiness_checker, constants

In [5]:
# wandb settings, passed through environment variables
os.environ.update(
    {
        "WANDB_SILENT": "true",
        "WANDB_NOTEBOOK_NAME": 'TransformersForClaimWorthiness.ipynb',
    }
)

# Constants
# Use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
parent_dir = os.path.dirname(os.path.abspath(os.getcwd()))
seed_list = [7, 42]  # alternative kept for reference: [7, 42, 127]
fold_count = 3  # alternative kept for reference: 5
patience = 5
loss_function = nn.BCELoss()

# Structured dtype with one float field per metric name.
metric_types = np.dtype(
    [
        (metric_name, float)
        for metric_name in (
            "mAP",
            "auc",
            "accuracy",
            "precision",
            "recall",
            "f1",
            "mcc",
            "log_loss",
            "loss",
        )
    ]
)

In [6]:
# Create the shared configuration object and attach this notebook's settings.
# The class is reached through `utils.constants` rather than the bare
# `constants` name because this cell rebinds `constants` to the instance:
# with `constants.Constants()` a second run of the cell would look up
# `Constants` on the instance instead of the module and fail. This also
# matches how `utils.worthiness_checker` is referenced further down.
constants = utils.constants.Constants()
constants.device = device
constants.parent_dir = parent_dir
constants.seed_list = seed_list
constants.fold_count = fold_count
constants.patience = patience
constants.loss_function = loss_function
constants.metric_types = metric_types

In [7]:
# Look up the chosen wandb sweep by id and fetch its best run.
api = wandb.Api()
best_sweep = '2afv0m0i' # sweep id; original note: bertweet
sweep = api.sweep("cemulu/Transformers_For_ClaimWorthiness/" + best_sweep)
best_run = sweep.best_run()
# Display the avg_val_mAP value stored in the best run's summary.
best_run.summary.get("avg_val_mAP")

[34m[1mwandb[0m: Sorting runs by -summary_metrics.avg_val_mAP


0.7651173954688729

In [8]:
# Build a WorthinessChecker from the best wandb run and the notebook constants.
# NOTE: this rebinds the name `worthiness_checker` (imported above as a module)
# to the instance, which is why the class is referenced via `utils.worthiness_checker`.
worthiness_checker = utils.worthiness_checker.WorthinessChecker(best_run, constants)

Epoch configuration of the best run:
36
Early stopped at:
                   36    66    123    156    192    240
fold_index         1.0   2.0   3.0    4.0    5.0    6.0
cumulative_epoch  12.0  45.0  90.0  118.0  155.0  195.0
epoch_of_fold     12.0   9.0  18.0   10.0   11.0   15.0

Average epoch used as a reference for early stopping:  8


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [9]:
# Load the raw model; presumably this populates `worthiness_checker.model`,
# which the next cell reads — TODO confirm against utils.worthiness_checker.
worthiness_checker.load_raw_model()

Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaModel: ['lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [10]:
# Wrap the checker's model in a skorch NeuralNetClassifier.
model_skorch = NeuralNetClassifier(worthiness_checker.model)

In [11]:
# Load the tab-separated training file from the Data directory.
# NOTE: this rebinds `train_df`, which earlier held train_predictions_df.csv.
train_df = pd.read_csv(os.path.join(parent_dir, 'Data','train_english_cleaned_without_mentions.tsv'), delimiter='\t')

In [None]:
# List every index label of train_df as a plain Python list.
train_df.index.values.tolist()

In [None]:
# Show the tweet text of the training row at position 252.
train_df.iloc[252].tweet_text          