In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install transformers
!pip install pytorch-nlp



In [None]:
import sys
import os

os.chdir('/content/drive/MyDrive/NLPProject/bias' )
sys.path.append('/content/drive/MyDrive/NLPProject')
sys.path.append('/content/drive/MyDrive/NLPProject/bias/')
os.environ['OMP_NUM_THREADS'] = "1"

import argparse
import pandas as pd
import pickle
from model.generator import TransformerDataset, transformer_collate
from model.bertmodel import MyBertModel
from model.emobertmodel import emoMyBertModel
from model.emo_Attention2_bertmodel import emoAtt2MyBertModel
from model.lstmmodel import LSTMModel
import torch
from parameters import BERT_MODEL_PATH, CLAIM_ONLY, CLAIM_AND_EVIDENCE, EVIDENCE_ONLY, DEVICE, INPUT_TYPE_ORDER
from transformers import AdamW
import numpy as np
from utils.utils import print_message, clean_str, preprocess
from sklearn.metrics import f1_score
from sklearn.utils.class_weight import compute_class_weight
from collections import Counter
from torchnlp.word_to_vector import GloVe
from collections import Counter
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
#from hypopt import GridSearch
from model_selection import GridSearch
from tqdm import tqdm


def load_data(dataset, step = 'none'):
    """Load claims, snippets, label order and index splits for one dataset.

    Files are read from ``../multi_fc_publicdata/<dataset>/`` relative to the
    current working directory. ``step`` selects which files are read and
    whether the text is preprocessed:

    * ``'formal'`` / ``'informal'``: ``<dataset>_<step>.tsv`` and
      ``<dataset>_<step>_snippets.tsv``, used as-is.
    * ``'none'`` and the ``EMO_*`` steps: ``<dataset>.tsv`` and
      ``<dataset>_snippets.tsv``, used as-is.
    * ``'neutralized'``: ``<dataset>_neutralized.tsv`` together with the
      regular ``<dataset>_snippets.tsv``.
    * any other value: the regular files, with ``preprocess(text, step)``
      applied to column 1 of the claims and columns 1-10 of the snippets.

    Args:
        dataset: dataset name, used both as directory and file prefix.
        step: configuration string described above.

    Returns:
        ``(main_data, snippets_data, label_order, splits)`` where the first
        two are header-less DataFrames and the last two are whatever objects
        are stored in ``<dataset>_labels.pkl`` and
        ``<dataset>_index_split.pkl``.
    """
    path = "../multi_fc_publicdata/" + dataset + "/"
    print(f'load data function: step = {step}')

    raw_text_steps = ('none', 'EMO_LEXI', 'EMO_INT', 'EMO_ATT2_LEXI', 'EMO_ATT2_INT')
    if step == 'formal' or step == 'informal':
        print('**** load data: formal or informal *****')
        main_data = pd.read_csv(path + dataset + "_" + step + ".tsv", sep="\t", header=None)
        snippets_data = pd.read_csv(path + dataset + "_" + step + "_snippets.tsv", sep="\t", header=None)
    elif step in raw_text_steps:
        print('****load data: none-preprocess, EMO_LEXI, EMO_INT, EMO_ATT2_LEXI, EMO_ATT2_INT *****')
        main_data = pd.read_csv(path + dataset + ".tsv", sep="\t", header=None)
        snippets_data = pd.read_csv(path + dataset + "_snippets.tsv", sep="\t", header=None)
    elif step == 'neutralized':
        print('****load data: neutralized *****')
        main_data = pd.read_csv(path + dataset + "_" + step + ".tsv", sep="\t", header=None)
        snippets_data = pd.read_csv(path + dataset + "_snippets.tsv", sep="\t", header=None)
    else:
        print('****load data: preprocess *****')
        main_data = pd.read_csv(path + dataset + ".tsv", sep="\t", header=None)
        snippets_data = pd.read_csv(path + dataset + "_snippets.tsv", sep="\t", header=None)

        # Assign whole columns instead of `df[col][index] = value`: chained
        # assignment is not guaranteed to write back into the frame (and is a
        # no-op under pandas Copy-on-Write).
        # NOTE(review): preprocess() is now called column by column rather
        # than row by row; this assumes it is stateless -- confirm.
        main_data[1] = main_data[1].map(lambda text: preprocess(text, step))
        for column in range(1, 11):
            snippets_data[column] = snippets_data[column].map(lambda text: preprocess(text, step))

    # NOTE: pickle must only be used on trusted files; these are expected to
    # be the local dataset artefacts.
    with open(path + dataset + "_labels.pkl", "rb") as labels_file:
        label_order = pickle.load(labels_file)
    with open(path + dataset + "_index_split.pkl", "rb") as splits_file:
        splits = pickle.load(splits_file)

    return main_data, snippets_data, label_order, splits

def make_generators(main_data, snippets_data, label_order, splits, params, dataset_generator=TransformerDataset, other_dataset=False):
    """Build one DataLoader per split and the class weights for training.

    Args:
        main_data: claims DataFrame; column 2 is read as the label column.
        snippets_data: snippets DataFrame, indexed with the same splits.
        label_order: sequence of label names; a label's position is its
            class index.
        splits: sequence of row indexers, one per split. At least three are
            required; the first is treated as the training split.
        params: two-item sequence of DataLoader keyword dicts. ``params[0]``
            is used for the first split and ``params[1]`` for all others.
        dataset_generator: callable invoked as
            ``dataset_generator(claim_rows, snippet_rows, label_order)`` to
            build the dataset wrapped by each DataLoader.
        other_dataset: when true, no class weights are computed.

    Returns:
        ``(loader_0, loader_1, loader_2, label_weights)``. ``label_weights``
        is a float32 tensor of "balanced" class weights computed on the first
        split, or ``None`` when ``other_dataset`` is true.
    """
    all_labels = main_data.values[:,2]
    counter = Counter(all_labels)
    # "\\%" emits the same backslash-percent text as before, without relying
    # on the invalid escape sequence "\%" (a SyntaxWarning on Python 3.12+).
    ss = "".join(
        ", " + str(c) + " (" + str(np.around(counter[c]/len(all_labels) * 100,1)) + "\\%)"
        for c in label_order
    )
    print("len", len(all_labels), ss)

    generators = []
    for isplit, split in enumerate(splits):
        sub_main_data = main_data.values[split]
        sub_snippets_data = snippets_data.values[split]
        split_dataset = dataset_generator(sub_main_data, sub_snippets_data, label_order)

        # Only the first split gets the first parameter set.
        loader_kwargs = params[0] if isplit == 0 else params[1]
        generators.append(torch.utils.data.DataLoader(split_dataset, **loader_kwargs))

    if other_dataset:
        label_weights = None
    else:
        # Class weights come from the label distribution of the first split.
        train_labels = main_data.values[splits[0]][:,2]
        train_label_ids = np.array([label_order.index(v) for v in train_labels])
        label_weights = torch.tensor(
            compute_class_weight("balanced", classes=np.arange(len(label_order)), y=train_label_ids).astype(np.float32)
        )

    return generators[0], generators[1], generators[2], label_weights

def evaluate(generator, model, other_from=None, ignore_snippet=None):
    """Run ``model`` over ``generator`` and score it with micro/macro F1.

    Each batch is read positionally: item 0 is the claim ids, item 1 the
    claims, item 2 the labels and item 3 the snippets.

    Args:
        generator: iterable of batches as described above.
        model: callable invoked as ``model(claims, snippets)`` returning
            logits with the classes on dimension 1.
        other_from: ``"snes"`` rewrites predicted class 0 to class 1 before
            scoring; ``"pomt"`` or ``None`` leaves predictions untouched.
        ignore_snippet: optional index (or indices) into each sample's
            snippets; those entries are overwritten in place with
            ``"filler"`` before the forward pass.

    Returns:
        ``(f1_micro, f1_macro, claim_ids, logits, labels, predictions)``,
        the last four being flat Python lists over all batches.
    """
    gold_labels, predicted_labels = [], []
    claim_ids, logit_rows = [], []

    for batch in generator:
        batch_ids, batch_claims = batch[0], batch[1]
        batch_labels, batch_snippets = batch[2], batch[3]

        if ignore_snippet is not None:
            # Blank out the selected snippets of every sample in the batch.
            for sample_snippets in batch_snippets:
                sample_snippets[ignore_snippet] = "filler"

        gold_labels.extend(batch_labels)
        batch_logits = model(batch_claims, batch_snippets)
        batch_predictions = torch.argmax(batch_logits, 1).cpu().numpy()

        # other_from == "pomt": other data is pomt and the model was trained
        # on snes -- nothing to remap.
        if other_from == "snes":
            # Other data is snes and the model was trained on pomt: both
            # "pants on fire!" (0) and "false" (1) count as false.
            batch_predictions[batch_predictions == 0] = 1

        predicted_labels.extend(batch_predictions.tolist())
        claim_ids.extend(batch_ids)
        logit_rows.extend(batch_logits.cpu().numpy().tolist())

    f1_micro = f1_score(gold_labels, predicted_labels, average="micro")
    f1_macro = f1_score(gold_labels, predicted_labels, average="macro")

    return f1_micro, f1_macro, claim_ids, logit_rows, gold_labels, predicted_labels

def train_step(optimizer, vals, model, criterion):
    """Perform one optimisation step on a single batch and return its loss.

    ``vals`` is read positionally: item 1 is the claims, item 2 the labels
    (converted to a tensor on ``DEVICE``) and item 3 the snippets.
    """
    optimizer.zero_grad()

    batch_claims, batch_snippets = vals[1], vals[3]
    targets = torch.tensor(vals[2]).to(DEVICE)

    batch_loss = criterion(model(batch_claims, batch_snippets), targets)
    batch_loss.backward()
    optimizer.step()

    return batch_loss


def get_embedding_matrix(generators, dataset, min_occurrence=1):
    """Return a GloVe('840B') embedding matrix and vocabulary for a dataset.

    The result is cached in ``preprocessed/<dataset>_glove.pkl`` (relative to
    the current working directory). When that file exists it is loaded and
    returned without touching ``generators``.

    Otherwise the vocabulary is collected from the claims (batch item 1) and
    snippets (batch item 3) of every generator, after ``clean_str`` and
    splitting on single spaces.

    Args:
        generators: iterable of batch iterables to collect text from.
        dataset: dataset name, used only to name the cache file.
        min_occurrence: a word is kept only if it occurs strictly more than
            this many times, so the default drops words seen exactly once.

    Returns:
        ``(glove_embedding_matrix, word2idx, idx2word)``. The matrix has
        shape ``(len(word2idx) + 2, 300)``; rows 0 and 1 are reserved (mask
        and unknown token) and, like rows of words missing from GloVe, hold
        uniform random values in [-0.5, 0.5).
    """
    savename = "preprocessed/" + dataset + "_glove.pkl"
    if os.path.exists(savename):
        # NOTE: pickle must only be used on trusted files; this is expected to
        # be the cache written by a previous run of this function.
        with open(savename, "rb") as cache_file:
            cached = pickle.load(cache_file)
        return cached[0], cached[1], cached[2]

    glove_vectors = GloVe('840B')
    all_claims = []
    all_snippets = []
    for gen in generators:
        for vals in gen:
            all_claims += [clean_str(v) for v in vals[1]]
            all_snippets += [clean_str(item) for sublist in vals[3] for item in sublist]

    counter = Counter(word for text in all_claims + all_snippets for word in text.split(" "))
    # Sorted so that index assignment does not depend on set iteration order.
    vocabulary = sorted(word for word, occurrences in counter.items() if occurrences > min_occurrence)
    word2idx = {word: i+2 for i, word in enumerate(vocabulary)} # reserve 0 for potential mask and 1 for unk token
    idx2word = {index: word for word, index in word2idx.items()}

    glove_embedding_matrix = np.random.random((len(idx2word)+2, 300)) - 0.5
    for word, index in word2idx.items():
        if word in glove_vectors:
            glove_embedding_matrix[index] = glove_vectors[word]

    # Create the cache directory if needed so the dump cannot fail after all
    # the work above.
    os.makedirs(os.path.dirname(savename), exist_ok=True)
    with open(savename, "wb") as cache_file:
        pickle.dump([glove_embedding_matrix, word2idx, idx2word], cache_file)
    return glove_embedding_matrix, word2idx, idx2word

def train_model(model, criterion, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename):
    """Train with early stopping on validation macro-F1 and pickle the results.

    Every epoch runs ``train_step`` over ``train_generator``. Every
    ``args.eval_per_epoch`` epochs the model is scored on the validation set;
    whenever validation macro-F1 improves, the test set and the "other"
    (cross-dataset) set are evaluated too and, unless
    ``args.inputtype == "CLAIM_ONLY"``, a snippet-removal ablation is run
    that blanks the first 1..10 and the last 1..10 snippets. Training stops
    after 8 consecutive evaluations without improvement, at which point the
    results of the best evaluation are pickled to ``savename`` as
    ``[val_store, test_store, other_test_store, [args]]``.

    Args:
        model, criterion, optimizer: forwarded to ``train_step``/``evaluate``.
        train_generator, val_generator, test_generator, other_generator:
            batch iterables for the respective sets.
        args: configuration object; ``eval_per_epoch``, ``dataset`` and
            ``inputtype`` are read here.
        savename: path of the pickle file to write.
    """
    trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in trainable_parameters])
    print("model parameters", params)

    # Label-remapping mode handed to evaluate() for the cross-dataset set.
    other_from = "snes" if args.dataset == "pomt" else "pomt"

    num_epochs = 0
    patience_counter = 0
    patience_max = 8

    best_f1 = -np.inf
    while True:
        train_losses = []

        model.train()
        for vals in train_generator:
            loss = train_step(optimizer, vals, model, criterion)
            train_losses.append(loss.item())

        num_epochs += 1
        print_message("TRAIN loss", np.mean(train_losses), num_epochs)

        if num_epochs % args.eval_per_epoch != 0:
            continue

        model.eval()
        with torch.no_grad():
            val_f1micro, val_f1macro, val_claimIDs, val_logits, val_labels, val_predictions = evaluate(val_generator, model)
            print_message("VALIDATION F1micro, F1macro, loss:", val_f1micro, val_f1macro, len(val_claimIDs))

        if val_f1macro > best_f1:
            with torch.no_grad():
                test_f1micro, test_f1macro, test_claimIDs, test_logits, test_labels, test_predictions = evaluate(test_generator, model)
                print_message("TEST F1micro, F1macro, loss:", test_f1micro, test_f1macro, len(test_claimIDs))

                other_test_f1micro, other_test_f1macro, other_test_claimIDs, other_test_logits, other_test_labels, other_test_predictions = evaluate(other_generator, model, other_from=other_from)
                print_message("OTHER-TEST F1micro, F1macro, loss:", other_test_f1micro, other_test_f1macro, len(other_test_claimIDs))

                test_remove_top_bottom = []
                test_remove_bottom_top = []
                other_test_remove_top_bottom = []
                other_test_remove_bottom_top = []
                ten = np.arange(10)
                if args.inputtype != "CLAIM_ONLY":
                    # Ablation: blank the first / last (i+1) snippets and
                    # re-evaluate both test sets.
                    for i in tqdm(range(10)):
                        top_is = ten[:(i+1)]
                        bottom_is = ten[-(i+1):]
                        test_remove_top_bottom.append(evaluate(test_generator, model, ignore_snippet=top_is))
                        test_remove_bottom_top.append(evaluate(test_generator, model, ignore_snippet=bottom_is))
                        other_test_remove_top_bottom.append(evaluate(other_generator, model, other_from=other_from, ignore_snippet=top_is))
                        other_test_remove_bottom_top.append(evaluate(other_generator, model, other_from=other_from, ignore_snippet=bottom_is))

                    print_message([np.around(v[1], 4) for v in test_remove_top_bottom])
                    print_message([np.around(v[1], 4) for v in test_remove_bottom_top])
                    print_message([np.around(v[1], 4) for v in other_test_remove_top_bottom])
                    print_message([np.around(v[1], 4) for v in other_test_remove_bottom_top])

            patience_counter = 0
            best_f1 = val_f1macro
            val_store = [val_f1micro, val_f1macro, val_claimIDs, val_logits, val_labels, val_predictions]
            test_store = [test_f1micro, test_f1macro, test_claimIDs, test_logits, test_labels, test_predictions, test_remove_top_bottom, test_remove_bottom_top]
            other_test_store = [other_test_f1micro, other_test_f1macro, other_test_claimIDs, other_test_logits, other_test_labels, other_test_predictions, other_test_remove_top_bottom, other_test_remove_bottom_top]
            misc_store = [args]
            total_store = [val_store, test_store, other_test_store, misc_store]
        else:
            patience_counter += 1

        print_message("PATIENCE", patience_counter, "/", patience_max)

        if patience_counter >= patience_max:
            # Context manager so the result file is flushed and closed.
            with open(savename, "wb") as result_file:
                pickle.dump(total_store, result_file)
            break

def run_bert(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_generator, step):
    """Instantiate the BERT variant selected by ``step`` and train it.

    ``EMO_INT``/``EMO_LEXI`` select ``emoMyBertModel``,
    ``EMO_ATT2_INT``/``EMO_ATT2_LEXI`` select ``emoAtt2MyBertModel`` (both
    receive ``step`` as ``emocred_type``); every other step uses
    ``MyBertModel``. The model is trained by ``train_model`` with a
    class-weighted cross-entropy loss and AdamW at ``args.lr``.
    """
    print(f'***run_bert*** with inputtype {args.inputtype}')

    num_labels = len(label_order)
    if step in ('EMO_INT', 'EMO_LEXI'):
        model = emoMyBertModel.from_pretrained(BERT_MODEL_PATH, labelnum=num_labels, input_type=inputtype, emocred_type=step)
    elif step in ('EMO_ATT2_INT', 'EMO_ATT2_LEXI'):
        model = emoAtt2MyBertModel.from_pretrained(BERT_MODEL_PATH, labelnum=num_labels, input_type=inputtype, emocred_type=step)
    else:
        model = MyBertModel.from_pretrained(BERT_MODEL_PATH, labelnum=num_labels, input_type=inputtype)
    model.to(DEVICE)

    criterion = torch.nn.CrossEntropyLoss(weight=label_weights.to(DEVICE))
    # Only parameters that require gradients are handed to the optimizer.
    trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = AdamW(trainable_parameters, lr=args.lr, eps=1e-8)
    optimizer.zero_grad()

    train_model(model, criterion, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename)

def run_lstm(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_generator):
    """Build the GloVe-initialised LSTM model and train it.

    The vocabulary and embedding matrix are collected from all four
    generators via ``get_embedding_matrix``; training is delegated to
    ``train_model`` with a class-weighted cross-entropy loss and AdamW at
    ``args.lr``.
    """
    print(f'***run_lstm*** with inputtype {args.inputtype}')

    all_generators = [train_generator, val_generator, test_generator, other_generator]
    glove_embedding_matrix, word2idx, idx2word = get_embedding_matrix(all_generators, args.dataset)

    model = LSTMModel(
        args.lstm_hidden_dim,
        args.lstm_layers,
        args.lstm_dropout,
        len(label_order),
        word2idx,
        glove_embedding_matrix,
        input_type=inputtype,
    )
    model.to(DEVICE)

    criterion = torch.nn.CrossEntropyLoss(weight=label_weights.to(DEVICE))
    # Only parameters that require gradients are handed to the optimizer.
    trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = AdamW(trainable_parameters, lr=args.lr, eps=1e-8)
    optimizer.zero_grad()

    train_model(model, criterion, optimizer, train_generator, val_generator, test_generator, args, other_generator, savename)

def filter_snippet_for_bow(generator, ignore_snippet, inputtype):
    """Return the bag-of-words text of every sample with some snippets blanked.

    For each batch (claims at item 1, snippets at item 3) the entries
    selected by ``ignore_snippet`` are overwritten in place with
    ``"filler"``; then one cleaned text string per claim is produced
    according to ``inputtype`` (claim only, evidence only, or claim directly
    followed by the space-joined evidence).

    Raises:
        Exception: if ``inputtype`` is none of the three known input types.
    """
    texts = []
    for batch in generator:
        batch_claims = batch[1]
        batch_snippets = batch[3]

        # NOTE(review): this mutates the batch's snippet containers; whether
        # that reaches the underlying dataset depends on the generator.
        for sample_snippets in batch_snippets:
            sample_snippets[ignore_snippet] = "filler"

        for idx, claim in enumerate(batch_claims):
            if inputtype == CLAIM_AND_EVIDENCE:
                evidence = " ".join([clean_str(v) for v in batch_snippets[idx]])
                text = clean_str(claim) + evidence
            elif inputtype == CLAIM_ONLY:
                text = clean_str(claim)
            elif inputtype == EVIDENCE_ONLY:
                text = " ".join([clean_str(v) for v in batch_snippets[idx]])
            else:
                raise Exception("Unknown type", inputtype)
            texts.append(text)
    return texts

def get_bows_labels(generators, dataset, inputtype):
    """Vectorise all generators with TF-IDF, including snippet-removal ablations.

    One cleaned text string per sample is built for every generator according
    to ``inputtype``. A ``TfidfVectorizer(min_df=2)`` is fitted on the texts
    of all generators together and applied to each one. The last two
    generators are additionally re-read through ``filter_snippet_for_bow``
    with the first 1..10 and the last 1..10 snippets blanked.

    Args:
        generators: batch iterables (claims at item 1, labels at item 2,
            snippets at item 3); the second-to-last is treated as the test
            set and the last as the cross-dataset test set.
        dataset: unused here.
        inputtype: one of ``CLAIM_AND_EVIDENCE``, ``CLAIM_ONLY``,
            ``EVIDENCE_ONLY``.

    Returns:
        ``(bows, all_labels, test_remove_top_bottom, test_remove_bottom_top,
        other_test_remove_top_bottom, other_test_remove_bottom_top)``:
        per-generator TF-IDF matrices, per-generator label lists, and four
        lists of ten ablation matrices each.

    Raises:
        Exception: if ``inputtype`` is none of the three known input types.
    """
    all_samples = []
    all_labels = []

    for gen in generators:
        gen_samples, gen_labels = [], []
        for batch in gen:
            batch_claims, batch_labels, batch_snippets = batch[1], batch[2], batch[3]

            for idx, claim in enumerate(batch_claims):
                if inputtype == CLAIM_AND_EVIDENCE:
                    evidence = " ".join([clean_str(v) for v in batch_snippets[idx]])
                    text = clean_str(claim) + evidence
                elif inputtype == CLAIM_ONLY:
                    text = clean_str(claim)
                elif inputtype == EVIDENCE_ONLY:
                    text = " ".join([clean_str(v) for v in batch_snippets[idx]])
                else:
                    raise Exception("Unknown type", inputtype)
                gen_samples.append(text)
                gen_labels.append(batch_labels[idx])

        all_samples.append(gen_samples)
        all_labels.append(gen_labels)

    # Raw ablation texts: blank the first / last k snippets, k = 1..10.
    test_generator, other_test_generator = generators[-2], generators[-1]
    test_top_texts, test_bottom_texts = [], []
    other_top_texts, other_bottom_texts = [], []
    ten = np.arange(10)
    for k in tqdm(range(1, 11)):
        top_is = ten[:k]
        bottom_is = ten[-k:]
        test_top_texts.append(filter_snippet_for_bow(test_generator, top_is, inputtype))
        test_bottom_texts.append(filter_snippet_for_bow(test_generator, bottom_is, inputtype))
        other_top_texts.append(filter_snippet_for_bow(other_test_generator, top_is, inputtype))
        other_bottom_texts.append(filter_snippet_for_bow(other_test_generator, bottom_is, inputtype))

    vectorizer = TfidfVectorizer(min_df=2)
    vectorizer.fit([item for sublist in all_samples for item in sublist])

    bows = [vectorizer.transform(samples) for samples in all_samples]

    test_remove_top_bottom = [vectorizer.transform(texts) for texts in test_top_texts]
    test_remove_bottom_top = [vectorizer.transform(texts) for texts in test_bottom_texts]
    other_test_remove_top_bottom = [vectorizer.transform(texts) for texts in other_top_texts]
    other_test_remove_bottom_top = [vectorizer.transform(texts) for texts in other_bottom_texts]

    return bows, all_labels, test_remove_top_bottom, test_remove_bottom_top, other_test_remove_top_bottom, other_test_remove_bottom_top

def run_bow(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator):
    """Grid-search a class-weighted random forest on TF-IDF features.

    Features and ablation variants come from ``get_bows_labels``. The grid
    search is fitted on the training split and scored with macro-F1 on the
    validation split. Validation, test and cross-dataset test scores, plus
    the snippet-removal ablations, are printed and pickled to ``savename``
    as ``[val_store, test_store, other_test_store, [best_params]]``.

    Args:
        args: configuration object; ``inputtype`` and ``dataset`` are read.
        train_generator, val_generator, test_generator, other_test_generator:
            batch iterables for the respective sets.
        label_weights: tensor of per-class weights, used as the forest's
            ``class_weight`` keyed by class index.
        inputtype: input-type constant forwarded to ``get_bows_labels``.
        label_order: unused here.
        savename: path of the pickle file to write.
    """
    print(f'***run_bow*** with inputtype {args.inputtype}')

    bows, labels, test_remove_top_bottom, test_remove_bottom_top, other_test_remove_top_bottom, other_test_remove_bottom_top = get_bows_labels([train_generator, val_generator, test_generator, other_test_generator], args.dataset, inputtype)

    train_bow, val_bow, test_bow, other_test_bow = bows[0], bows[1], bows[2], bows[3]
    train_labels, val_labels, test_labels, other_test_labels = labels[0], labels[1], labels[2], labels[3]

    label_weights = label_weights.numpy()
    weights = {i: label_weights[i] for i in range(len(label_weights))}

    param_grid = [
        {'n_estimators': [100, 500, 1000], 'min_samples_leaf': [1, 3, 5, 10], 'min_samples_split': [2, 5, 10]}
    ]

    opt = GridSearch(model=RandomForestClassifier(n_jobs=5, class_weight=weights), param_grid=param_grid, parallelize=False)
    opt.fit(train_bow, train_labels, val_bow, val_labels, scoring="f1_macro")

    # Label-remapping mode applied to predictions on the cross-dataset set.
    other_from = "snes" if args.dataset == "pomt" else "pomt"

    def rf_eval(model, bow, labels, other_from=None):
        """Predict on ``bow`` and return (f1_micro, f1_macro, labels, preds)."""
        preds = model.predict(bow)

        if other_from == "pomt": # other data is pomt, and model is trained on snes
            # this case is fine
            pass
        elif other_from == "snes": # other data is snes, and model is trained on pomt
            # in this case both "pants on fire!" and "false" should be considered as false
            preds[preds == 0] = 1 # 0 is "pants on fire!" and 1 is "false" for pomt.

        f1_macro = f1_score(labels, preds, average="macro")
        f1_micro = f1_score(labels, preds, average="micro")
        return f1_micro, f1_macro, labels, preds

    val_store = rf_eval(opt, val_bow, val_labels)
    test_store = list(rf_eval(opt, test_bow, test_labels)) + [
        [rf_eval(opt, test_remove_top_bottom[i], test_labels) for i in range(10)],
        [rf_eval(opt, test_remove_bottom_top[i], test_labels) for i in range(10)],
    ]
    other_test_store = list(rf_eval(opt, other_test_bow, other_test_labels, other_from=other_from)) + [
        [rf_eval(opt, other_test_remove_top_bottom[i], other_test_labels, other_from=other_from) for i in range(10)],
        [rf_eval(opt, other_test_remove_bottom_top[i], other_test_labels, other_from=other_from) for i in range(10)],
    ]
    misc_store = [opt.get_best_params()]
    total_store = [val_store, test_store, other_test_store, misc_store]

    print_message("VALIDATION", val_store[0], val_store[1])
    print_message("TEST", test_store[0], test_store[1])
    print_message("OTHER-TEST", other_test_store[0], other_test_store[1])

    # Macro-F1 of each ablation run.
    print_message([np.around(v[1], 4) for v in test_store[-2]])
    print_message([np.around(v[1], 4) for v in test_store[-1]])
    print_message([np.around(v[1], 4) for v in other_test_store[-2]])
    print_message([np.around(v[1], 4) for v in other_test_store[-1]])
    print(misc_store)

    # Context manager so the result file is flushed and closed.
    with open(savename, "wb") as result_file:
        pickle.dump(total_store, result_file)

def filter_websites(snippets_data, snippets_dir="../../multi_fc_publicdata/snippets/"):
    """Blank out snippets that originate from fact-checking websites.

    For every row, the file named by column 0 is read from ``snippets_dir``;
    the last tab-separated field of each line is taken as that snippet's
    link. Snippet columns whose link contains one of the known fact-checking
    domains are overwritten in place with ``"filler"``.

    Args:
        snippets_data: DataFrame with the snippet file name in column 0
            followed by ten snippet columns; modified in place.
        snippets_dir: directory holding the per-claim snippet files.
            NOTE(review): the default points two levels up, whereas
            ``load_data`` reads from ``../multi_fc_publicdata/`` -- confirm
            which location is current.

    Returns:
        The same ``snippets_data`` object.
    """
    bad_websites = ["factcheck.org", "politifact.com", "snopes.com", "fullfact.org", "factscan.ca"]
    ids = snippets_data.values[:, 0]
    remove_count = 0
    for i, snippet_file in enumerate(ids):
        with open(os.path.join(snippets_dir, snippet_file), "r", encoding="utf-8") as f:
            links = [line.strip().split("\t")[-1] for line in f]

        # 1 data sample has 11 links by mistake in the dataset: only the
        # first ten are considered. Truncating *before* filling the mask
        # avoids the IndexError the fixed-size list used to raise there.
        remove = [any(bad in link for bad in bad_websites) for link in links[:10]]
        # Pad to exactly ten flags when a file lists fewer links.
        remove += [False] * (10 - len(remove))
        # The leading False protects column 0 (the snippet file name).
        snippets_data.iloc[i, [False] + remove] = "filler"

        remove_count += sum(remove)
    print_message("REMOVE COUNT", remove_count)
    return snippets_data



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Error loading SnowballStemmer: Package 'SnowballStemmer'
[nltk_data]     not found in index
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [5]:
# Do not include 'formal' and 'informal' in `steps`: they need a separate cell because we do not have formal/informal data for PolitiFact (pomt).

# This cell runs all the training and evaluation for the configurations below
# and stores the results under the results folder.
import gc

gc.collect()
class vars():
    """Hyper-parameter bundle for one experiment configuration.

    NOTE: the class name shadows the builtin ``vars``; it is kept because the
    rest of the notebook instantiates it under this name.

    Attributes set for every mode: ``dataset``, ``inputtype``,
    ``filter_websites`` (0), ``model`` (the mode name), ``batchsize``,
    ``eval_per_epoch`` (1) and ``lr``. The ``lstm`` mode additionally sets
    ``lstm_hidden_dim``, ``lstm_layers`` and ``lstm_dropout``.

    Raises:
        ValueError: if ``mode`` is not ``"bow"``, ``"lstm"`` or ``"bert"``,
            or if ``mode`` is ``"bert"`` and ``dataset`` is neither
            ``"snes"`` nor ``"pomt"`` (no batch size is defined for it).
            Previously these cases produced a partly initialised object that
            failed later with an AttributeError.
    """
    def __init__(self, mode, inputtype, dataset):
        if mode not in ("bow", "lstm", "bert"):
            raise ValueError(f"Unknown mode {mode!r}; expected 'bow', 'lstm' or 'bert'")

        # Settings shared by every mode.
        self.dataset = dataset
        self.inputtype = inputtype
        self.filter_websites = 0
        self.model = mode
        self.eval_per_epoch = 1

        if mode == "bow":
            self.batchsize = 2
            self.lr = 0.0001
        elif mode == 'lstm':
            self.batchsize = 16
            self.lr = 0.0001
            self.lstm_hidden_dim = 128
            self.lstm_layers = 2
            self.lstm_dropout = 0.1
        else:  # 'bert'
            # The batch size differs per dataset.
            if dataset == "snes":
                self.batchsize = 6
            elif dataset == "pomt":
                self.batchsize = 4
            else:
                raise ValueError(f"No bert batch size defined for dataset {dataset!r}")
            self.lr = 0.000003

# Build a lookup from the unigram file: each non-empty line is split on tabs,
# field 1 becomes the key and field 0 (as a string) the value. Reading stops
# once more than 100000 lines (blank ones included) have been seen.
filepath = 'sorted.uk.word.unigrams'
word_freq = {}
count = 0
with open(filepath, encoding='utf-8') as f:
    for count, raw_line in enumerate(f, start=1):
        stripped = raw_line.rstrip()
        if stripped:
            fields = stripped.split('\t')
            word_freq[fields[1]] = str(fields[0])
        if count > 100000:
            break

# Preprocessing configurations to sweep; the entries of each inner list are
# joined with "-" to form the step string passed to load_data().
steps = [['none'], ['pos'], ['stop'], ['pos', 'stop'], ['EMO_INT'], ['EMO_ATT2_INT'], ['EMO_LEXI'], ['EMO_ATT2_LEXI'], ['neg'], ['stem'], ['pos', 'neg'], ['pos', 'neg', 'stop'], ['all']]

modes = ['bert']
datasets = ['snes', 'pomt']
inputtypes = ['CLAIM_AND_EVIDENCE', 'EVIDENCE_ONLY', 'CLAIM_ONLY']

# Make sure the output directory exists up front so a finished run cannot
# fail when its results are written.
os.makedirs("results", exist_ok=True)

for step in steps:
    for mode in modes:
        for dataset in datasets:
            for inputtype in inputtypes:
                print('***********************************')
                stepstr = "-".join(step)
                args = vars(mode, inputtype, dataset)

                # Result file name encodes the configuration.
                name_parts = [args.model, args.dataset, args.inputtype, stepstr, args.lr, args.batchsize]
                if args.filter_websites > 0.5:
                    name_parts.insert(0, args.filter_websites)
                savename = "results/" + "-".join([str(v) for v in name_parts])
                if args.model == "lstm":
                    savename += "-" + "-".join([str(v) for v in [args.lstm_hidden_dim, args.lstm_layers, stepstr, args.lstm_dropout]])
                savename += ".pkl"

                print(args.inputtype, "-", args.dataset, "-", stepstr)
                # From here on `inputtype` is the index into INPUT_TYPE_ORDER.
                inputtype = INPUT_TYPE_ORDER.index(args.inputtype)
                main_data, snippets_data, label_order, splits = load_data(args.dataset, stepstr)

                if args.filter_websites > 0.5:
                    snippets_data = filter_websites(snippets_data)

                params = {"batch_size": args.batchsize, "shuffle": True, "num_workers": 1, "collate_fn": transformer_collate, "persistent_workers": True, "prefetch_factor":5}
                eval_params = {"batch_size": args.batchsize, "shuffle": False, "num_workers": 1, "collate_fn": transformer_collate, "persistent_workers": True, "prefetch_factor":5}

                train_generator, val_generator, test_generator, label_weights = make_generators(main_data, snippets_data, label_order, splits, [params, eval_params])

                # The cross-dataset test set comes from the *other* dataset.
                other_dataset_name = "pomt" if args.dataset == "snes" else "snes"
                if stepstr in ('formal', 'informal') and other_dataset_name == "pomt":
                    # There is no formal/informal variant of pomt: use the raw data.
                    other_step = 'none'
                else:
                    # Previously the formal/informal snes case passed the
                    # literal string 'stepstr' instead of the variable.
                    other_step = stepstr

                main_data, snippets_data, _, splits = load_data(other_dataset_name, other_step)
                if args.filter_websites > 0.5:
                    snippets_data = filter_websites(snippets_data)

                # Map the other dataset's labels onto the training dataset's
                # label set. Boolean masks are passed to .iloc as NumPy arrays.
                if args.dataset == "snes":
                    main_data.iloc[(main_data.iloc[:, 2] == "pants on fire!").to_numpy(), 2] = "false"
                    main_data.iloc[(main_data.iloc[:, 2] == "half-true").to_numpy(), 2] = "mixture"
                else:
                    main_data.iloc[(main_data.iloc[:, 2] == "mixture").to_numpy(), 2] = "half-true"
                _, _, other_test_generator, _ = make_generators(main_data, snippets_data, label_order, splits, [params, eval_params], other_dataset=True)

                if args.model == "bert":
                    run_bert(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator, stepstr)
                elif args.model == "lstm":
                    run_lstm(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator)
                elif args.model == "bow":
                    run_bow(args, train_generator, val_generator, test_generator, label_weights, inputtype, label_order, savename, other_test_generator)

                gc.collect()




***********************************
CLAIM_AND_EVIDENCE - snes - pos
load data function: step = pos
****load data: preprocess *****
len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
load data function: step = pos
****load data: preprocess *****
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_bert*** with inputtype CLAIM_AND_EVIDENCE


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109490694
[Dec 07, 16:04:47] TRAIN loss 1.5640663039442655 1
[Dec 07, 16:04:54] VALIDATION F1micro, F1macro, loss: 0.4792899408284023 0.21822459305007147 507
[Dec 07, 16:05:07] TEST F1micro, F1macro, loss: 0.4891518737672584 0.23226778556933708 1014
[Dec 07, 16:05:43] OTHER-TEST F1micro, F1macro, loss: 0.23739418476260585 0.15680760411858605 2717


100%|██████████| 10/10 [14:55<00:00, 89.54s/it]

[Dec 07, 16:20:39] [0.2263, 0.2159, 0.2172, 0.2147, 0.2097, 0.2078, 0.2073, 0.2024, 0.2006, 0.2006]
[Dec 07, 16:20:39] [0.2309, 0.2304, 0.2215, 0.218, 0.2164, 0.2121, 0.2125, 0.2092, 0.2049, 0.2006]
[Dec 07, 16:20:39] [0.1556, 0.1531, 0.1505, 0.1484, 0.1485, 0.1453, 0.1437, 0.1452, 0.1437, 0.1421]
[Dec 07, 16:20:39] [0.156, 0.1542, 0.1525, 0.1507, 0.1451, 0.1442, 0.1436, 0.1433, 0.1439, 0.1421]
[Dec 07, 16:20:39] PATIENCE 0 / 1





[Dec 07, 16:23:01] TRAIN loss 1.4640504174135827 2
[Dec 07, 16:23:07] VALIDATION F1micro, F1macro, loss: 0.4990138067061144 0.25140321127229415 507
[Dec 07, 16:23:21] TEST F1micro, F1macro, loss: 0.5355029585798816 0.29704594711916166 1014
[Dec 07, 16:23:57] OTHER-TEST F1micro, F1macro, loss: 0.24622745675377256 0.1952403234028403 2717


100%|██████████| 10/10 [14:53<00:00, 89.35s/it]

[Dec 07, 16:38:50] [0.3003, 0.2874, 0.2788, 0.2685, 0.2625, 0.2579, 0.2628, 0.2608, 0.257, 0.257]
[Dec 07, 16:38:50] [0.295, 0.2924, 0.2895, 0.2856, 0.279, 0.275, 0.2707, 0.2683, 0.26, 0.257]
[Dec 07, 16:38:50] [0.1915, 0.1907, 0.1884, 0.1857, 0.1861, 0.1854, 0.1848, 0.183, 0.1841, 0.1828]
[Dec 07, 16:38:50] [0.1957, 0.1915, 0.1871, 0.1888, 0.1861, 0.1839, 0.1838, 0.1815, 0.184, 0.1828]
[Dec 07, 16:38:50] PATIENCE 0 / 1





[Dec 07, 16:41:12] TRAIN loss 1.3716003657192797 3
[Dec 07, 16:41:19] VALIDATION F1micro, F1macro, loss: 0.514792899408284 0.26675307159514117 507
[Dec 07, 16:41:32] TEST F1micro, F1macro, loss: 0.514792899408284 0.28035944986440037 1014
[Dec 07, 16:42:08] OTHER-TEST F1micro, F1macro, loss: 0.2333456017666544 0.17292939078126524 2717


100%|██████████| 10/10 [14:54<00:00, 89.50s/it]

[Dec 07, 16:57:03] [0.2731, 0.2666, 0.2527, 0.2462, 0.2488, 0.2466, 0.2422, 0.244, 0.2411, 0.2411]
[Dec 07, 16:57:03] [0.28, 0.2809, 0.2722, 0.2663, 0.2647, 0.2609, 0.2543, 0.2473, 0.2401, 0.2411]
[Dec 07, 16:57:03] [0.1735, 0.1738, 0.1744, 0.1717, 0.1711, 0.1692, 0.1685, 0.1692, 0.1691, 0.1701]
[Dec 07, 16:57:03] [0.1745, 0.1732, 0.172, 0.1718, 0.167, 0.1672, 0.1667, 0.1687, 0.1698, 0.1701]
[Dec 07, 16:57:03] PATIENCE 0 / 1





[Dec 07, 16:59:25] TRAIN loss 1.249362798155965 4
[Dec 07, 16:59:32] VALIDATION F1micro, F1macro, loss: 0.5404339250493096 0.27565214746741296 507
[Dec 07, 16:59:45] TEST F1micro, F1macro, loss: 0.5581854043392505 0.33561783269503276 1014
[Dec 07, 17:00:21] OTHER-TEST F1micro, F1macro, loss: 0.25653294074346705 0.20023537538067315 2717


100%|██████████| 10/10 [14:55<00:00, 89.55s/it]

[Dec 07, 17:15:17] [0.321, 0.3119, 0.3082, 0.2973, 0.2739, 0.2768, 0.2618, 0.254, 0.2494, 0.251]
[Dec 07, 17:15:17] [0.3367, 0.3257, 0.3211, 0.3143, 0.3062, 0.2852, 0.2797, 0.2733, 0.2587, 0.251]
[Dec 07, 17:15:17] [0.195, 0.1944, 0.193, 0.1912, 0.1889, 0.1873, 0.1824, 0.181, 0.1769, 0.1793]
[Dec 07, 17:15:17] [0.2, 0.1997, 0.1967, 0.1955, 0.1884, 0.1879, 0.1853, 0.1866, 0.1816, 0.1793]
[Dec 07, 17:15:17] PATIENCE 0 / 1





[Dec 07, 17:17:38] TRAIN loss 1.1328072937557827 5
[Dec 07, 17:17:45] VALIDATION F1micro, F1macro, loss: 0.4122287968441814 0.26373430744688225 507
[Dec 07, 17:17:45] PATIENCE 1 / 1
***********************************
EVIDENCE_ONLY - snes - pos
load data function: step = pos
****load data: preprocess *****
len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
load data function: step = pos
****load data: preprocess *****
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_bert*** with inputtype EVIDENCE_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109486854
[Dec 07, 17:26:43] TRAIN loss 1.5628650216637432 1
[Dec 07, 17:26:48] VALIDATION F1micro, F1macro, loss: 0.6015779092702169 0.2186136750510145 507
[Dec 07, 17:26:57] TEST F1micro, F1macro, loss: 0.6232741617357002 0.24462269149050617 1014
[Dec 07, 17:27:19] OTHER-TEST F1micro, F1macro, loss: 0.2948104527051896 0.17994988598191736 2717


100%|██████████| 10/10 [08:48<00:00, 52.83s/it]

[Dec 07, 17:36:07] [0.2416, 0.2365, 0.2317, 0.2305, 0.2261, 0.2026, 0.1546, 0.0719, 0.0467, 0.0461]
[Dec 07, 17:36:07] [0.2446, 0.2493, 0.2476, 0.2486, 0.2356, 0.2257, 0.2137, 0.1653, 0.0744, 0.0461]
[Dec 07, 17:36:07] [0.179, 0.1825, 0.1895, 0.1878, 0.1891, 0.1721, 0.1438, 0.1, 0.06, 0.0514]
[Dec 07, 17:36:07] [0.1796, 0.1854, 0.1863, 0.1946, 0.1928, 0.1883, 0.1656, 0.1276, 0.0709, 0.0514]
[Dec 07, 17:36:07] PATIENCE 0 / 1





[Dec 07, 17:37:41] TRAIN loss 1.5000538951843172 2
[Dec 07, 17:37:46] VALIDATION F1micro, F1macro, loss: 0.368836291913215 0.24318572790581997 507
[Dec 07, 17:37:55] TEST F1micro, F1macro, loss: 0.3678500986193293 0.24266841051696467 1014
[Dec 07, 17:38:17] OTHER-TEST F1micro, F1macro, loss: 0.2355539197644461 0.1987550967107447 2717


100%|██████████| 10/10 [08:46<00:00, 52.69s/it]

[Dec 07, 17:47:04] [0.2344, 0.2288, 0.2226, 0.218, 0.1994, 0.1892, 0.1666, 0.1458, 0.0646, 0.0439]
[Dec 07, 17:47:04] [0.2446, 0.2341, 0.2221, 0.2305, 0.2194, 0.2215, 0.1936, 0.1668, 0.1397, 0.0439]
[Dec 07, 17:47:04] [0.1955, 0.193, 0.1955, 0.1888, 0.1825, 0.1766, 0.1575, 0.1497, 0.1093, 0.066]
[Dec 07, 17:47:04] [0.1986, 0.1982, 0.1992, 0.1947, 0.1909, 0.1901, 0.1784, 0.1621, 0.1446, 0.066]
[Dec 07, 17:47:04] PATIENCE 0 / 1





[Dec 07, 17:48:38] TRAIN loss 1.4561371342153162 3
[Dec 07, 17:48:43] VALIDATION F1micro, F1macro, loss: 0.5266272189349113 0.22212213318596294 507
[Dec 07, 17:48:43] PATIENCE 1 / 1
***********************************
CLAIM_ONLY - snes - pos
load data function: step = pos
****load data: preprocess *****
len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
load data function: step = pos
****load data: preprocess *****
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_bert*** with inputtype CLAIM_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.bias', 'predic

model parameters 109486085
[Dec 07, 17:56:48] TRAIN loss 1.563409775092795 1
[Dec 07, 17:56:49] VALIDATION F1micro, F1macro, loss: 0.5088757396449705 0.1975411025936205 507
[Dec 07, 17:56:52] TEST F1micro, F1macro, loss: 0.5285996055226825 0.21724794870192227 1014
[Dec 07, 17:56:59] OTHER-TEST F1micro, F1macro, loss: 0.2602134707397865 0.18644046752082605 2717
[Dec 07, 17:56:59] PATIENCE 0 / 1
[Dec 07, 17:57:29] TRAIN loss 1.490200365213929 2
[Dec 07, 17:57:30] VALIDATION F1micro, F1macro, loss: 0.5483234714003945 0.2331923713567181 507
[Dec 07, 17:57:32] TEST F1micro, F1macro, loss: 0.5621301775147929 0.2531453401131467 1014
[Dec 07, 17:57:38] OTHER-TEST F1micro, F1macro, loss: 0.2716231137283769 0.21085968925072035 2717
[Dec 07, 17:57:39] PATIENCE 0 / 1
[Dec 07, 17:58:08] TRAIN loss 1.4203112035788394 3
[Dec 07, 17:58:10] VALIDATION F1micro, F1macro, loss: 0.5029585798816568 0.2517622221307581 507
[Dec 07, 17:58:12] TEST F1micro, F1macro, loss: 0.514792899408284 0.28064209620647446 1

Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109492231
[Dec 07, 18:13:49] TRAIN loss 1.7441574413835577 1
[Dec 07, 18:14:08] VALIDATION F1micro, F1macro, loss: 0.2503681885125184 0.24592572660979614 1358
[Dec 07, 18:14:48] TEST F1micro, F1macro, loss: 0.25432462274567535 0.25249131367676614 2717
[Dec 07, 18:15:03] OTHER-TEST F1micro, F1macro, loss: 0.6153846153846154 0.2054370110673867 1014


100%|██████████| 10/10 [16:38<00:00, 99.85s/it]

[Dec 07, 18:31:42] [0.2535, 0.2509, 0.2476, 0.2453, 0.243, 0.2381, 0.2318, 0.2256, 0.2201, 0.2192]
[Dec 07, 18:31:42] [0.25, 0.2495, 0.2482, 0.2473, 0.2444, 0.2409, 0.2357, 0.2281, 0.225, 0.2192]
[Dec 07, 18:31:42] [0.2105, 0.2194, 0.2198, 0.2195, 0.228, 0.2273, 0.2238, 0.229, 0.2237, 0.2213]
[Dec 07, 18:31:42] [0.205, 0.2092, 0.2076, 0.2148, 0.2221, 0.2207, 0.23, 0.2305, 0.2246, 0.2213]
[Dec 07, 18:31:42] PATIENCE 0 / 1





[Dec 07, 18:37:58] TRAIN loss 1.657535313707367 2
[Dec 07, 18:38:17] VALIDATION F1micro, F1macro, loss: 0.25920471281296026 0.2547142649387652 1358
[Dec 07, 18:38:56] TEST F1micro, F1macro, loss: 0.2561648877438351 0.2538371698065747 2717
[Dec 07, 18:39:11] OTHER-TEST F1micro, F1macro, loss: 0.6153846153846154 0.21621775095625329 1014


100%|██████████| 10/10 [16:36<00:00, 99.65s/it]

[Dec 07, 18:55:48] [0.2529, 0.2533, 0.2573, 0.2567, 0.2591, 0.2498, 0.2448, 0.2396, 0.2307, 0.2267]
[Dec 07, 18:55:48] [0.2542, 0.2558, 0.2577, 0.2583, 0.2596, 0.2591, 0.2524, 0.2444, 0.2369, 0.2267]
[Dec 07, 18:55:48] [0.2184, 0.2213, 0.2171, 0.2165, 0.2215, 0.2191, 0.2132, 0.2144, 0.2111, 0.2111]
[Dec 07, 18:55:48] [0.216, 0.2253, 0.2184, 0.2092, 0.211, 0.2087, 0.213, 0.2129, 0.2122, 0.2111]
[Dec 07, 18:55:48] PATIENCE 0 / 1





[Dec 07, 19:02:04] TRAIN loss 1.5617298849951986 3
[Dec 07, 19:02:24] VALIDATION F1micro, F1macro, loss: 0.24521354933726067 0.23069338043043874 1358
[Dec 07, 19:02:24] PATIENCE 1 / 1
***********************************
EVIDENCE_ONLY - pomt - pos
load data function: step = pos
****load data: preprocess *****
len 13581 , pants on fire! (10.6\%), false (19.2\%), mostly false (17.0\%), half-true (19.8\%), mostly true (18.8\%), true (14.8\%)
load data function: step = pos
****load data: preprocess *****
len 5069 , pants on fire! (0.0\%), false (64.3\%), mostly false (7.5\%), half-true (12.3\%), mostly true (2.8\%), true (13.0\%)
***run_bert*** with inputtype EVIDENCE_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109487623
[Dec 07, 19:14:47] TRAIN loss 1.7289822589814487 1
[Dec 07, 19:14:58] VALIDATION F1micro, F1macro, loss: 0.2547864506627393 0.21450920328526057 1358
[Dec 07, 19:15:21] TEST F1micro, F1macro, loss: 0.24549135075450865 0.19653431071807947 2717
[Dec 07, 19:15:30] OTHER-TEST F1micro, F1macro, loss: 0.6272189349112426 0.2110724778856487 1014


100%|██████████| 10/10 [09:17<00:00, 55.73s/it]

[Dec 07, 19:24:48] [0.2069, 0.2054, 0.2039, 0.2001, 0.1867, 0.1496, 0.1016, 0.0651, 0.0423, 0.0318]
[Dec 07, 19:24:48] [0.1976, 0.2002, 0.2042, 0.2076, 0.1962, 0.168, 0.1276, 0.0926, 0.07, 0.0318]
[Dec 07, 19:24:48] [0.2121, 0.2055, 0.1857, 0.1726, 0.1744, 0.1739, 0.1683, 0.162, 0.1595, 0.1565]
[Dec 07, 19:24:48] [0.2116, 0.2024, 0.2106, 0.1908, 0.1951, 0.1916, 0.1786, 0.174, 0.1643, 0.1565]
[Dec 07, 19:24:48] PATIENCE 0 / 1





[Dec 07, 19:28:54] TRAIN loss 1.644140041345902 2
[Dec 07, 19:29:05] VALIDATION F1micro, F1macro, loss: 0.29455081001472755 0.2935772123939929 1358
[Dec 07, 19:29:28] TEST F1micro, F1macro, loss: 0.28082443871917556 0.2778169968163087 2717
[Dec 07, 19:29:37] OTHER-TEST F1micro, F1macro, loss: 0.5996055226824457 0.21802350612668597 1014


100%|██████████| 10/10 [09:18<00:00, 55.82s/it]

[Dec 07, 19:38:55] [0.2721, 0.2673, 0.2579, 0.2319, 0.2216, 0.2045, 0.1897, 0.1573, 0.0793, 0.0484]
[Dec 07, 19:38:55] [0.2787, 0.2728, 0.2708, 0.2665, 0.259, 0.2514, 0.2341, 0.212, 0.1739, 0.0484]
[Dec 07, 19:38:55] [0.2323, 0.2412, 0.2354, 0.2246, 0.231, 0.2178, 0.2105, 0.1856, 0.0399, 0.0279]
[Dec 07, 19:38:55] [0.218, 0.2263, 0.2347, 0.2343, 0.2384, 0.2353, 0.2368, 0.2235, 0.206, 0.0279]
[Dec 07, 19:38:55] PATIENCE 0 / 1





[Dec 07, 19:43:01] TRAIN loss 1.5613249539325957 3
[Dec 07, 19:43:12] VALIDATION F1micro, F1macro, loss: 0.27908689248895435 0.26899963294119333 1358
[Dec 07, 19:43:12] PATIENCE 1 / 1
***********************************
CLAIM_ONLY - pomt - pos
load data function: step = pos
****load data: preprocess *****
len 13581 , pants on fire! (10.6\%), false (19.2\%), mostly false (17.0\%), half-true (19.8\%), mostly true (18.8\%), true (14.8\%)
load data function: step = pos
****load data: preprocess *****
len 5069 , pants on fire! (0.0\%), false (64.3\%), mostly false (7.5\%), half-true (12.3\%), mostly true (2.8\%), true (13.0\%)
***run_bert*** with inputtype CLAIM_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.bias', 'predic

model parameters 109486854
[Dec 07, 19:53:37] TRAIN loss 1.7573876176303709 1
[Dec 07, 19:53:42] VALIDATION F1micro, F1macro, loss: 0.23784977908689248 0.21796494672555988 1358
[Dec 07, 19:53:53] TEST F1micro, F1macro, loss: 0.24696356275303644 0.22994632134886647 2717
[Dec 07, 19:53:57] OTHER-TEST F1micro, F1macro, loss: 0.6232741617357002 0.22164150819607942 1014
[Dec 07, 19:53:57] PATIENCE 0 / 1
[Dec 07, 19:56:05] TRAIN loss 1.670549174542682 2
[Dec 07, 19:56:10] VALIDATION F1micro, F1macro, loss: 0.22901325478645065 0.22768909641299806 1358
[Dec 07, 19:56:20] TEST F1micro, F1macro, loss: 0.24254692675745307 0.241188963419992 2717
[Dec 07, 19:56:24] OTHER-TEST F1micro, F1macro, loss: 0.6173570019723866 0.26112979655525975 1014
[Dec 07, 19:56:24] PATIENCE 0 / 1
[Dec 07, 19:58:32] TRAIN loss 1.5881041870881492 3
[Dec 07, 19:58:37] VALIDATION F1micro, F1macro, loss: 0.2503681885125184 0.252091758515516 1358
[Dec 07, 19:58:47] TEST F1micro, F1macro, loss: 0.26352594773647403 0.267241172

Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109490694
[Dec 07, 20:06:27] TRAIN loss 1.5572765343696684 1
[Dec 07, 20:06:35] VALIDATION F1micro, F1macro, loss: 0.5877712031558185 0.2499132213716694 507
[Dec 07, 20:06:49] TEST F1micro, F1macro, loss: 0.5670611439842209 0.24707741077973083 1014
[Dec 07, 20:07:28] OTHER-TEST F1micro, F1macro, loss: 0.2701509017298491 0.19378345942871336 2717


100%|██████████| 10/10 [15:46<00:00, 94.62s/it]

[Dec 07, 20:23:14] [0.2446, 0.2463, 0.2394, 0.2421, 0.2388, 0.2424, 0.2439, 0.2503, 0.2453, 0.246]
[Dec 07, 20:23:14] [0.2472, 0.249, 0.2488, 0.2425, 0.2449, 0.2368, 0.2417, 0.2458, 0.249, 0.246]
[Dec 07, 20:23:14] [0.1999, 0.2019, 0.206, 0.2102, 0.2081, 0.2111, 0.2096, 0.2077, 0.2081, 0.2074]
[Dec 07, 20:23:14] [0.1948, 0.2012, 0.201, 0.2029, 0.2076, 0.2071, 0.21, 0.2119, 0.2092, 0.2074]
[Dec 07, 20:23:14] PATIENCE 0 / 1





[Dec 07, 20:25:43] TRAIN loss 1.4741909931841735 2
[Dec 07, 20:25:50] VALIDATION F1micro, F1macro, loss: 0.6035502958579881 0.2763577901421471 507
[Dec 07, 20:26:05] TEST F1micro, F1macro, loss: 0.5877712031558185 0.2591722007243733 1014
[Dec 07, 20:26:43] OTHER-TEST F1micro, F1macro, loss: 0.27456753772543246 0.19723996590948026 2717


100%|██████████| 10/10 [15:50<00:00, 95.10s/it]

[Dec 07, 20:42:34] [0.2618, 0.2548, 0.2463, 0.2403, 0.2403, 0.2406, 0.2478, 0.2488, 0.2437, 0.2434]
[Dec 07, 20:42:34] [0.2592, 0.2544, 0.2568, 0.2573, 0.258, 0.2481, 0.2433, 0.2448, 0.2473, 0.2434]
[Dec 07, 20:42:34] [0.1985, 0.1983, 0.1996, 0.2, 0.202, 0.2027, 0.2027, 0.2043, 0.203, 0.2038]
[Dec 07, 20:42:34] [0.1963, 0.1984, 0.1997, 0.2001, 0.2034, 0.2048, 0.2044, 0.203, 0.2036, 0.2038]
[Dec 07, 20:42:34] PATIENCE 0 / 1





[Dec 07, 20:45:03] TRAIN loss 1.3837486275949993 3
[Dec 07, 20:45:10] VALIDATION F1micro, F1macro, loss: 0.5384615384615384 0.29050932266848195 507
[Dec 07, 20:45:25] TEST F1micro, F1macro, loss: 0.5019723865877712 0.284953752508305 1014
[Dec 07, 20:46:03] OTHER-TEST F1micro, F1macro, loss: 0.2241442767758557 0.17616957574279862 2717


100%|██████████| 10/10 [15:45<00:00, 94.53s/it]

[Dec 07, 21:01:48] [0.2907, 0.2898, 0.2848, 0.2731, 0.2671, 0.2591, 0.2651, 0.2584, 0.2406, 0.2406]
[Dec 07, 21:01:48] [0.284, 0.2832, 0.2802, 0.2768, 0.2732, 0.2698, 0.266, 0.2614, 0.2539, 0.2406]
[Dec 07, 21:01:48] [0.1856, 0.1884, 0.1938, 0.1947, 0.194, 0.1996, 0.2013, 0.2047, 0.2024, 0.201]
[Dec 07, 21:01:48] [0.18, 0.1827, 0.1867, 0.19, 0.1921, 0.1961, 0.1979, 0.2026, 0.2055, 0.201]
[Dec 07, 21:01:48] PATIENCE 0 / 1





[Dec 07, 21:04:16] TRAIN loss 1.2635393956223049 4
[Dec 07, 21:04:23] VALIDATION F1micro, F1macro, loss: 0.5542406311637081 0.2948172489869251 507
[Dec 07, 21:04:38] TEST F1micro, F1macro, loss: 0.5433925049309665 0.30556145998415063 1014
[Dec 07, 21:05:16] OTHER-TEST F1micro, F1macro, loss: 0.23739418476260585 0.17692278856198845 2717


100%|██████████| 10/10 [15:48<00:00, 94.83s/it]

[Dec 07, 21:21:04] [0.3052, 0.3058, 0.2973, 0.2963, 0.288, 0.2867, 0.2825, 0.2757, 0.2673, 0.2669]
[Dec 07, 21:21:04] [0.3056, 0.3077, 0.3093, 0.3039, 0.2956, 0.2948, 0.288, 0.282, 0.2749, 0.2669]
[Dec 07, 21:21:04] [0.1787, 0.1878, 0.1915, 0.194, 0.1987, 0.1988, 0.2056, 0.2034, 0.2093, 0.2075]
[Dec 07, 21:21:04] [0.1775, 0.1818, 0.1847, 0.189, 0.1907, 0.1968, 0.1998, 0.1981, 0.2013, 0.2075]
[Dec 07, 21:21:04] PATIENCE 0 / 1





[Dec 07, 21:23:33] TRAIN loss 1.1337519413917452 5
[Dec 07, 21:23:41] VALIDATION F1micro, F1macro, loss: 0.6015779092702169 0.31248710095978377 507
[Dec 07, 21:23:55] TEST F1micro, F1macro, loss: 0.5936883629191322 0.3066209364929041 1014
[Dec 07, 21:24:33] OTHER-TEST F1micro, F1macro, loss: 0.2624217887375782 0.19239320277371613 2717


100%|██████████| 10/10 [15:50<00:00, 95.01s/it]

[Dec 07, 21:40:24] [0.312, 0.3074, 0.2937, 0.2883, 0.2864, 0.2798, 0.2767, 0.2643, 0.2618, 0.2618]
[Dec 07, 21:40:24] [0.3066, 0.302, 0.3004, 0.3021, 0.3072, 0.3013, 0.2947, 0.2802, 0.2629, 0.2618]
[Dec 07, 21:40:24] [0.1939, 0.1961, 0.2004, 0.1999, 0.2028, 0.2024, 0.2041, 0.2052, 0.2, 0.1981]
[Dec 07, 21:40:24] [0.1926, 0.1946, 0.198, 0.2004, 0.2019, 0.2061, 0.2119, 0.2101, 0.2067, 0.1981]
[Dec 07, 21:40:24] PATIENCE 0 / 1





[Dec 07, 21:42:52] TRAIN loss 0.9799561327373659 6
[Dec 07, 21:42:59] VALIDATION F1micro, F1macro, loss: 0.5305719921104537 0.3001061241031042 507
[Dec 07, 21:42:59] PATIENCE 1 / 1
***********************************
EVIDENCE_ONLY - snes - stop
load data function: step = stop
****load data: preprocess *****
len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
load data function: step = stop
****load data: preprocess *****
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_bert*** with inputtype EVIDENCE_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109486854
[Dec 07, 21:47:28] TRAIN loss 1.554868524340359 1
[Dec 07, 21:47:33] VALIDATION F1micro, F1macro, loss: 0.3510848126232742 0.15613132316210768 507
[Dec 07, 21:47:42] TEST F1micro, F1macro, loss: 0.34023668639053256 0.15346336094250945 1014
[Dec 07, 21:48:06] OTHER-TEST F1micro, F1macro, loss: 0.21420684578579316 0.08927461287926404 2717


100%|██████████| 10/10 [09:27<00:00, 56.73s/it]

[Dec 07, 21:57:34] [0.1342, 0.1047, 0.0792, 0.0518, 0.0452, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439]
[Dec 07, 21:57:34] [0.1531, 0.1356, 0.1093, 0.0829, 0.0548, 0.0439, 0.0439, 0.0439, 0.0439, 0.0439]
[Dec 07, 21:57:34] [0.0776, 0.0752, 0.0692, 0.0661, 0.066, 0.066, 0.066, 0.066, 0.066, 0.066]
[Dec 07, 21:57:34] [0.085, 0.0781, 0.0732, 0.0691, 0.0666, 0.066, 0.066, 0.066, 0.066, 0.066]
[Dec 07, 21:57:34] PATIENCE 0 / 1





[Dec 07, 21:59:12] TRAIN loss 1.505859846198881 2
[Dec 07, 21:59:16] VALIDATION F1micro, F1macro, loss: 0.4832347140039448 0.26784091671188454 507
[Dec 07, 21:59:26] TEST F1micro, F1macro, loss: 0.45759368836291914 0.2607251363538693 1014
[Dec 07, 21:59:50] OTHER-TEST F1micro, F1macro, loss: 0.2403386087596614 0.18879851528368177 2717


100%|██████████| 10/10 [09:28<00:00, 56.84s/it]

[Dec 07, 22:09:18] [0.2588, 0.257, 0.2457, 0.2385, 0.2263, 0.2094, 0.1483, 0.091, 0.0534, 0.0439]
[Dec 07, 22:09:18] [0.2591, 0.2615, 0.2503, 0.2556, 0.2446, 0.2403, 0.2181, 0.1789, 0.0985, 0.0439]
[Dec 07, 22:09:18] [0.1773, 0.1794, 0.168, 0.1602, 0.1509, 0.1408, 0.1178, 0.1035, 0.0827, 0.066]
[Dec 07, 22:09:18] [0.1854, 0.1817, 0.1763, 0.1681, 0.1564, 0.1493, 0.1389, 0.1236, 0.1078, 0.066]
[Dec 07, 22:09:18] PATIENCE 0 / 1





[Dec 07, 22:10:56] TRAIN loss 1.4551104918323658 3
[Dec 07, 22:11:01] VALIDATION F1micro, F1macro, loss: 0.3905325443786982 0.24086724059618841 507
[Dec 07, 22:11:01] PATIENCE 1 / 1
***********************************
CLAIM_ONLY - snes - stop
load data function: step = stop
****load data: preprocess *****
len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
load data function: step = stop
****load data: preprocess *****
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_bert*** with inputtype CLAIM_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.bias', 'predic

model parameters 109486085
[Dec 07, 22:14:28] TRAIN loss 1.5636356157829632 1
[Dec 07, 22:14:29] VALIDATION F1micro, F1macro, loss: 0.5581854043392505 0.20225627905294646 507
[Dec 07, 22:14:32] TEST F1micro, F1macro, loss: 0.5798816568047337 0.219048009488417 1014
[Dec 07, 22:14:39] OTHER-TEST F1micro, F1macro, loss: 0.2881854987118145 0.16704114298652603 2717
[Dec 07, 22:14:39] PATIENCE 0 / 1
[Dec 07, 22:15:11] TRAIN loss 1.5084122659990917 2
[Dec 07, 22:15:12] VALIDATION F1micro, F1macro, loss: 0.5364891518737672 0.2680512932488016 507
[Dec 07, 22:15:15] TEST F1micro, F1macro, loss: 0.5088757396449705 0.2470391667835225 1014
[Dec 07, 22:15:22] OTHER-TEST F1micro, F1macro, loss: 0.2664703717335296 0.19357348220083984 2717
[Dec 07, 22:15:22] PATIENCE 0 / 1
[Dec 07, 22:15:54] TRAIN loss 1.4261069054136406 3
[Dec 07, 22:15:55] VALIDATION F1micro, F1macro, loss: 0.5463510848126233 0.2549523098522143 507
[Dec 07, 22:15:55] PATIENCE 1 / 1
***********************************
CLAIM_AND_EVIDEN

Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109492231
[Dec 07, 22:25:15] TRAIN loss 1.7424928896257643 1
[Dec 07, 22:25:35] VALIDATION F1micro, F1macro, loss: 0.24742268041237114 0.23110648097693787 1358
[Dec 07, 22:26:16] TEST F1micro, F1macro, loss: 0.2583732057416268 0.24742226976935788 2717
[Dec 07, 22:26:31] OTHER-TEST F1micro, F1macro, loss: 0.6193293885601578 0.2388211110366833 1014


100%|██████████| 10/10 [17:06<00:00, 102.64s/it]

[Dec 07, 22:43:38] [0.2464, 0.2407, 0.2371, 0.2327, 0.2311, 0.2271, 0.2244, 0.2157, 0.2156, 0.213]
[Dec 07, 22:43:38] [0.2493, 0.2446, 0.2449, 0.2347, 0.2353, 0.2372, 0.2292, 0.2256, 0.2188, 0.213]
[Dec 07, 22:43:38] [0.2405, 0.2293, 0.2239, 0.2231, 0.2287, 0.2239, 0.2214, 0.2204, 0.2237, 0.2234]
[Dec 07, 22:43:38] [0.2388, 0.2264, 0.2244, 0.2177, 0.2205, 0.2257, 0.2265, 0.2209, 0.2191, 0.2234]
[Dec 07, 22:43:38] PATIENCE 0 / 1





[Dec 07, 22:50:07] TRAIN loss 1.6506274799915437 2
[Dec 07, 22:50:27] VALIDATION F1micro, F1macro, loss: 0.26877761413843887 0.24961543962105406 1358
[Dec 07, 22:51:08] TEST F1micro, F1macro, loss: 0.26205373573794627 0.2409368543442769 2717
[Dec 07, 22:51:24] OTHER-TEST F1micro, F1macro, loss: 0.6084812623274162 0.20587741329240522 1014


100%|██████████| 10/10 [17:11<00:00, 103.15s/it]

[Dec 07, 23:08:35] [0.2422, 0.2492, 0.2489, 0.2472, 0.2457, 0.2463, 0.246, 0.2443, 0.2376, 0.2353]
[Dec 07, 23:08:35] [0.2412, 0.2405, 0.2423, 0.2431, 0.2426, 0.2412, 0.2463, 0.2426, 0.2402, 0.2353]
[Dec 07, 23:08:35] [0.2091, 0.2122, 0.215, 0.2106, 0.2289, 0.2259, 0.2243, 0.236, 0.2495, 0.2468]
[Dec 07, 23:08:35] [0.2059, 0.2087, 0.211, 0.203, 0.213, 0.2118, 0.223, 0.2291, 0.2271, 0.2468]
[Dec 07, 23:08:35] PATIENCE 0 / 1





[Dec 07, 23:15:03] TRAIN loss 1.5583615373251165 3
[Dec 07, 23:15:24] VALIDATION F1micro, F1macro, loss: 0.26362297496318116 0.2566323577287258 1358
[Dec 07, 23:16:05] TEST F1micro, F1macro, loss: 0.2741994847258005 0.26549849566400185 2717
[Dec 07, 23:16:20] OTHER-TEST F1micro, F1macro, loss: 0.5976331360946746 0.24956067227549528 1014


100%|██████████| 10/10 [17:10<00:00, 103.03s/it]

[Dec 07, 23:33:30] [0.2642, 0.2622, 0.2591, 0.2566, 0.2572, 0.2576, 0.2541, 0.2463, 0.2433, 0.2423]
[Dec 07, 23:33:30] [0.2648, 0.2663, 0.2658, 0.2637, 0.2577, 0.2639, 0.2588, 0.2518, 0.2459, 0.2423]
[Dec 07, 23:33:30] [0.2677, 0.267, 0.2738, 0.2837, 0.2741, 0.267, 0.2618, 0.2689, 0.2764, 0.2763]
[Dec 07, 23:33:30] [0.2462, 0.2756, 0.2761, 0.2643, 0.2618, 0.2621, 0.2522, 0.2578, 0.2602, 0.2763]
[Dec 07, 23:33:30] PATIENCE 0 / 1





[Dec 07, 23:39:57] TRAIN loss 1.4392728062051157 4
[Dec 07, 23:40:17] VALIDATION F1micro, F1macro, loss: 0.28718703976435933 0.29123592570291773 1358
[Dec 07, 23:40:59] TEST F1micro, F1macro, loss: 0.29407434670592564 0.3030501507787282 2717
[Dec 07, 23:41:14] OTHER-TEST F1micro, F1macro, loss: 0.5414201183431953 0.2546993072445872 1014


100%|██████████| 10/10 [17:03<00:00, 102.38s/it]

[Dec 07, 23:58:18] [0.2991, 0.288, 0.2819, 0.2719, 0.2651, 0.2631, 0.2573, 0.2571, 0.2481, 0.2454]
[Dec 07, 23:58:18] [0.302, 0.3033, 0.3008, 0.2918, 0.2925, 0.2884, 0.2851, 0.2752, 0.26, 0.2454]
[Dec 07, 23:58:18] [0.2525, 0.2661, 0.2527, 0.2503, 0.2526, 0.2574, 0.2469, 0.2383, 0.2291, 0.2293]
[Dec 07, 23:58:18] [0.2544, 0.2547, 0.2644, 0.2568, 0.2672, 0.2639, 0.2535, 0.2494, 0.2416, 0.2293]
[Dec 07, 23:58:18] PATIENCE 0 / 1





[Dec 08, 00:04:44] TRAIN loss 1.308837356540301 5
[Dec 08, 00:05:04] VALIDATION F1micro, F1macro, loss: 0.2930780559646539 0.2956771441321285 1358
[Dec 08, 00:05:44] TEST F1micro, F1macro, loss: 0.3021715126978285 0.3042885096952065 2717
[Dec 08, 00:05:59] OTHER-TEST F1micro, F1macro, loss: 0.5857988165680473 0.24499704860287963 1014


100%|██████████| 10/10 [17:00<00:00, 102.09s/it]

[Dec 08, 00:23:00] [0.3005, 0.2892, 0.2817, 0.275, 0.2764, 0.272, 0.2669, 0.2595, 0.2519, 0.25]
[Dec 08, 00:23:00] [0.3036, 0.3055, 0.3072, 0.3026, 0.2996, 0.3006, 0.2972, 0.2903, 0.2728, 0.25]
[Dec 08, 00:23:00] [0.2467, 0.2459, 0.251, 0.2589, 0.2616, 0.2751, 0.2575, 0.2481, 0.2542, 0.2542]
[Dec 08, 00:23:00] [0.2416, 0.2483, 0.2489, 0.2496, 0.2511, 0.2521, 0.2587, 0.2661, 0.2628, 0.2542]
[Dec 08, 00:23:00] PATIENCE 0 / 1





[Dec 08, 00:29:27] TRAIN loss 1.1644019044506284 6
[Dec 08, 00:29:47] VALIDATION F1micro, F1macro, loss: 0.28865979381443296 0.2898925624186962 1358
[Dec 08, 00:29:47] PATIENCE 1 / 1
***********************************
EVIDENCE_ONLY - pomt - stop
load data function: step = stop
****load data: preprocess *****
len 13581 , pants on fire! (10.6\%), false (19.2\%), mostly false (17.0\%), half-true (19.8\%), mostly true (18.8\%), true (14.8\%)
load data function: step = stop
****load data: preprocess *****
len 5069 , pants on fire! (0.0\%), false (64.3\%), mostly false (7.5\%), half-true (12.3\%), mostly true (2.8\%), true (13.0\%)
***run_bert*** with inputtype EVIDENCE_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109487623
[Dec 08, 00:36:57] TRAIN loss 1.7327233644458593 1
[Dec 08, 00:37:09] VALIDATION F1micro, F1macro, loss: 0.26435935198821797 0.2403207000072637 1358
[Dec 08, 00:37:33] TEST F1micro, F1macro, loss: 0.26278984173721015 0.23956341497958247 2717
[Dec 08, 00:37:43] OTHER-TEST F1micro, F1macro, loss: 0.6153846153846154 0.21400594515221333 1014


100%|██████████| 10/10 [09:41<00:00, 58.17s/it]

[Dec 08, 00:47:25] [0.2325, 0.2246, 0.2155, 0.1995, 0.1889, 0.1644, 0.1298, 0.0764, 0.0387, 0.0318]
[Dec 08, 00:47:25] [0.2362, 0.2306, 0.2246, 0.2109, 0.198, 0.1814, 0.1671, 0.1293, 0.0558, 0.0318]
[Dec 08, 00:47:25] [0.2114, 0.2213, 0.2182, 0.2221, 0.2326, 0.225, 0.1883, 0.1644, 0.1565, 0.1565]
[Dec 08, 00:47:25] [0.2133, 0.2222, 0.2198, 0.2265, 0.2209, 0.2248, 0.2236, 0.1938, 0.1615, 0.1565]
[Dec 08, 00:47:25] PATIENCE 0 / 1





[Dec 08, 00:51:41] TRAIN loss 1.6557174457137607 2
[Dec 08, 00:51:53] VALIDATION F1micro, F1macro, loss: 0.27466863033873345 0.26830482341124406 1358
[Dec 08, 00:52:17] TEST F1micro, F1macro, loss: 0.27530364372469635 0.26633977746475795 2717
[Dec 08, 00:52:26] OTHER-TEST F1micro, F1macro, loss: 0.5867850098619329 0.27263808735369993 1014


100%|██████████| 10/10 [09:41<00:00, 58.18s/it]

[Dec 08, 01:02:08] [0.2498, 0.2355, 0.2176, 0.2027, 0.1882, 0.1615, 0.1435, 0.1207, 0.0701, 0.0318]
[Dec 08, 01:02:08] [0.2639, 0.2622, 0.2566, 0.2335, 0.2244, 0.1958, 0.1779, 0.1528, 0.1273, 0.0318]
[Dec 08, 01:02:08] [0.2702, 0.2637, 0.2638, 0.2488, 0.2493, 0.2215, 0.2059, 0.1923, 0.1646, 0.1565]
[Dec 08, 01:02:08] [0.2704, 0.2705, 0.2629, 0.259, 0.2509, 0.2287, 0.2169, 0.2123, 0.1838, 0.1565]
[Dec 08, 01:02:08] PATIENCE 0 / 1





[Dec 08, 01:06:25] TRAIN loss 1.5684562834261042 3
[Dec 08, 01:06:37] VALIDATION F1micro, F1macro, loss: 0.3114874815905744 0.32691526056915526 1358
[Dec 08, 01:07:01] TEST F1micro, F1macro, loss: 0.2907618697092381 0.3033523799018249 2717
[Dec 08, 01:07:10] OTHER-TEST F1micro, F1macro, loss: 0.5305719921104537 0.2611664741625742 1014


100%|██████████| 10/10 [09:42<00:00, 58.22s/it]

[Dec 08, 01:16:52] [0.2924, 0.2711, 0.2591, 0.2396, 0.2289, 0.2055, 0.1794, 0.1424, 0.0741, 0.0484]
[Dec 08, 01:16:52] [0.3013, 0.2977, 0.2935, 0.2913, 0.2889, 0.2763, 0.2633, 0.2335, 0.1641, 0.0484]
[Dec 08, 01:16:52] [0.2577, 0.2433, 0.2346, 0.2261, 0.2252, 0.1996, 0.1806, 0.1565, 0.0416, 0.0279]
[Dec 08, 01:16:52] [0.2588, 0.2667, 0.2604, 0.26, 0.2462, 0.2465, 0.2401, 0.2269, 0.1872, 0.0279]
[Dec 08, 01:16:52] PATIENCE 0 / 1





[Dec 08, 01:21:09] TRAIN loss 1.4760686149479814 4
[Dec 08, 01:21:21] VALIDATION F1micro, F1macro, loss: 0.31443298969072164 0.31491762945343105 1358
[Dec 08, 01:21:21] PATIENCE 1 / 1
***********************************
CLAIM_ONLY - pomt - stop
load data function: step = stop
****load data: preprocess *****
len 13581 , pants on fire! (10.6\%), false (19.2\%), mostly false (17.0\%), half-true (19.8\%), mostly true (18.8\%), true (14.8\%)
load data function: step = stop
****load data: preprocess *****
len 5069 , pants on fire! (0.0\%), false (64.3\%), mostly false (7.5\%), half-true (12.3\%), mostly true (2.8\%), true (13.0\%)
***run_bert*** with inputtype CLAIM_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.bias', 'predic

model parameters 109486854
[Dec 08, 01:26:20] TRAIN loss 1.75115278141309 1
[Dec 08, 01:26:25] VALIDATION F1micro, F1macro, loss: 0.2496318114874816 0.22653631430080332 1358
[Dec 08, 01:26:35] TEST F1micro, F1macro, loss: 0.25432462274567535 0.23807734764625066 2717
[Dec 08, 01:26:39] OTHER-TEST F1micro, F1macro, loss: 0.606508875739645 0.22135585932760588 1014
[Dec 08, 01:26:39] PATIENCE 0 / 1
[Dec 08, 01:28:46] TRAIN loss 1.6736927112963302 2
[Dec 08, 01:28:51] VALIDATION F1micro, F1macro, loss: 0.2621502209131075 0.2540186349801756 1358
[Dec 08, 01:29:01] TEST F1micro, F1macro, loss: 0.256900993743099 0.2509256978425883 2717
[Dec 08, 01:29:05] OTHER-TEST F1micro, F1macro, loss: 0.5946745562130178 0.24051825636549537 1014
[Dec 08, 01:29:05] PATIENCE 0 / 1
[Dec 08, 01:31:11] TRAIN loss 1.5864891817352076 3
[Dec 08, 01:31:16] VALIDATION F1micro, F1macro, loss: 0.26730486008836524 0.2639525692482643 1358
[Dec 08, 01:31:26] TEST F1micro, F1macro, loss: 0.2602134707397865 0.25829154945878

Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109490694
[Dec 08, 01:45:18] TRAIN loss 1.5719651036165856 1
[Dec 08, 01:45:25] VALIDATION F1micro, F1macro, loss: 0.5936883629191322 0.19210204916773926 507
[Dec 08, 01:45:39] TEST F1micro, F1macro, loss: 0.606508875739645 0.19067441869745133 1014
[Dec 08, 01:46:14] OTHER-TEST F1micro, F1macro, loss: 0.291866028708134 0.16285063243093442 2717


100%|██████████| 10/10 [14:35<00:00, 87.54s/it]

[Dec 08, 02:00:49] [0.2014, 0.2165, 0.216, 0.2137, 0.2064, 0.1972, 0.1932, 0.1793, 0.1687, 0.1684]
[Dec 08, 02:00:49] [0.1907, 0.2067, 0.2126, 0.2197, 0.2178, 0.2138, 0.1972, 0.1913, 0.18, 0.1684]
[Dec 08, 02:00:49] [0.1707, 0.1812, 0.1837, 0.184, 0.177, 0.1705, 0.1662, 0.1593, 0.1427, 0.141]
[Dec 08, 02:00:49] [0.1652, 0.1681, 0.1809, 0.1875, 0.187, 0.1783, 0.1728, 0.1672, 0.1523, 0.141]
[Dec 08, 02:00:49] PATIENCE 0 / 1





[Dec 08, 02:03:09] TRAIN loss 1.5036715031475634 2
[Dec 08, 02:03:16] VALIDATION F1micro, F1macro, loss: 0.5700197238658777 0.24169366090385203 507
[Dec 08, 02:03:29] TEST F1micro, F1macro, loss: 0.5838264299802761 0.2606037194443799 1014
[Dec 08, 02:04:04] OTHER-TEST F1micro, F1macro, loss: 0.28156054471843944 0.185495547299291 2717


100%|██████████| 10/10 [14:33<00:00, 87.32s/it]

[Dec 08, 02:18:37] [0.2588, 0.2533, 0.2514, 0.2442, 0.2367, 0.2334, 0.2281, 0.2245, 0.2239, 0.2239]
[Dec 08, 02:18:37] [0.2606, 0.2518, 0.248, 0.2438, 0.243, 0.2365, 0.2305, 0.2237, 0.2282, 0.2239]
[Dec 08, 02:18:37] [0.1872, 0.1844, 0.1874, 0.1868, 0.1894, 0.1874, 0.1849, 0.1875, 0.1874, 0.1871]
[Dec 08, 02:18:37] [0.1866, 0.1841, 0.1832, 0.1838, 0.1817, 0.186, 0.187, 0.1884, 0.1901, 0.1871]
[Dec 08, 02:18:37] PATIENCE 0 / 1





[Dec 08, 02:20:56] TRAIN loss 1.398839675796193 3
[Dec 08, 02:21:03] VALIDATION F1micro, F1macro, loss: 0.5108481262327417 0.251095462918123 507
[Dec 08, 02:21:17] TEST F1micro, F1macro, loss: 0.5473372781065089 0.29364787432528255 1014
[Dec 08, 02:21:51] OTHER-TEST F1micro, F1macro, loss: 0.25947736474052263 0.21307485176046623 2717


100%|██████████| 10/10 [14:32<00:00, 87.21s/it]

[Dec 08, 02:36:23] [0.2992, 0.2894, 0.2772, 0.2663, 0.2602, 0.2586, 0.2451, 0.2294, 0.2294, 0.2291]
[Dec 08, 02:36:23] [0.2948, 0.2855, 0.2896, 0.2853, 0.2777, 0.2649, 0.2591, 0.2523, 0.234, 0.2291]
[Dec 08, 02:36:23] [0.2117, 0.2082, 0.2021, 0.1986, 0.1958, 0.1909, 0.1895, 0.1848, 0.1804, 0.1815]
[Dec 08, 02:36:23] [0.2114, 0.2108, 0.21, 0.2069, 0.2027, 0.1976, 0.1936, 0.1902, 0.1863, 0.1815]
[Dec 08, 02:36:23] PATIENCE 0 / 1





[Dec 08, 02:38:42] TRAIN loss 1.2838493384219505 4
[Dec 08, 02:38:49] VALIDATION F1micro, F1macro, loss: 0.4990138067061144 0.2668251444722033 507
[Dec 08, 02:39:02] TEST F1micro, F1macro, loss: 0.514792899408284 0.27321286197794875 1014
[Dec 08, 02:39:37] OTHER-TEST F1micro, F1macro, loss: 0.21825542878174456 0.18356690566971048 2717


100%|██████████| 10/10 [14:33<00:00, 87.34s/it]

[Dec 08, 02:54:11] [0.285, 0.2894, 0.2919, 0.2852, 0.2887, 0.2943, 0.2852, 0.2764, 0.2594, 0.261]
[Dec 08, 02:54:11] [0.2736, 0.2779, 0.2876, 0.2815, 0.2872, 0.2851, 0.2823, 0.2748, 0.2708, 0.261]
[Dec 08, 02:54:11] [0.1894, 0.2013, 0.2058, 0.2161, 0.2154, 0.2111, 0.2028, 0.1995, 0.1928, 0.1903]
[Dec 08, 02:54:11] [0.186, 0.1872, 0.1923, 0.1946, 0.1995, 0.2024, 0.2069, 0.2043, 0.2009, 0.1903]
[Dec 08, 02:54:11] PATIENCE 0 / 1





[Dec 08, 02:56:29] TRAIN loss 1.1439624896424043 5
[Dec 08, 02:56:36] VALIDATION F1micro, F1macro, loss: 0.5621301775147929 0.2632264991802711 507
[Dec 08, 02:56:36] PATIENCE 1 / 1
***********************************
EVIDENCE_ONLY - snes - pos-stop
load data function: step = pos-stop
****load data: preprocess *****
len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
load data function: step = pos-stop
****load data: preprocess *****
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_bert*** with inputtype EVIDENCE_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.weight', 'attn

model parameters 109486854
[Dec 08, 03:07:15] TRAIN loss 1.5562558512429934 1
[Dec 08, 03:07:19] VALIDATION F1micro, F1macro, loss: 0.6074950690335306 0.20882904936760505 507
[Dec 08, 03:07:28] TEST F1micro, F1macro, loss: 0.6222879684418146 0.20489920121719285 1014
[Dec 08, 03:07:49] OTHER-TEST F1micro, F1macro, loss: 0.2944423997055576 0.17738475586584088 2717


100%|██████████| 10/10 [08:28<00:00, 50.89s/it]

[Dec 08, 03:16:18] [0.2052, 0.2039, 0.1897, 0.1873, 0.1847, 0.1832, 0.1872, 0.1868, 0.0572, 0.0461]
[Dec 08, 03:16:18] [0.2053, 0.2061, 0.2065, 0.2068, 0.2031, 0.1957, 0.1913, 0.1855, 0.1949, 0.0461]
[Dec 08, 03:16:18] [0.1789, 0.1785, 0.1749, 0.18, 0.1731, 0.1539, 0.1406, 0.1327, 0.0986, 0.0514]
[Dec 08, 03:16:18] [0.1761, 0.1763, 0.1763, 0.1801, 0.1725, 0.1668, 0.1548, 0.1306, 0.1278, 0.0514]
[Dec 08, 03:16:18] PATIENCE 0 / 1





[Dec 08, 03:17:49] TRAIN loss 1.5042221723376095 2
[Dec 08, 03:17:53] VALIDATION F1micro, F1macro, loss: 0.4260355029585799 0.2293673931971804 507
[Dec 08, 03:18:02] TEST F1micro, F1macro, loss: 0.4358974358974359 0.2297177262848328 1014
[Dec 08, 03:18:23] OTHER-TEST F1micro, F1macro, loss: 0.2443871917556128 0.15280523036422194 2717


100%|██████████| 10/10 [08:28<00:00, 50.88s/it]

[Dec 08, 03:26:52] [0.2244, 0.2215, 0.2193, 0.2073, 0.1935, 0.1698, 0.1275, 0.0694, 0.0467, 0.0461]
[Dec 08, 03:26:52] [0.2297, 0.2268, 0.218, 0.2157, 0.2129, 0.1985, 0.1799, 0.1359, 0.0651, 0.0461]
[Dec 08, 03:26:52] [0.1512, 0.1475, 0.1476, 0.1428, 0.1404, 0.1383, 0.1272, 0.104, 0.0705, 0.0514]
[Dec 08, 03:26:52] [0.1532, 0.1491, 0.1508, 0.1512, 0.1508, 0.1481, 0.1427, 0.129, 0.0868, 0.0514]
[Dec 08, 03:26:52] PATIENCE 0 / 1





[Dec 08, 03:28:23] TRAIN loss 1.4634790077201418 3
[Dec 08, 03:28:27] VALIDATION F1micro, F1macro, loss: 0.4477317554240631 0.24832223446325652 507
[Dec 08, 03:28:36] TEST F1micro, F1macro, loss: 0.4546351084812623 0.24557488868090416 1014
[Dec 08, 03:28:57] OTHER-TEST F1micro, F1macro, loss: 0.24475524475524477 0.17860021480828908 2717


100%|██████████| 10/10 [08:30<00:00, 51.04s/it]

[Dec 08, 03:37:27] [0.2377, 0.2304, 0.2184, 0.2154, 0.2063, 0.1966, 0.164, 0.1135, 0.0487, 0.0461]
[Dec 08, 03:37:27] [0.246, 0.2424, 0.2431, 0.2318, 0.2299, 0.222, 0.2162, 0.1747, 0.1204, 0.0461]
[Dec 08, 03:37:27] [0.1793, 0.1776, 0.1705, 0.1676, 0.1653, 0.155, 0.1444, 0.1207, 0.0817, 0.0514]
[Dec 08, 03:37:27] [0.1793, 0.1809, 0.1754, 0.1758, 0.1734, 0.1716, 0.1601, 0.1459, 0.1131, 0.0514]
[Dec 08, 03:37:27] PATIENCE 0 / 1





[Dec 08, 03:38:58] TRAIN loss 1.3963696813663922 4
[Dec 08, 03:39:02] VALIDATION F1micro, F1macro, loss: 0.5305719921104537 0.2468524632859949 507
[Dec 08, 03:39:02] PATIENCE 1 / 1
***********************************
CLAIM_ONLY - snes - pos-stop
load data function: step = pos-stop
****load data: preprocess *****
len 5069 , false (64.3\%), mostly false (7.5\%), mixture (12.3\%), mostly true (2.8\%), true (13.0\%)
load data function: step = pos-stop
****load data: preprocess *****
len 13581 , false (29.7\%), mostly false (17.0\%), mixture (19.8\%), mostly true (18.8\%), true (14.8\%)
***run_bert*** with inputtype CLAIM_ONLY


Some weights of the model checkpoint at bert-base-uncased were not used when initializing MyBertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MyBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MyBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MyBertModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['predictor.bias', 'predic

model parameters 109486085
[Dec 08, 03:48:43] TRAIN loss 1.5691846125834696 1
[Dec 08, 03:48:44] VALIDATION F1micro, F1macro, loss: 0.611439842209073 0.20626703450826875 507
[Dec 08, 03:48:46] TEST F1micro, F1macro, loss: 0.6242603550295858 0.22564203378931608 1014
[Dec 08, 03:48:53] OTHER-TEST F1micro, F1macro, loss: 0.2903938167096062 0.18142095448614276 2717
[Dec 08, 03:48:53] PATIENCE 0 / 1
[Dec 08, 03:49:21] TRAIN loss 1.5016521578906357 2
[Dec 08, 03:49:22] VALIDATION F1micro, F1macro, loss: 0.5759368836291914 0.22799404122084294 507
[Dec 08, 03:49:25] TEST F1micro, F1macro, loss: 0.5887573964497042 0.2465869689623133 1014
[Dec 08, 03:49:31] OTHER-TEST F1micro, F1macro, loss: 0.27972027972027974 0.20365019366161152 2717
[Dec 08, 03:49:31] PATIENCE 0 / 1
[Dec 08, 03:49:59] TRAIN loss 1.4137388573506393 3
[Dec 08, 03:50:00] VALIDATION F1micro, F1macro, loss: 0.4891518737672584 0.24087395291738725 507
[Dec 08, 03:50:03] TEST F1micro, F1macro, loss: 0.5157790927021696 0.2771329738655

KeyboardInterrupt: ignored

In [None]:
# This cell handles only the 'formal' and 'informal' steps. They need a separate
# cell because there is no formal/informal data for PolitiFact (pomt).
# The cell runs every training and evaluation job described by the configuration
# lists further down and stores the results under the results/ folder.

import gc

# Force a garbage-collection pass before the runs start
# (presumably to release memory left over from earlier cells -- TODO confirm).
gc.collect()
class vars():
    """Hyper-parameter container for a single training run.

    NOTE: the class name shadows the builtin ``vars``. It is kept unchanged
    because the experiment loop in this notebook instantiates it by this name.

    Attributes set for every mode:
        dataset, inputtype: stored unchanged from the constructor arguments.
        filter_websites: always 0.
        model: the same string as ``mode``.
        batchsize, eval_per_epoch, lr: training settings for the mode.

    Attributes set only for ``mode == 'lstm'``:
        lstm_hidden_dim, lstm_layers, lstm_dropout.

    Args:
        mode: one of "bow", "lstm" or "bert".
        inputtype: input-type name, stored as ``self.inputtype``.
        dataset: dataset name; for "bert" it selects the batch size and must
            be "snes" or "pomt".

    Raises:
        ValueError: if ``mode`` is not supported, or if ``mode`` is "bert" and
            ``dataset`` is neither "snes" nor "pomt". Previously both cases
            produced a partially initialised object that only failed later
            with an unrelated AttributeError.
    """

    def __init__(self, mode, inputtype, dataset):
        if mode not in ("bow", "lstm", "bert"):
            raise ValueError(
                "unsupported mode %r (expected 'bow', 'lstm' or 'bert')" % (mode,)
            )

        # Settings shared by every mode.
        self.dataset = dataset
        self.inputtype = inputtype
        self.filter_websites = 0
        self.model = mode
        self.eval_per_epoch = 1

        if mode == "bow":
            self.batchsize = 2
            self.lr = 0.0001
        elif mode == "lstm":
            self.batchsize = 16
            self.lr = 0.0001
            self.lstm_hidden_dim = 128
            self.lstm_layers = 2
            self.lstm_dropout = 0.1
        else:  # mode == "bert"
            # The batch size depends on the dataset being trained on.
            if dataset == "snes":
                self.batchsize = 6
            elif dataset == "pomt":
                self.batchsize = 4
            else:
                raise ValueError(
                    "no bert batch size configured for dataset %r "
                    "(expected 'snes' or 'pomt')" % (dataset,)
                )
            self.lr = 0.000003

# Fill `word_freq` from the tab-separated unigram file: for every non-blank
# line the second column becomes the key and the first column the value
# (presumably word -> frequency; the value is kept as a string).
# `count` tracks how many lines were consumed; reading stops once it exceeds
# 100000, i.e. after at most 100001 lines (blank lines count as well).
filepath = 'sorted.uk.word.unigrams'
word_freq = {}
count = 0
with open(filepath, encoding='utf-8') as f:
    for count, raw_line in enumerate(f, start=1):
        stripped = raw_line.rstrip()
        if stripped:
            fields = stripped.split('\t')
            word_freq[fields[1]] = str(fields[0])
        if count > 100000:
            break

# Only the 'formal' and 'informal' steps are run here. They need their own cell
# because there is no formal/informal data for PolitiFact (pomt).
steps = [['formal'], ['informal']]
modes = ['bert']
datasets = ['snes']
inputtypes = ['CLAIM_AND_EVIDENCE', 'EVIDENCE_ONLY', 'CLAIM_ONLY']


def _make_other_test_generator(args, stepstr, label_order, params, eval_params):
    """Build the test generator of the dataset that is NOT being trained on.

    The other dataset's labels are rewritten so they match the label set of
    the training dataset before the generators are created.

    Args:
        args: run configuration; ``args.dataset`` and ``args.filter_websites``
            are read.
        stepstr: preprocessing step name of the current run.
        label_order: label order of the training dataset, passed through to
            ``make_generators``.
        params: data-loader settings for training.
        eval_params: data-loader settings for evaluation.

    Returns:
        The third value returned by ``make_generators`` (the test generator)
        for the other dataset.
    """
    if args.dataset == "snes":
        other_name = "pomt"
        # pomt has no formal/informal variant, so fall back to the raw data.
        other_step = 'none' if stepstr in ('formal', 'informal') else stepstr
        relabel = (("pants on fire!", "false"), ("half-true", "mixture"))
    else:
        other_name = "snes"
        # Bug fix: the formal/informal branch used to pass the string literal
        # 'stepstr' here instead of the value of the variable.
        other_step = stepstr
        relabel = (("mixture", "half-true"),)

    other_main, other_snippets, _, other_splits = load_data(other_name, other_step)
    if args.filter_websites > 0.5:
        other_snippets = filter_websites(other_snippets)

    # Column 2 is compared against label strings (presumably the label column).
    # iloc only documents boolean *arrays* as masks, so convert the boolean
    # Series to a NumPy array before using it positionally.
    for old_label, new_label in relabel:
        mask = (other_main.iloc[:, 2] == old_label).to_numpy()
        other_main.iloc[mask, 2] = new_label

    _, _, other_test_generator, _ = make_generators(
        other_main, other_snippets, label_order, other_splits,
        [params, eval_params], other_dataset=True)
    return other_test_generator


for step in steps:
    stepstr = "-".join(step)
    for mode in modes:
        for dataset in datasets:
            for inputtype in inputtypes:
                print('***********************************')
                args = vars(mode, inputtype, dataset)

                # The result file name encodes the run configuration.
                name_parts = [args.model, args.dataset, args.inputtype, stepstr, args.lr, args.batchsize]
                if args.filter_websites > 0.5:
                    name_parts.insert(0, args.filter_websites)
                if args.model == "lstm":
                    name_parts += [args.lstm_hidden_dim, args.lstm_layers, stepstr, args.lstm_dropout]
                savename = "results/" + "-".join(str(v) for v in name_parts) + ".pkl"

                print(args.inputtype, "-", args.dataset, "-", stepstr)
                # The run_* functions receive the position of the input type
                # in INPUT_TYPE_ORDER, not its name.
                inputtype_idx = INPUT_TYPE_ORDER.index(args.inputtype)
                main_data, snippets_data, label_order, splits = load_data(args.dataset, stepstr)

                if args.filter_websites > 0.5:
                    snippets_data = filter_websites(snippets_data)

                params = {"batch_size": args.batchsize, "shuffle": True, "num_workers": 1, "collate_fn": transformer_collate, "persistent_workers": True, "prefetch_factor": 5}
                eval_params = {"batch_size": args.batchsize, "shuffle": False, "num_workers": 1, "collate_fn": transformer_collate, "persistent_workers": True, "prefetch_factor": 5}

                train_generator, val_generator, test_generator, label_weights = make_generators(
                    main_data, snippets_data, label_order, splits, [params, eval_params])

                # Test generator for the dataset that was not trained on.
                other_test_generator = _make_other_test_generator(
                    args, stepstr, label_order, params, eval_params)

                if args.model == "bert":
                    run_bert(args, train_generator, val_generator, test_generator, label_weights, inputtype_idx, label_order, savename, other_test_generator, stepstr)
                elif args.model == "lstm":
                    run_lstm(args, train_generator, val_generator, test_generator, label_weights, inputtype_idx, label_order, savename, other_test_generator)
                elif args.model == "bow":
                    run_bow(args, train_generator, val_generator, test_generator, label_weights, inputtype_idx, label_order, savename, other_test_generator)

                # Collect garbage between runs.
                gc.collect()


