In [5]:
from vocab_mismatch_utils import *
from data_formatter_utils import *
from datasets import DatasetDict
from datasets import Dataset
from datasets import load_dataset
import transformers
import pandas as pd
from collections import OrderedDict
import operator

from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler
from torch.utils.data.sampler import RandomSampler, SequentialSampler
from torch.nn import CrossEntropyLoss

# Load modules, mainly huggingface basic model handlers.
# Make sure you install huggingface and other packages properly.
from collections import Counter
import json

from nltk.tokenize import TweetTokenizer
from sklearn.metrics import classification_report
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import matthews_corrcoef
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math

import logging
logger = logging.getLogger(__name__)

import os
os.environ["TRANSFORMERS_CACHE"] = "../huggingface_cache/" # Not overload common dir 
                                                           # if run in shared resources.

import random
import sys
from dataclasses import dataclass, field
from typing import Optional
import torch
import argparse
import numpy as np
import pandas as pd
from datasets import load_dataset, load_metric
from datasets import Dataset
from datasets import DatasetDict
from tqdm import tqdm, trange

import transformers
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    EvalPrediction,
    HfArgumentParser,
    PretrainedConfig,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
    EarlyStoppingCallback
)
from transformers.trainer_utils import is_main_process, EvaluationStrategy

#### Setups

In [6]:
def get_dataset(inoculation_data_path, eval_data_path=None, test_data_path=None,
                inoculation_step_sample_size=1.0, 
                eval_sample_limit=-1, seed=42):
    """
    Load the train / validation (/ test) splits for one inoculation run.

    How `inoculation_data_path` is interpreted:

    * ends in ".tsv": the train split is read with pandas from that file and
      the validation / test splits from `eval_data_path` / `test_data_path`
      (all tab-separated).
    * any other path containing a "." (for example a relative "../dir"): it
      is loaded with `DatasetDict.load_from_disk`; `eval_data_path` and
      `test_data_path` are not used.
    * no "." at all: reserved for loading a dataset by name; not implemented.

    Args:
        inoculation_data_path: train TSV file or saved DatasetDict directory.
        eval_data_path: validation TSV file (required for TSV input).
        test_data_path: optional test TSV file (TSV input only).
        inoculation_step_sample_size: fraction of the train split to keep;
            the kept count is int(len(train) * fraction).
        eval_sample_limit: maximum number of validation examples to keep,
            or -1 to keep the validation split untouched.
        seed: seed used for every sampling / shuffling step.

    Returns:
        A plain dict mapping split name to a huggingface dataset: always
        "train" and "validation", plus "test" for TSV input when
        `test_data_path` is given.

    Raises:
        ValueError: TSV input without `eval_data_path`.
        NotImplementedError: `inoculation_data_path` contains no ".".
    """
    path_parts = inoculation_data_path.split(".")
    limit_eval = eval_sample_limit != -1

    if path_parts[-1] == "tsv":
        if eval_data_path is None:
            raise ValueError(
                "eval_data_path is required when inoculation_data_path is a .tsv file")
        train_df = pd.read_csv(inoculation_data_path, delimiter="\t")
        eval_df = pd.read_csv(eval_data_path, delimiter="\t")

        sample_count = int(len(train_df) * inoculation_step_sample_size)
        logger.info("***** Inoculation Sample Count: %s *****", sample_count)
        # Sampling without replacement; the same seed always yields the same subset.
        inoculation_train_df = train_df.sample(n=sample_count,
                                               replace=False,
                                               random_state=seed)
        if limit_eval:
            # Previously the limit was silently ignored for TSV input.
            eval_df = eval_df.sample(n=min(eval_sample_limit, len(eval_df)),
                                     replace=False,
                                     random_state=seed).reset_index(drop=True)

        datasets = {}
        datasets["train"] = Dataset.from_pandas(inoculation_train_df)
        datasets["validation"] = Dataset.from_pandas(eval_df)
        if test_data_path is not None:
            test_df = pd.read_csv(test_data_path, delimiter="\t")
            datasets["test"] = Dataset.from_pandas(test_df)
        return datasets

    if len(path_parts) <= 1:
        logger.info(f"***** Loading downloaded huggingface datasets: {inoculation_data_path}! *****")
        raise NotImplementedError(
            "loading a dataset by name is not supported yet: %s" % inoculation_data_path)

    logger.info("***** Loading pre-loaded datasets from the disk directly! *****")
    saved_splits = DatasetDict.load_from_disk(inoculation_data_path)

    sample_count = int(len(saved_splits["train"]) * inoculation_step_sample_size)
    logger.info("***** Inoculation Sample Count: %s *****", sample_count)
    inoculation_train = saved_splits["train"].shuffle(seed=seed).select(range(sample_count))

    # The validation split is returned as stored unless a limit is requested.
    # The limit must be applied to the object that is actually returned; the
    # earlier version truncated a copy that was then discarded.
    eval_split = saved_splits["validation"]
    if limit_eval:
        eval_split = eval_split.shuffle(seed=seed).select(
            range(min(eval_sample_limit, len(eval_split))))

    datasets = {}
    datasets["train"] = inoculation_train
    datasets["validation"] = eval_split
    return datasets

In [7]:
# For each task: the example field(s) holding the input text, as
# (first sentence key, second sentence key or None for single-sentence tasks).
TASK_CONFIG = {
    "sst3": ("text", None),
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "snli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence")
}
# NOTE: the BERT tokenizer is not needed for the bag-of-words baseline:
# original_vocab = load_bert_vocab("../data-files/bert_vocab.txt")
# original_tokenizer = transformers.BertTokenizer(
#     vocab_file="../data-files/bert_vocab.txt")
# A basic tokenizer from the project utilities is used instead.
modified_basic_tokenizer = ModifiedBasicTokenizer()
max_length = 128  # token sequences are truncated to this length before counting
per_device_train_batch_size = 128
per_device_eval_batch_size = 128
no_cuda = True
device = torch.device("cuda" if torch.cuda.is_available() and not no_cuda else "cpu")
# Never let n_gpu drop to 0: the batch sizes are multiplied by it, and a
# machine without GPUs reports device_count() == 0 even when no_cuda is False.
n_gpu = max(1, torch.cuda.device_count()) if not no_cuda else 1 # 1 means just on cpu
seed = 42
lr = 1e-3
num_train_epochs = 10
task_name = "sst3"
sentence1_key, sentence2_key = TASK_CONFIG[task_name]

# Seed every RNG in use so that sampling, shuffling and initialisation repeat.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if n_gpu > 0 and not no_cuda:
    # `args` does not exist in this notebook; the seed lives in `seed`.
    torch.cuda.manual_seed_all(seed)

In [8]:
# Configure logging so the INFO messages emitted below are displayed.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%m/%d/%Y %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
)

# The sst3 task stores its files under a different name than the task key.
if task_name == "sst3":
    data_file_name = "sst-tenary"
else:
    data_file_name = task_name

# Train / dev / test TSV files share one directory and one naming scheme.
split_files = [
    f"../data-files/{data_file_name}/{data_file_name}-{split_suffix}.tsv"
    for split_suffix in ("train", "dev", "test")
]
datasets = get_dataset(*split_files)

# Report the size of every split as a sanity check.
logger.info("***** Train Sample Count (Verify): %s *****", len(datasets["train"]))
logger.info("***** Valid Sample Count (Verify): %s *****", len(datasets["validation"]))
logger.info("***** Test Sample Count (Verify): %s *****", len(datasets["test"]))

03/17/2021 15:41:03 - INFO - __main__ - ***** Inoculation Sample Count: 159274 *****
03/17/2021 15:41:03 - INFO - __main__ - ***** Train Sample Count (Verify): 159274 *****
03/17/2021 15:41:03 - INFO - __main__ - ***** Valid Sample Count (Verify): 1100 *****
03/17/2021 15:41:03 - INFO - __main__ - ***** Test Sample Count (Verify): 2210 *****


#### BoW preprocessor

In [9]:
# Build the token -> index vocabulary. Indices are handed out in order of
# first appearance, walking the splits in the order train, validation, test.
original_vocab = OrderedDict()
vocab_index = 0

# When False, tokens that only occur in validation / test also get an index.
train_data_only = False

vocab_splits = ["train"]
if not train_data_only:
    vocab_splits += ["validation", "test"]

for split_name in vocab_splits:
    if split_name not in datasets:
        continue
    for ex_index, example in enumerate(tqdm(datasets[split_name])):
        # Sentence-pair tasks are joined into one string around a " [SEP] " marker.
        sentence_combined = example[sentence1_key]
        if sentence2_key is not None:
            sentence_combined = sentence_combined + " [SEP] " + example[sentence2_key]
        sentence_tokens = modified_basic_tokenizer.tokenize(sentence_combined)
        for token in sentence_tokens:
            if token in original_vocab:
                continue
            original_vocab[token] = vocab_index
            vocab_index += 1

100%|██████████| 159274/159274 [00:27<00:00, 5889.51it/s]
100%|██████████| 1100/1100 [00:00<00:00, 2909.80it/s]
100%|██████████| 2210/2210 [00:00<00:00, 2802.31it/s]


In [10]:
# BoW feature vectors for train split.
# One row per example, one column per vocabulary entry; each cell counts how
# often that token occurs in the (truncated) example.
# The matrix is allocated once and filled in place. Building a list of rows,
# stacking it and then passing the result through torch.tensor() copied the
# whole dense matrix again and triggered PyTorch's copy-construct warning.
train_input_features = torch.zeros(
    (len(datasets["train"]), len(original_vocab)), dtype=torch.float)
train_label_ids = []
for (ex_index, example) in enumerate(tqdm(datasets["train"])):
    bow_feature = train_input_features[ex_index]  # view: writes land in the matrix
    if sentence2_key is None:
        sentence_combined = example[sentence1_key]
    else:
        sentence_combined = example[sentence1_key] + " [SEP] " + example[sentence2_key]
    sentence_tokens = modified_basic_tokenizer.tokenize(sentence_combined)
    sentence_tokens = sentence_tokens[:max_length]
    if ex_index % 50000 == 0:
        # Occasionally show a raw example as a visual sanity check.
        print("Example sentence: " + sentence_combined)
    for t in sentence_tokens:
        bow_feature[original_vocab[t]] += 1
    train_label_ids.append(example["label"])

train_label_ids = torch.tensor(train_label_ids, dtype=torch.long)
train_data = TensorDataset(train_input_features, train_label_ids)

  0%|          | 517/159274 [00:00<01:03, 2518.45it/s]

Example sentence: Surprisingly, considering that Baird is a former film editor, the movie is rather choppy.


 32%|███▏      | 50460/159274 [00:18<00:41, 2626.66it/s]

Example sentence: achronological


 63%|██████▎   | 100693/159274 [00:36<00:15, 3680.47it/s]

Example sentence: Show


 95%|█████████▍| 150726/159274 [00:50<00:02, 3783.53it/s]

Example sentence: picked me up ,


100%|██████████| 159274/159274 [00:52<00:00, 3005.65it/s]


In [65]:
# BoW feature vectors for validation split.
# Same layout as the train features: one row per example, one column per
# vocabulary entry, cells hold token counts.
# The matrix is allocated once and filled in place; the previous
# stack + torch.tensor() sequence copied the data a second time and raised
# PyTorch's copy-construct warning.
validation_input_features = torch.zeros(
    (len(datasets["validation"]), len(original_vocab)), dtype=torch.float)
validation_label_ids = []
for (ex_index, example) in enumerate(tqdm(datasets["validation"])):
    bow_feature = validation_input_features[ex_index]  # view: writes land in the matrix
    if sentence2_key is None:
        sentence_combined = example[sentence1_key]
    else:
        sentence_combined = example[sentence1_key] + " [SEP] " + example[sentence2_key]
    sentence_tokens = modified_basic_tokenizer.tokenize(sentence_combined)
    sentence_tokens = sentence_tokens[:max_length]
    for t in sentence_tokens:
        # Tokens without a vocabulary index are skipped.
        if t in original_vocab:
            bow_feature[original_vocab[t]] += 1
    validation_label_ids.append(example["label"])

validation_label_ids = torch.tensor(validation_label_ids, dtype=torch.long)
validation_data = TensorDataset(validation_input_features, validation_label_ids)

100%|██████████| 1100/1100 [00:00<00:00, 1350.96it/s]


In [66]:
# Data loaders. The per-device batch sizes are scaled by n_gpu.
train_batch_size = per_device_train_batch_size * n_gpu
eval_batch_size = per_device_eval_batch_size * n_gpu

# Training batches are drawn through a random sampler; validation batches
# are served without shuffling.
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(
    train_data,
    batch_size=train_batch_size,
    sampler=train_sampler,
)
validation_dataloader = DataLoader(
    validation_data,
    shuffle=False,
    batch_size=eval_batch_size,
)

#### BoW Classifier

In [67]:
class BOWClassifier(nn.Module):
    """Single linear layer mapping a bag-of-words vector to class logits."""

    def __init__(self, num_labels, vocab_size):
        super().__init__()
        self.classifier = nn.Linear(vocab_size, num_labels)

    def forward(self, x, labels=None):
        """
        Score a batch of feature vectors.

        Returns the logits alone when `labels` is None, otherwise the tuple
        (cross-entropy loss, logits).
        """
        logits = self.classifier(x)
        if labels is None:
            return logits
        return CrossEntropyLoss()(logits, labels), logits

In [68]:
class MockBERTBOWClassifier(nn.Module):
    """
    Bag-of-words classifier with one hidden layer.

    The input vector goes through a linear layer and a tanh, and the result
    is fed to a linear classification layer.
    """

    def __init__(self, num_labels, vocab_size, hidden_dim=32):
        """
        Args:
            num_labels: number of output classes.
            vocab_size: length of the bag-of-words input vector.
            hidden_dim: width of the hidden layer (32 by default, the value
                that used to be hard-coded).
        """
        super(MockBERTBOWClassifier, self).__init__()
        self.mock_bert = nn.Linear(vocab_size, hidden_dim)
        self.mock_activation = nn.Tanh()
        self.classifier = nn.Linear(hidden_dim, num_labels)

    def forward(self, x, labels=None):
        """
        Score a batch of feature vectors.

        Returns the logits alone when `labels` is None, otherwise the tuple
        (cross-entropy loss, logits).
        """
        cls = self.mock_activation(self.mock_bert(x))
        logits = self.classifier(cls)

        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits, labels)
            return loss, logits
        else:
            return logits

In [69]:
# Hyper-parameters for this run (these override the values set in the
# configuration cell).
lr = 1e-3
num_train_epochs = 10
# Size the output layer from the largest label id seen in train or
# validation, so that a class missing from the validation split does not
# produce a classifier that is too small for the training labels.
num_labels = int(torch.max(train_label_ids.max(), validation_label_ids.max()).item()) + 1
model = BOWClassifier(num_labels, len(original_vocab))
# The training and evaluation loops move every batch to `device` when CUDA is
# enabled, so the parameters must live there as well. Do this before building
# the optimizer so it holds the on-device parameters.
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
if n_gpu > 0 and not no_cuda:
    model = torch.nn.DataParallel(model)

#### Main training loop

In [70]:
# Train for num_train_epochs epochs and evaluate on the validation split
# after every 500th optimizer step (including the very first one).
global_step = 0
use_cuda = torch.cuda.is_available() and not no_cuda
for epoch in range(int(num_train_epochs)):

    model.train()
    for step, batch in enumerate(train_dataloader):
        if use_cuda:
            torch.cuda.empty_cache()

        input_features, label_ids = batch

        if use_cuda:
            input_features = input_features.to(device)
            label_ids = label_ids.to(device)

        loss, _ = model(input_features, labels=label_ids)

        if n_gpu > 1:
            loss = loss.mean() # mean() to average on multi-gpu.
        loss.backward()

        optimizer.step()
        model.zero_grad()

        if global_step % 500 == 0:
            logger.info("***** Evaluation Interval Hit *****")
            model.eval()
            all_logits = []     # predicted class ids, one array per batch
            all_label_ids = []  # gold class ids, one array per batch
            with torch.no_grad():
                # Dedicated names keep the outer loop's step / batch / loss intact.
                for eval_step, eval_batch in enumerate(validation_dataloader):
                    if use_cuda:
                        torch.cuda.empty_cache()

                    eval_features, eval_label_ids = eval_batch

                    if use_cuda:
                        eval_features = eval_features.to(device)

                    # Without labels the model returns the logits only.
                    eval_probs = F.softmax(model(eval_features), dim=-1)
                    eval_probs = eval_probs.detach().cpu().numpy()
                    all_logits.append(np.argmax(eval_probs, axis=1))
                    all_label_ids.append(eval_label_ids.to('cpu').numpy())

            all_logits = np.concatenate(all_logits, axis=0)
            all_label_ids = np.concatenate(all_label_ids, axis=0)
            result_to_save = classification_report(all_label_ids, all_logits, digits=5, output_dict=True)
            print(classification_report(all_label_ids, all_logits, digits=5))
            print("Macro-F1: ", result_to_save["macro avg"]["f1-score"])

            # Evaluation switched the model to eval mode; switch back so the
            # remaining steps of this epoch are trained in train mode.
            model.train()

        global_step += 1

03/17/2021 17:35:17 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.46552   0.37850   0.41753       428
           1    0.45627   0.54054   0.49485       444
           2    0.22124   0.21930   0.22026       228

    accuracy                        0.41091      1100
   macro avg    0.38101   0.37945   0.37755      1100
weighted avg    0.41115   0.41091   0.40785      1100

Macro-F1:  0.37754515040041176


03/17/2021 17:35:20 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.67143   0.54907   0.60411       428
           1    0.64751   0.76126   0.69979       444
           2    0.32018   0.32018   0.32018       228

    accuracy                        0.58727      1100
   macro avg    0.54637   0.54350   0.54136      1100
weighted avg    0.58897   0.58727   0.58388      1100

Macro-F1:  0.5413605032662876


03/17/2021 17:35:27 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.67521   0.55374   0.60847       428
           1    0.64457   0.78829   0.70922       444
           2    0.33010   0.29825   0.31336       228

    accuracy                        0.59545      1100
   macro avg    0.54996   0.54676   0.54369      1100
weighted avg    0.59131   0.59545   0.58797      1100

Macro-F1:  0.5436854379896822


03/17/2021 17:35:30 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.67430   0.61916   0.64555       428
           1    0.67823   0.79279   0.73105       444
           2    0.38298   0.31579   0.34615       228

    accuracy                        0.62636      1100
   macro avg    0.57850   0.57591   0.57425      1100
weighted avg    0.61550   0.62636   0.61801      1100

Macro-F1:  0.5742522847204851


03/17/2021 17:35:33 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.68345   0.66589   0.67456       428
           1    0.69608   0.79955   0.74423       444
           2    0.36994   0.28070   0.31920       228

    accuracy                        0.64000      1100
   macro avg    0.58316   0.58205   0.57933      1100
weighted avg    0.62357   0.64000   0.62903      1100

Macro-F1:  0.5793310029562648


03/17/2021 17:35:37 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.67317   0.64486   0.65871       428
           1    0.69006   0.79730   0.73981       444
           2    0.35593   0.27632   0.31111       228

    accuracy                        0.63000      1100
   macro avg    0.57305   0.57282   0.56988      1100
weighted avg    0.61423   0.63000   0.61940      1100

Macro-F1:  0.5698780801735291


03/17/2021 17:35:42 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.68148   0.64486   0.66267       428
           1    0.68992   0.80180   0.74167       444
           2    0.35754   0.28070   0.31450       228

    accuracy                        0.63273      1100
   macro avg    0.57632   0.57579   0.57294      1100
weighted avg    0.61774   0.63273   0.62239      1100

Macro-F1:  0.572942682396464


03/17/2021 17:35:49 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.69136   0.65421   0.67227       428
           1    0.68037   0.81982   0.74362       444
           2    0.38750   0.27193   0.31959       228

    accuracy                        0.64182      1100
   macro avg    0.58641   0.58199   0.57849      1100
weighted avg    0.62394   0.64182   0.62797      1100

Macro-F1:  0.5784908236853917


03/17/2021 17:35:54 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.68841   0.66589   0.67696       428
           1    0.69305   0.80856   0.74636       444
           2    0.38690   0.28509   0.32828       228

    accuracy                        0.64455      1100
   macro avg    0.58945   0.58651   0.58387      1100
weighted avg    0.62779   0.64455   0.63270      1100

Macro-F1:  0.5838680648656895


03/17/2021 17:35:57 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.69031   0.68224   0.68625       428
           1    0.70020   0.79955   0.74658       444
           2    0.38235   0.28509   0.32663       228

    accuracy                        0.64727      1100
   macro avg    0.59095   0.58896   0.58649      1100
weighted avg    0.63047   0.64727   0.63606      1100

Macro-F1:  0.5864890597930369


03/17/2021 17:36:00 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.69340   0.68692   0.69014       428
           1    0.71142   0.79955   0.75292       444
           2    0.40678   0.31579   0.35556       228

    accuracy                        0.65545      1100
   macro avg    0.60387   0.60075   0.59954      1100
weighted avg    0.64126   0.65545   0.64613      1100

Macro-F1:  0.5995375418134667


03/17/2021 17:36:03 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.71611   0.65421   0.68376       428
           1    0.69981   0.80856   0.75026       444
           2    0.39286   0.33772   0.36321       228

    accuracy                        0.65091      1100
   macro avg    0.60292   0.60016   0.59908      1100
weighted avg    0.64253   0.65091   0.64416      1100

Macro-F1:  0.5990764879834496


03/17/2021 17:36:05 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.70792   0.66822   0.68750       428
           1    0.69786   0.80631   0.74817       444
           2    0.40984   0.32895   0.36496       228

    accuracy                        0.65364      1100
   macro avg    0.60520   0.60116   0.60021      1100
weighted avg    0.64207   0.65364   0.64514      1100

Macro-F1:  0.6002116241702197


03/17/2021 17:36:08 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.70854   0.65888   0.68281       428
           1    0.70577   0.79955   0.74974       444
           2    0.39196   0.34211   0.36534       228

    accuracy                        0.65000      1100
   macro avg    0.60209   0.60018   0.59929      1100
weighted avg    0.64180   0.65000   0.64402      1100

Macro-F1:  0.5992947678696947


03/17/2021 17:36:10 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.70968   0.66822   0.68833       428
           1    0.69804   0.80180   0.74633       444
           2    0.40107   0.32895   0.36145       228

    accuracy                        0.65182      1100
   macro avg    0.60293   0.59966   0.59870      1100
weighted avg    0.64101   0.65182   0.64399      1100

Macro-F1:  0.598701445505322


03/17/2021 17:36:13 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.70433   0.68458   0.69431       428
           1    0.70000   0.80405   0.74843       444
           2    0.41954   0.32018   0.36318       228

    accuracy                        0.65727      1100
   macro avg    0.60796   0.60294   0.60197      1100
weighted avg    0.64355   0.65727   0.64752      1100

Macro-F1:  0.6019748495888319


03/17/2021 17:36:15 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.72222   0.63785   0.67742       428
           1    0.69763   0.79505   0.74316       444
           2    0.37037   0.35088   0.36036       228

    accuracy                        0.64182      1100
   macro avg    0.59674   0.59459   0.59365      1100
weighted avg    0.63937   0.64182   0.63824      1100

Macro-F1:  0.5936458699786374


03/17/2021 17:36:18 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.71835   0.64953   0.68221       428
           1    0.69981   0.80856   0.75026       444
           2    0.38000   0.33333   0.35514       228

    accuracy                        0.64818      1100
   macro avg    0.59938   0.59714   0.59587      1100
weighted avg    0.64073   0.64818   0.64188      1100

Macro-F1:  0.5958700029642655


03/17/2021 17:36:21 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.72032   0.63785   0.67658       428
           1    0.68798   0.79955   0.73958       444
           2    0.37561   0.33772   0.35566       228

    accuracy                        0.64091      1100
   macro avg    0.59464   0.59171   0.59061      1100
weighted avg    0.63582   0.64091   0.63549      1100

Macro-F1:  0.5906071525327367


03/17/2021 17:36:23 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.70918   0.64953   0.67805       428
           1    0.70385   0.78153   0.74066       444
           2    0.37209   0.35088   0.36117       228

    accuracy                        0.64091      1100
   macro avg    0.59504   0.59398   0.59329      1100
weighted avg    0.63716   0.64091   0.63764      1100

Macro-F1:  0.5932947605396274


03/17/2021 17:36:25 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.71574   0.65888   0.68613       428
           1    0.70482   0.79054   0.74522       444
           2    0.37981   0.34649   0.36239       228

    accuracy                        0.64727      1100
   macro avg    0.60012   0.59864   0.59791      1100
weighted avg    0.64170   0.64727   0.64288      1100

Macro-F1:  0.5979132126328456


03/17/2021 17:36:28 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.72081   0.66355   0.69100       428
           1    0.69941   0.80180   0.74711       444
           2    0.39594   0.34211   0.36706       228

    accuracy                        0.65273      1100
   macro avg    0.60539   0.60249   0.60172      1100
weighted avg    0.64484   0.65273   0.64650      1100

Macro-F1:  0.6017235886984037


03/17/2021 17:36:30 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.72800   0.63785   0.67995       428
           1    0.69505   0.79054   0.73973       444
           2    0.36364   0.35088   0.35714       228

    accuracy                        0.64000      1100
   macro avg    0.59556   0.59309   0.59227      1100
weighted avg    0.63918   0.64000   0.63717      1100

Macro-F1:  0.5922730237798731


03/17/2021 17:36:33 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.72798   0.65654   0.69042       428
           1    0.69863   0.80405   0.74764       444
           2    0.38424   0.34211   0.36195       228

    accuracy                        0.65091      1100
   macro avg    0.60362   0.60090   0.60000      1100
weighted avg    0.64488   0.65091   0.64543      1100

Macro-F1:  0.6000035417972517


03/17/2021 17:36:35 - INFO - __main__ - ***** Evaluation Interval Hit *****


              precision    recall  f1-score   support

           0    0.73757   0.62383   0.67595       428
           1    0.70000   0.78829   0.74153       444
           2    0.34874   0.36404   0.35622       228

    accuracy                        0.63636      1100
   macro avg    0.59544   0.59205   0.59123      1100
weighted avg    0.64181   0.63636   0.63615      1100

Macro-F1:  0.5912326555943621


#### Evaluations with frequency-matched scrambling

In [105]:
# Setup logging
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)
corrupt_method = "S2"
data_file_name = task_name if task_name != "sst3" else "sst-tenary"
corrupt_datasets = get_dataset(f"../data-files/{data_file_name}-corrupted-{corrupt_method}")
# Report the sizes of the dataset that was just loaded (the earlier version
# logged the sizes of the uncorrupted `datasets` instead).
logger.info("***** Train Sample Count (Verify): %s *****", len(corrupt_datasets["train"]))
logger.info("***** Valid Sample Count (Verify): %s *****", len(corrupt_datasets["validation"]))

# BoW feature vectors for the corrupted validation split: one row per
# example, one column per vocabulary entry, cells hold token counts.
# The matrix is allocated once and filled in place, which avoids the extra
# full copy made by stacking rows and re-wrapping them with torch.tensor().
corrupt_validation_input_features = torch.zeros(
    (len(corrupt_datasets["validation"]), len(original_vocab)), dtype=torch.float)
corrupt_validation_label_ids = []
for (ex_index, example) in enumerate(tqdm(corrupt_datasets["validation"])):
    bow_feature = corrupt_validation_input_features[ex_index]  # view: writes land in the matrix
    if sentence2_key is None:
        sentence_combined = example[sentence1_key]
    else:
        sentence_combined = example[sentence1_key] + " [SEP] " + example[sentence2_key]
    sentence_tokens = modified_basic_tokenizer.tokenize(sentence_combined)
    sentence_tokens = sentence_tokens[:max_length]
    for t in sentence_tokens:
        # Skip tokens without a vocabulary index instead of raising KeyError,
        # matching the handling of the clean validation split.
        if t in original_vocab:
            bow_feature[original_vocab[t]] += 1
    corrupt_validation_label_ids.append(example["label"])

corrupt_validation_label_ids = torch.tensor(corrupt_validation_label_ids, dtype=torch.long)
corrupt_validation_data = TensorDataset(corrupt_validation_input_features, corrupt_validation_label_ids)
corrupt_validation_dataloader = DataLoader(corrupt_validation_data, batch_size=per_device_eval_batch_size*n_gpu, shuffle=False)

logger.info("***** Evaluation With Corrupt Data *****")
model.eval()
all_logits = []     # predicted class ids, one array per batch
all_label_ids = []  # gold class ids, one array per batch
with torch.no_grad():
    for step, batch in enumerate(corrupt_validation_dataloader):
        if torch.cuda.is_available() and not no_cuda:
            torch.cuda.empty_cache()

        input_features, label_ids = batch

        if torch.cuda.is_available() and not no_cuda:
            input_features = input_features.to(device)

        # Without labels the model returns the logits only.
        logits = model(input_features)
        logits = F.softmax(logits, dim=-1)
        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        outputs = np.argmax(logits, axis=1)
        all_logits.append(outputs)
        all_label_ids.append(label_ids)

all_logits = np.concatenate(all_logits, axis=0)
all_label_ids = np.concatenate(all_label_ids, axis=0)
result_to_save = classification_report(all_label_ids, all_logits, digits=5, output_dict=True)
print(classification_report(all_label_ids, all_logits, digits=5))
print("Macro-F1: ", result_to_save["macro avg"]["f1-score"])

03/17/2021 21:33:15 - INFO - __main__ - ***** Loading pre-loaded datasets from the disk directly! *****
03/17/2021 21:33:15 - INFO - __main__ - ***** Inoculation Sample Count: 159274 *****
03/17/2021 21:33:15 - INFO - __main__ - ***** Train Sample Count (Verify): 159274 *****
03/17/2021 21:33:15 - INFO - __main__ - ***** Valid Sample Count (Verify): 1100 *****
100%|██████████| 1100/1100 [00:00<00:00, 1299.60it/s]
03/17/2021 21:33:16 - INFO - __main__ - ***** Evaluation With Corrupt Data *****


              precision    recall  f1-score   support

           0    0.35072   0.28271   0.31307       428
           1    0.39341   0.40315   0.39822       444
           2    0.22333   0.29386   0.25379       228

    accuracy                        0.33364      1100
   macro avg    0.32249   0.32657   0.32169      1100
weighted avg    0.34155   0.33364   0.33515      1100

Macro-F1:  0.3216913667394437


#### Evaluations with frequency-unmatched scrambling

In [106]:
# Setup logging
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)

# Load the "S3"-corrupted copy of the task data and score the current `model`
# on its validation split.
corrupt_method = "S3"
data_file_name = task_name if task_name != "sst3" else "sst-tenary"
corrupt_datasets = get_dataset(f"../data-files/{data_file_name}-corrupted-{corrupt_method}")
# Verify the sizes of the dataset that was just loaded. The original logged the
# sizes of the separate `datasets` variable, which does not check this load.
logger.info("***** Train Sample Count (Verify): %s *****", len(corrupt_datasets["train"]))
logger.info("***** Valid Sample Count (Verify): %s *****", len(corrupt_datasets["validation"]))

# Bag-of-words features: one count per entry of `original_vocab`, where
# `original_vocab[token]` is the column that token increments.
corrupt_validation_input_features = []
corrupt_validation_label_ids = []
for example in tqdm(corrupt_datasets["validation"]):
    bow_feature = torch.zeros(len(original_vocab))
    if sentence2_key is None:
        sentence_combined = example[sentence1_key]
    else:
        sentence_combined = example[sentence1_key] + " [SEP] " + example[sentence2_key]
    sentence_tokens = modified_basic_tokenizer.tokenize(sentence_combined)
    sentence_tokens = sentence_tokens[:max_length]
    for t in sentence_tokens:
        bow_feature[original_vocab[t]] += 1
    corrupt_validation_input_features.append(bow_feature)
    corrupt_validation_label_ids.append(example["label"])

# `torch.stack` already returns a tensor; cast it with `.to` rather than
# re-wrapping it in `torch.tensor(...)`, which warns about copy-construction.
corrupt_validation_input_features = torch.stack(corrupt_validation_input_features, dim=0).to(torch.float)
corrupt_validation_label_ids = torch.tensor(corrupt_validation_label_ids, dtype=torch.long)
corrupt_validation_data = TensorDataset(corrupt_validation_input_features, corrupt_validation_label_ids)
corrupt_validation_dataloader = DataLoader(corrupt_validation_data, batch_size=per_device_eval_batch_size*n_gpu, shuffle=False)

logger.info("***** Evaluation With Corrupt Data *****")
model.eval()
# NOTE: despite its name, `all_logits` collects the predicted class ids (argmax).
all_logits = []
all_label_ids = []
with torch.no_grad():
    for input_features, label_ids in corrupt_validation_dataloader:
        if torch.cuda.is_available() and not no_cuda:
            torch.cuda.empty_cache()
            input_features = input_features.to(device)
            label_ids = label_ids.to(device)

        # The model returns (loss, logits); only the logits are needed here.
        _, logits = model(input_features, labels=label_ids)
        logits = F.softmax(logits, dim=-1)
        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.to('cpu').numpy()
        outputs = np.argmax(logits, axis=1)
        all_logits.append(outputs)
        all_label_ids.append(label_ids)

all_logits = np.concatenate(all_logits, axis=0)
all_label_ids = np.concatenate(all_label_ids, axis=0)
result_to_save = classification_report(all_label_ids, all_logits, digits=5, output_dict=True)
print(classification_report(all_label_ids, all_logits, digits=5))
print("Macro-F1: ", result_to_save["macro avg"]["f1-score"])

03/17/2021 21:34:34 - INFO - __main__ - ***** Loading pre-loaded datasets from the disk directly! *****
03/17/2021 21:34:34 - INFO - __main__ - ***** Inoculation Sample Count: 159274 *****
03/17/2021 21:34:34 - INFO - __main__ - ***** Train Sample Count (Verify): 159274 *****
03/17/2021 21:34:34 - INFO - __main__ - ***** Valid Sample Count (Verify): 1100 *****
100%|██████████| 1100/1100 [00:01<00:00, 1073.98it/s]
03/17/2021 21:34:35 - INFO - __main__ - ***** Evaluation With Corrupt Data *****


              precision    recall  f1-score   support

           0    0.48649   0.04206   0.07742       428
           1    0.38462   0.07883   0.13084       444
           2    0.20576   0.87719   0.33333       228

    accuracy                        0.23000      1100
   macro avg    0.35895   0.33269   0.18053      1100
weighted avg    0.38718   0.23000   0.15203      1100

Macro-F1:  0.18053126988912335


#### Random guessing baseline
If we randomly guess the labels, what is the performance now?

In [111]:
# getting avg mF1 on the dataset with a dummy classifier
# The "stratified" strategy draws predictions from the label distribution seen in
# `fit`, so the score is averaged over several independent draws.
import numpy as np
from sklearn.dummy import DummyClassifier

mf1s = []
runs = 100
for i in range(runs):
    # Seed every run with its index so the reported average is reproducible
    # after a kernel restart, while the runs still differ from one another.
    dummy_clf = DummyClassifier(strategy="stratified", random_state=i)
    dummy_clf.fit(validation_input_features, validation_label_ids)
    dummy_labels = dummy_clf.predict(validation_input_features)

    # dummy performance
    result_to_save = classification_report(validation_label_ids, dummy_labels, digits=5, output_dict=True)
    mf1s.append(result_to_save["macro avg"]["f1-score"])
# The detailed report below covers the last run only; the average covers all runs.
print(classification_report(validation_label_ids, dummy_labels, digits=5))
print(f"AVG over {runs} runs mF1: {round(sum(mf1s)/len(mf1s), 6)}.")

              precision    recall  f1-score   support

           0    0.38519   0.36449   0.37455       428
           1    0.40088   0.40991   0.40535       444
           2    0.20747   0.21930   0.21322       228

    accuracy                        0.35273      1100
   macro avg    0.33118   0.33123   0.33104      1100
weighted avg    0.35468   0.35273   0.35354      1100

AVG over 100 runs mF1: 0.331816.


#### FrequencyBoW classifiers

In [112]:
# task setups
task_name = "sst3"
num_labels = 3
FILENAME_CONFIG = {"sst3": "sst-tenary"}

# let us corrupt SST3 in the same way as before
# All three splits live in <external_output_dirname>/<stem>/<stem>-<split>.tsv.
task_file_stem = FILENAME_CONFIG[task_name]
task_data_dir = os.path.join(external_output_dirname, task_file_stem)
split_frames = [
    pd.read_csv(os.path.join(task_data_dir, f"{task_file_stem}-{split}.tsv"), delimiter="\t")
    for split in ("train", "dev", "test")
]

# NOTE: despite the `_df` suffix these names end up holding `datasets.Dataset`
# objects, not pandas DataFrames.
train_df, eval_df, test_df = [Dataset.from_pandas(frame) for frame in split_frames]

In [113]:
modified_basic_tokenizer = ModifiedBasicTokenizer()
label_vocab_map = {}     # label -> flat list of every token seen under that label
token_frequency_map = {} # overwrite this everytime for a new dataset
# Token statistics are accumulated over the train, dev and test splits, in that
# order. One loop replaces three copy-pasted loops; the progress counter still
# restarts from zero for every split.
for split_dataset in (train_df, eval_df, test_df):
    for i, example in enumerate(split_dataset):
        if i % 10000 == 0 and i != 0:
            print(f"processing #{i} example...")
        original_sentence = example['text']
        label = example['label']
        # Blank sentences contribute nothing to either map.
        if len(original_sentence.strip()) == 0:
            continue
        tokens = modified_basic_tokenizer.tokenize(original_sentence)
        label_vocab_map.setdefault(label, []).extend(tokens)
        for t in tokens:
            token_frequency_map[t] = token_frequency_map.get(t, 0) + 1

# Same counts, ordered from the most frequent token to the least frequent one.
task_token_frequency_map = sorted(token_frequency_map.items(), key=operator.itemgetter(1), reverse=True)
task_token_frequency_map = OrderedDict(task_token_frequency_map)

processing #10000 example...
processing #20000 example...
processing #30000 example...
processing #40000 example...
processing #50000 example...
processing #60000 example...
processing #70000 example...
processing #80000 example...
processing #90000 example...
processing #100000 example...
processing #110000 example...
processing #120000 example...
processing #130000 example...
processing #140000 example...
processing #150000 example...


training BoW with 1st order frequency bins

In [114]:
# freq and bucket mappings
# Distinct token frequencies observed in the corpus, in ascending order.
freq_set = sorted(set(task_token_frequency_map.values()))

# 25 log-spaced edges from the smallest to the largest frequency; the last edge is
# dropped and the remaining 24 are rounded up to integers.
log_spaced_edges = np.logspace(math.log(freq_set[0], 10), math.log(freq_set[-1], 10), 25, endpoint=True)
freq_bucket = [math.ceil(edge) for edge in log_spaced_edges[:-1]]
# finally the bucket is a map between freq and bucket number
def find_bucket_number(freq, freq_bucket):
    """Return the 1-based position of the first bound in `freq_bucket` that `freq` does not exceed.

    The bounds are scanned from left to right. When `freq` is greater than every
    bound, `len(freq_bucket)` is returned instead.
    """
    matching_positions = (
        position
        for position, upper_bound in enumerate(freq_bucket, start=1)
        if not freq > upper_bound
    )
    return next(matching_positions, len(freq_bucket))

# Look-up table: observed token frequency -> 1-based bucket number.
freq_bucket_map = {
    observed_freq: find_bucket_number(observed_freq, freq_bucket)
    for observed_freq in freq_set
}

In [115]:
# FBoW feature vectors for train split
# Every example becomes a vector with one slot per frequency bucket; slot k counts
# the tokens whose corpus frequency falls into bucket k+1.
train_input_features = []
train_label_ids = []
for (ex_index, example) in enumerate(tqdm(train_df)):
    bow_feature = torch.zeros(len(freq_bucket))
    if sentence2_key is None:
        sentence_combined = example[sentence1_key]
    else:
        sentence_combined = example[sentence1_key] + " [SEP] " + example[sentence2_key]
    sentence_tokens = modified_basic_tokenizer.tokenize(sentence_combined)
    sentence_tokens = sentence_tokens[:max_length]
    for t in sentence_tokens:
        bow_feature[freq_bucket_map[token_frequency_map[t]]-1] += 1 # bucket count
    # Show a sample feature vector every 50000 examples.
    if ex_index % 50000 == 0:
        print("Example sentence: " + sentence_combined)
        print(bow_feature)
    train_input_features.append(bow_feature)
    train_label_ids.append(example["label"])

# `torch.stack` already returns a tensor; cast it with `.to` rather than
# re-wrapping it in `torch.tensor(...)`, which warns about copy-construction.
train_input_features = torch.stack(train_input_features, dim=0).to(torch.float)
train_label_ids = torch.tensor(train_label_ids, dtype=torch.long)
train_data = TensorDataset(train_input_features, train_label_ids)

  0%|          | 614/159274 [00:00<00:53, 2962.09it/s]

Example sentence: This is one of the year's best films.
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 0.,
        0., 1., 1., 1., 1., 4.])


 32%|███▏      | 50554/159274 [00:13<00:30, 3534.87it/s]

Example sentence: there is plenty of room for editing, and a much shorter cut surely would have resulted in a smoother, more focused narrative without sacrificing any of the cultural intrigue
tensor([0., 0., 0., 0., 0., 0., 1., 1., 2., 2., 0., 2., 4., 0., 1., 1., 2., 2.,
        1., 1., 1., 1., 1., 8.])


 63%|██████▎   | 100385/159274 [00:29<00:20, 2856.35it/s]

Example sentence: his name was, uh, Michael Zaidan, was supposed to have like written the screenplay or something
tensor([0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 2., 2., 1., 0., 3., 0.,
        2., 2., 0., 0., 0., 5.])


 95%|█████████▍| 150760/159274 [00:43<00:01, 4396.60it/s]

Example sentence: They just don't work in concert.
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 2., 1.,
        0., 1., 0., 0., 1., 2.])


100%|██████████| 159274/159274 [00:45<00:00, 3536.70it/s]


In [116]:
# FBoW feature vectors for validation split
# Every example becomes a vector with one slot per frequency bucket; slot k counts
# the tokens whose corpus frequency falls into bucket k+1.
validation_input_features = []
validation_label_ids = []
for example in tqdm(eval_df):
    bow_feature = torch.zeros(len(freq_bucket))
    if sentence2_key is None:
        sentence_combined = example[sentence1_key]
    else:
        sentence_combined = example[sentence1_key] + " [SEP] " + example[sentence2_key]
    sentence_tokens = modified_basic_tokenizer.tokenize(sentence_combined)
    sentence_tokens = sentence_tokens[:max_length]
    for t in sentence_tokens:
        bow_feature[freq_bucket_map[token_frequency_map[t]]-1] += 1 # bucket count
    validation_input_features.append(bow_feature)
    validation_label_ids.append(example["label"])

# `torch.stack` already returns a tensor; cast it with `.to` rather than
# re-wrapping it in `torch.tensor(...)`, which warns about copy-construction.
validation_input_features = torch.stack(validation_input_features, dim=0).to(torch.float)
validation_label_ids = torch.tensor(validation_label_ids, dtype=torch.long)
validation_data = TensorDataset(validation_input_features, validation_label_ids)

100%|██████████| 1100/1100 [00:00<00:00, 2025.39it/s]


In [117]:
# data loader
# The effective batch size is the per-device size multiplied by the GPU count.
fbow_train_batch_size = per_device_train_batch_size*n_gpu
fbow_eval_batch_size = per_device_eval_batch_size*n_gpu

# Training batches are drawn through a RandomSampler; validation keeps dataset order.
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, batch_size=fbow_train_batch_size, sampler=train_sampler)
validation_dataloader = DataLoader(validation_data, shuffle=False, batch_size=fbow_eval_batch_size)

In [118]:
# some overriding fun stuffs!
# Hyper-parameters for the first-order FBoW run.
lr = 1e-3
num_train_epochs = 20

# The classifier is built from the number of distinct labels in the validation
# split and the number of frequency buckets.
fbow_label_count = len(validation_label_ids.unique())
model = MockBERTBOWClassifier(fbow_label_count, len(freq_bucket))
optimizer = optim.Adam(model.parameters(), lr=lr)

# Wrap for multi-GPU execution unless CUDA use is disabled.
wrap_in_data_parallel = n_gpu > 0 and not no_cuda
if wrap_in_data_parallel:
    model = torch.nn.DataParallel(model)

In [119]:
# Train the classifier; every 500 optimizer steps score it on the validation split
# and remember the best validation macro-F1 seen so far.
global_step = 0
max_score = -1
for _ in range(int(num_train_epochs)):

    model.train()
    for step, batch in enumerate(train_dataloader):
        if torch.cuda.is_available() and not no_cuda:
            torch.cuda.empty_cache()

        input_features, label_ids = batch

        if torch.cuda.is_available() and not no_cuda:
            input_features = input_features.to(device)
            label_ids = label_ids.to(device)

        loss, _ = model(input_features, labels=label_ids)

        if n_gpu > 1:
            loss = loss.mean() # mean() to average on multi-gpu.
        loss.backward()

        optimizer.step()
        model.zero_grad()

        if global_step % 500 == 0:
            model.eval()
            # NOTE: despite its name, `all_logits` collects predicted class ids.
            all_logits = []
            all_label_ids = []
            with torch.no_grad():
                # Dedicated names keep the evaluation pass from clobbering the
                # training loop's `step`, `batch`, `loss` and `label_ids`.
                for eval_features, eval_label_ids in validation_dataloader:
                    if torch.cuda.is_available() and not no_cuda:
                        torch.cuda.empty_cache()
                        eval_features = eval_features.to(device)
                        eval_label_ids = eval_label_ids.to(device)

                    _, eval_logits = model(eval_features, labels=eval_label_ids)
                    eval_probs = F.softmax(eval_logits, dim=-1).detach().cpu().numpy()
                    all_logits.append(np.argmax(eval_probs, axis=1))
                    all_label_ids.append(eval_label_ids.to('cpu').numpy())

            all_logits = np.concatenate(all_logits, axis=0)
            all_label_ids = np.concatenate(all_label_ids, axis=0)
            result_to_save = classification_report(all_label_ids, all_logits, digits=5, output_dict=True)
            print("Macro-F1: ", result_to_save["macro avg"]["f1-score"])
            if result_to_save["macro avg"]["f1-score"] > max_score:
                max_score = result_to_save["macro avg"]["f1-score"]

            # Switch back to training mode. Without this the model stayed in
            # eval mode for the remainder of the epoch.
            model.train()

        global_step += 1
print("Best Macro-F1: ", max_score)

Macro-F1:  0.33223386421234674
Macro-F1:  0.3805544756072737
Macro-F1:  0.356948319862475
Macro-F1:  0.3555990893841361
Macro-F1:  0.39446552989943057
Macro-F1:  0.3906249688153513
Macro-F1:  0.38934033014915365
Macro-F1:  0.3760070440843222
Macro-F1:  0.3825677745637313
Macro-F1:  0.3538522860773374
Macro-F1:  0.38005641677439445
Macro-F1:  0.38470228402965184
Macro-F1:  0.3856994336330514
Macro-F1:  0.3926025252613996
Macro-F1:  0.39569701985863376
Macro-F1:  0.38227380974097724
Macro-F1:  0.3841402824793416
Macro-F1:  0.39446810499442075
Macro-F1:  0.3856967204688082
Macro-F1:  0.3832368082368082
Macro-F1:  0.39444279074348504
Macro-F1:  0.3833216466303751
Macro-F1:  0.38526457790194985
Macro-F1:  0.3817557359042263
Macro-F1:  0.3744302639659564
Macro-F1:  0.394874510845099
Macro-F1:  0.38138840217504494
Macro-F1:  0.39483517480892644
Macro-F1:  0.3921838397993976
Macro-F1:  0.3883187852313321
Macro-F1:  0.38071647390118724
Macro-F1:  0.39052132308947235
Macro-F1:  0.383017735699323

training BoW with 1st and 2nd order frequency bins

In [123]:
# repartition the first order information
# Distinct token frequencies observed in the corpus, in ascending order.
second_order_freq_set = sorted(set(task_token_frequency_map.values()))

# bucket_count + 1 log-spaced edges from the smallest to the largest frequency; the
# last edge is dropped and the remaining ones are rounded up to integers.
bucket_count = 36
second_order_log_edges = np.logspace(
    math.log(second_order_freq_set[0], 10),
    math.log(second_order_freq_set[-1], 10),
    bucket_count+1,
    endpoint=True,
)
second_order_freq_bucket = [math.ceil(edge) for edge in second_order_log_edges[:-1]]
# finally the bucket is a map between freq and bucket number
# NOTE(review): this re-defines, with identical logic, the helper of the same name
# from the first-order binning cell.
def find_bucket_number(freq, freq_bucket):
    """Return the 1-based position of the first bound in `freq_bucket` that `freq` does not exceed.

    The bounds are scanned from left to right. When `freq` is greater than every
    bound, `len(freq_bucket)` is returned instead.
    """
    matching_positions = (
        position
        for position, upper_bound in enumerate(freq_bucket, start=1)
        if not freq > upper_bound
    )
    return next(matching_positions, len(freq_bucket))

# Look-up table: observed token frequency -> 1-based bucket number in the
# `bucket_count`-way partition.
# Iterate this cell's own `second_order_freq_set`, not `freq_set` from the
# first-order cell, so the mapping does not depend on that cell having run.
second_order_freq_bucket_map = {}
for freq in second_order_freq_set:
    second_order_freq_bucket_map[freq] = find_bucket_number(freq, second_order_freq_bucket)

In [124]:
second_order_freq_bucket_map

{1: 1,
 2: 2,
 3: 4,
 4: 5,
 5: 6,
 6: 7,
 7: 7,
 8: 8,
 9: 8,
 10: 9,
 11: 9,
 12: 9,
 13: 10,
 14: 10,
 15: 10,
 16: 10,
 17: 11,
 18: 11,
 19: 11,
 20: 11,
 21: 11,
 22: 12,
 23: 12,
 24: 12,
 25: 12,
 26: 12,
 27: 12,
 28: 12,
 29: 12,
 30: 13,
 31: 13,
 32: 13,
 33: 13,
 34: 13,
 35: 13,
 36: 13,
 37: 13,
 38: 13,
 39: 13,
 40: 14,
 41: 14,
 42: 14,
 43: 14,
 44: 14,
 45: 14,
 46: 14,
 47: 14,
 48: 14,
 49: 14,
 50: 14,
 51: 14,
 52: 14,
 53: 15,
 54: 15,
 55: 15,
 56: 15,
 57: 15,
 58: 15,
 59: 15,
 60: 15,
 61: 15,
 62: 15,
 63: 15,
 64: 15,
 65: 15,
 66: 15,
 67: 15,
 68: 15,
 69: 15,
 70: 15,
 71: 16,
 72: 16,
 73: 16,
 74: 16,
 75: 16,
 76: 16,
 77: 16,
 78: 16,
 79: 16,
 80: 16,
 81: 16,
 82: 16,
 83: 16,
 84: 16,
 85: 16,
 86: 16,
 87: 16,
 88: 16,
 89: 16,
 90: 16,
 91: 16,
 92: 16,
 93: 16,
 94: 16,
 95: 16,
 96: 17,
 97: 17,
 98: 17,
 99: 17,
 100: 17,
 101: 17,
 102: 17,
 103: 17,
 104: 17,
 105: 17,
 106: 17,
 107: 17,
 108: 17,
 109: 17,
 110: 17,
 111: 17,
 112: 17,


In [None]:
modified_basic_tokenizer = ModifiedBasicTokenizer()
token_freq_freq_map = {} # overwrite this everytime for a new dataset
for i, example in enumerate(train_df):
    if i % 10000 == 0 and i != 0:
        print(f"processing #{i} example...")
    original_sentence = example['text']
    if len(original_sentence.strip()) != 0:
        tokens = modified_basic_tokenizer.tokenize(original_sentence)
        # Bucket of every token of THIS sentence. The original indexed the stale
        # `sentence_tokens` left over from another cell instead of `tokens`.
        token_buckets = [second_order_freq_bucket_map[token_frequency_map[t]] for t in tokens]
        # Dedicated index names so the example counter `i` is not shadowed.
        for first in range(len(token_buckets)-1):
            for second in range(first+1, len(token_buckets)):
                index_tuple = tuple(sorted((token_buckets[first], token_buckets[second])))
                # NOTE(review): the original computed `index_tuple` and then discarded
                # it, leaving `token_freq_freq_map` empty. Counting each bucket pair is
                # the assumed intent -- confirm before relying on these counts.
                token_freq_freq_map[index_tuple] = token_freq_freq_map.get(index_tuple, 0) + 1

# Token counts ordered from the most frequent token to the least frequent one.
task_token_frequency_map = sorted(token_frequency_map.items(), key=operator.itemgetter(1), reverse=True)
task_token_frequency_map = OrderedDict(task_token_frequency_map)

In [42]:
# Give every unordered pair (low, high), low < high, of 1-based first-order bucket
# numbers its own consecutive slot, enumerated with `low` as the outer loop.
first_order_bucket_numbers = range(1, len(freq_bucket)+1)
ordered_bucket_pairs = (
    (low, high)
    for low in first_order_bucket_numbers
    for high in first_order_bucket_numbers
    if low < high
)
second_order_bucket_index = {pair: slot for slot, pair in enumerate(ordered_bucket_pairs)}
# Total number of pair slots assigned.
index = len(second_order_bucket_index)

In [43]:
# FBoW feature vectors for train split (2nd order = 1st order concat with 2nd order)
# NOTE(review): the remark that followed here is garbled in the source; it questioned
# whether concatenating the 1st-order counts is necessary at all. The garbling had
# also swallowed the two list initialisations below, which are restored so the cell
# does not `.append` onto tensors left over from the first-order cell.
train_input_features = []
train_label_ids = []
num_first_order = len(freq_bucket)
for (ex_index, example) in enumerate(tqdm(train_df)):
    bow_feature = torch.zeros(num_first_order + len(second_order_bucket_index)) # up-to 2nd feature map
    if sentence2_key is None:
        sentence_combined = example[sentence1_key]
    else:
        sentence_combined = example[sentence1_key] + " [SEP] " + example[sentence2_key]
    sentence_tokens = modified_basic_tokenizer.tokenize(sentence_combined)
    sentence_tokens = sentence_tokens[:max_length]
    # first order here!
    # bucket number -> how many tokens of this sentence fall into it
    bucket_counts = Counter(freq_bucket_map[token_frequency_map[t]] for t in sentence_tokens)
    for bucket, count in bucket_counts.items():
        bow_feature[bucket-1] = count # bucket count
    # second order here!
    # The number of token pairs whose buckets are (low, high) with low != high is
    # count[low] * count[high], so the pair features come from the bucket counts
    # directly, without enumerating every token pair.
    present_buckets = sorted(bucket_counts)
    for pos, low in enumerate(present_buckets):
        for high in present_buckets[pos+1:]:
            pair_slot = num_first_order + second_order_bucket_index[(low, high)]
            bow_feature[pair_slot] = bucket_counts[low] * bucket_counts[high] # pair of freq bucket count

    # Show a sample feature vector every 50000 examples.
    if ex_index % 50000 == 0:
        print("Example sentence: " + sentence_combined)
        print(bow_feature)
    train_input_features.append(bow_feature)
    train_label_ids.append(example["label"])

# `torch.stack` already returns a tensor; cast it with `.to` rather than
# re-wrapping it in `torch.tensor(...)`, which warns about copy-construction.
train_input_features = torch.stack(train_input_features, dim=0).to(torch.float)
train_label_ids = torch.tensor(train_label_ids, dtype=torch.long)
train_data = TensorDataset(train_input_features, train_label_ids)

  0%|          | 83/159274 [00:00<03:11, 829.32it/s]

Example sentence: This is one of the year's best films.
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 2., 0.,
        0., 1., 1., 1., 1., 4., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

 32%|███▏      | 50191/159274 [00:45<01:26, 1262.05it/s]

Example sentence: there is plenty of room for editing, and a much shorter cut surely would have resulted in a smoother, more focused narrative without sacrificing any of the cultural intrigue
tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  2.,  2.,  0.,  2.,  4.,  0.,
         1.,  1.,  2.,  2.,  1.,  1.,  1.,  1.,  1.,  8.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., 

 63%|██████▎   | 100124/159274 [01:27<00:49, 1195.31it/s]

Example sentence: his name was, uh, Michael Zaidan, was supposed to have like written the screenplay or something
tensor([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  1.,  0.,  0.,  0.,  2.,  2.,
         1.,  0.,  3.,  0.,  2.,  2.,  0.,  0.,  0.,  5.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., 

 94%|█████████▍| 150143/159274 [02:10<00:07, 1176.20it/s]

Example sentence: They just don't work in concert.
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 2., 1.,
        0., 1., 0., 0., 1., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 2., 1., 0., 1., 0., 0., 1., 2., 0., 0., 0., 0., 0

100%|██████████| 159274/159274 [02:18<00:00, 1148.13it/s]


In [44]:
# FBoW feature vectors for validation split
# Layout: the first len(freq_bucket) slots hold the 1st-order bucket counts; the
# remaining slots hold one count per unordered pair of distinct buckets.
validation_input_features = []
validation_label_ids = []
num_first_order = len(freq_bucket)
for example in tqdm(eval_df):
    bow_feature = torch.zeros(num_first_order + len(second_order_bucket_index)) # up-to 2nd feature map
    if sentence2_key is None:
        sentence_combined = example[sentence1_key]
    else:
        sentence_combined = example[sentence1_key] + " [SEP] " + example[sentence2_key]
    sentence_tokens = modified_basic_tokenizer.tokenize(sentence_combined)
    sentence_tokens = sentence_tokens[:max_length]
    # first order here!
    # bucket number -> how many tokens of this sentence fall into it
    bucket_counts = Counter(freq_bucket_map[token_frequency_map[t]] for t in sentence_tokens)
    for bucket, count in bucket_counts.items():
        bow_feature[bucket-1] = count # bucket count
    # second order here!
    # The number of token pairs whose buckets are (low, high) with low != high is
    # count[low] * count[high], so the pair features come from the bucket counts
    # directly, without enumerating every token pair.
    present_buckets = sorted(bucket_counts)
    for pos, low in enumerate(present_buckets):
        for high in present_buckets[pos+1:]:
            pair_slot = num_first_order + second_order_bucket_index[(low, high)]
            bow_feature[pair_slot] = bucket_counts[low] * bucket_counts[high] # pair of freq bucket count
    validation_input_features.append(bow_feature)
    validation_label_ids.append(example["label"])

# `torch.stack` already returns a tensor; cast it with `.to` rather than
# re-wrapping it in `torch.tensor(...)`, which warns about copy-construction.
validation_input_features = torch.stack(validation_input_features, dim=0).to(torch.float)
validation_label_ids = torch.tensor(validation_label_ids, dtype=torch.long)
validation_data = TensorDataset(validation_input_features, validation_label_ids)

100%|██████████| 1100/1100 [00:03<00:00, 323.29it/s]


In [48]:
# data loader
# The effective batch size is the per-device size multiplied by the GPU count.
fbow_train_batch_size = per_device_train_batch_size*n_gpu
fbow_eval_batch_size = per_device_eval_batch_size*n_gpu

# Training batches are drawn through a RandomSampler; validation keeps dataset order.
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, batch_size=fbow_train_batch_size, sampler=train_sampler)
validation_dataloader = DataLoader(validation_data, shuffle=False, batch_size=fbow_eval_batch_size)

In [49]:
# restart the model
# Fresh classifier sized for the concatenated 1st-order + 2nd-order feature vector.
second_order_input_dim = len(freq_bucket) + len(second_order_bucket_index)
model = MockBERTBOWClassifier(len(validation_label_ids.unique()), second_order_input_dim)
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr)

# Wrap for multi-GPU execution unless CUDA use is disabled.
wrap_in_data_parallel = n_gpu > 0 and not no_cuda
if wrap_in_data_parallel:
    model = torch.nn.DataParallel(model)

In [50]:
# Train the classifier; every 250 optimizer steps score it on the validation split
# and remember the best validation macro-F1 seen so far.
global_step = 0
num_train_epochs = 30
max_score = -1
for _ in range(int(num_train_epochs)):

    model.train()
    for step, batch in enumerate(train_dataloader):
        if torch.cuda.is_available() and not no_cuda:
            torch.cuda.empty_cache()

        input_features, label_ids = batch

        if torch.cuda.is_available() and not no_cuda:
            input_features = input_features.to(device)
            label_ids = label_ids.to(device)

        loss, _ = model(input_features, labels=label_ids)

        if n_gpu > 1:
            loss = loss.mean() # mean() to average on multi-gpu.
        loss.backward()

        optimizer.step()
        model.zero_grad()

        if global_step % 250 == 0:
            logger.info("***** Evaluation Interval Hit *****")
            model.eval()
            # NOTE: despite its name, `all_logits` collects predicted class ids.
            all_logits = []
            all_label_ids = []
            with torch.no_grad():
                # Dedicated names keep the evaluation pass from clobbering the
                # training loop's `step`, `batch`, `loss` and `label_ids`.
                for eval_features, eval_label_ids in validation_dataloader:
                    if torch.cuda.is_available() and not no_cuda:
                        torch.cuda.empty_cache()
                        eval_features = eval_features.to(device)
                        eval_label_ids = eval_label_ids.to(device)

                    _, eval_logits = model(eval_features, labels=eval_label_ids)
                    eval_probs = F.softmax(eval_logits, dim=-1).detach().cpu().numpy()
                    all_logits.append(np.argmax(eval_probs, axis=1))
                    all_label_ids.append(eval_label_ids.to('cpu').numpy())

            all_logits = np.concatenate(all_logits, axis=0)
            all_label_ids = np.concatenate(all_label_ids, axis=0)
            result_to_save = classification_report(all_label_ids, all_logits, digits=5, output_dict=True)
            print("Macro-F1: ", result_to_save["macro avg"]["f1-score"])
            if result_to_save["macro avg"]["f1-score"] > max_score:
                max_score = result_to_save["macro avg"]["f1-score"]

            # Switch back to training mode. Without this the model stayed in
            # eval mode for the remainder of the epoch.
            model.train()

        global_step += 1
print("Best Macro-F1: ", max_score)

03/15/2021 22:01:36 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.26465374257277996


03/15/2021 22:01:37 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3849117003935271


03/15/2021 22:01:37 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.33312358005787224


03/15/2021 22:01:38 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.39216997619791044


03/15/2021 22:01:39 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3827080906711444


03/15/2021 22:01:39 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3861029770092592


03/15/2021 22:01:40 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3954782550915313


03/15/2021 22:01:40 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3753367702737842


03/15/2021 22:01:41 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36673013836654283


03/15/2021 22:01:41 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.35828267237856926


03/15/2021 22:01:42 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37487471430197505


03/15/2021 22:01:43 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36452234585587134


03/15/2021 22:01:43 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.38704398321802674


03/15/2021 22:01:44 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.367700667611971


03/15/2021 22:01:44 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.38717515220992155


03/15/2021 22:01:45 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36287540014192454


03/15/2021 22:01:46 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36750048573257316


03/15/2021 22:01:46 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3718295722896891


03/15/2021 22:01:47 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3739680472101336


03/15/2021 22:01:47 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.38340047871281807


03/15/2021 22:01:48 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3691166509543012


03/15/2021 22:01:48 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37794799435597665


03/15/2021 22:01:49 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3666181968966593


03/15/2021 22:01:50 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.395071985780241


03/15/2021 22:01:50 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3677363088444458


03/15/2021 22:01:51 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3766712085835382


03/15/2021 22:01:51 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3877688893757536


03/15/2021 22:01:52 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3631892905007484


03/15/2021 22:01:53 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.38161909108015


03/15/2021 22:01:53 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3851847525216518


03/15/2021 22:01:54 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37694433006385


03/15/2021 22:01:54 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3857505577414764


03/15/2021 22:01:55 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3720237188351562


03/15/2021 22:01:56 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3763660262857029


03/15/2021 22:01:56 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37583857877467547


03/15/2021 22:01:57 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3705145153005207


03/15/2021 22:01:57 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3704313730701782


03/15/2021 22:01:58 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3757634899204885


03/15/2021 22:01:59 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36472363503876076


03/15/2021 22:01:59 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3806590858920995


03/15/2021 22:02:00 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3668103781633774


03/15/2021 22:02:00 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3679673385403159


03/15/2021 22:02:01 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3623292037339973


03/15/2021 22:02:01 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3838446616080775


03/15/2021 22:02:02 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3715017360159756


03/15/2021 22:02:03 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3679756071669295


03/15/2021 22:02:03 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.377877290754333


03/15/2021 22:02:04 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37782638241334965


03/15/2021 22:02:04 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37002411614700487


03/15/2021 22:02:05 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3757617523678523


03/15/2021 22:02:06 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3764214873793564


03/15/2021 22:02:06 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3648619533210982


03/15/2021 22:02:07 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3698294346978557


03/15/2021 22:02:07 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3668667356658683


03/15/2021 22:02:08 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3818227559212691


03/15/2021 22:02:08 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37243588531613203


03/15/2021 22:02:09 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3734812079204645


03/15/2021 22:02:10 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36514063906301414


03/15/2021 22:02:10 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37553082800842663


03/15/2021 22:02:11 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37376907866285713


03/15/2021 22:02:11 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36766964759214166


03/15/2021 22:02:12 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36530776730272274


03/15/2021 22:02:12 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36889535024523123


03/15/2021 22:02:13 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3670527614915886


03/15/2021 22:02:14 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36469871782151464


03/15/2021 22:02:14 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36265205992886934


03/15/2021 22:02:15 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.38097602540963105


03/15/2021 22:02:15 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36690868361755974


03/15/2021 22:02:16 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.360459484467454


03/15/2021 22:02:17 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3598470695749172


03/15/2021 22:02:17 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37509714556858326


03/15/2021 22:02:18 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3792722418898222


03/15/2021 22:02:18 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3755737599835904


03/15/2021 22:02:19 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.375396841438925


03/15/2021 22:02:19 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3745164972889789


03/15/2021 22:02:20 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.35750326409610506


03/15/2021 22:02:21 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36198536830042577


03/15/2021 22:02:21 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37563723572871455


03/15/2021 22:02:22 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.363106544886971


03/15/2021 22:02:22 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3691028876872433


03/15/2021 22:02:23 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3663022345897045


03/15/2021 22:02:24 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3686544404040137


03/15/2021 22:02:24 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3683450147735862


03/15/2021 22:02:25 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37586938897422767


03/15/2021 22:02:25 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3629765571884113


03/15/2021 22:02:26 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36711890301925676


03/15/2021 22:02:26 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.35520302704097473


03/15/2021 22:02:27 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3693716135159315


03/15/2021 22:02:28 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37051645672335326


03/15/2021 22:02:28 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3807557809993179


03/15/2021 22:02:29 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3749062386219674


03/15/2021 22:02:29 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3791587896584281


03/15/2021 22:02:30 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36552569590898504


03/15/2021 22:02:30 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3623121834165863


03/15/2021 22:02:30 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37007271816137594


03/15/2021 22:02:31 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3731623747242603


03/15/2021 22:02:31 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36285073593730105


03/15/2021 22:02:32 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.366116928897007


03/15/2021 22:02:32 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37087576942195605


03/15/2021 22:02:32 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37157199930503954


03/15/2021 22:02:33 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3766432386013007


03/15/2021 22:02:33 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3821629810170542


03/15/2021 22:02:34 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3742825809314792


03/15/2021 22:02:34 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37215180162703904


03/15/2021 22:02:35 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3554314624982869


03/15/2021 22:02:35 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37297693401731374


03/15/2021 22:02:35 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3727088617213217


03/15/2021 22:02:36 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3626779306800032


03/15/2021 22:02:36 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3615576317476243


03/15/2021 22:02:37 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37154811440348584


03/15/2021 22:02:37 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37321845919184643


03/15/2021 22:02:37 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3619379678499402


03/15/2021 22:02:38 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.38211189291375053


03/15/2021 22:02:38 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37036493826671113


03/15/2021 22:02:39 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36707585990117475


03/15/2021 22:02:39 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3702204855951103


03/15/2021 22:02:39 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3779103108984862


03/15/2021 22:02:40 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3747806566178673


03/15/2021 22:02:40 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3659171795499605


03/15/2021 22:02:41 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36732148949869997


03/15/2021 22:02:41 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37605479277158627


03/15/2021 22:02:42 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3692183329520324


03/15/2021 22:02:42 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37211444738144284


03/15/2021 22:02:42 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3713098980465808


03/15/2021 22:02:43 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37510074246819486


03/15/2021 22:02:43 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3666555254758361


03/15/2021 22:02:44 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37592902753808594


03/15/2021 22:02:44 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36429995159975936


03/15/2021 22:02:45 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36959043246065715


03/15/2021 22:02:45 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37086920388194317


03/15/2021 22:02:45 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3773994795184092


03/15/2021 22:02:46 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.378786287284303


03/15/2021 22:02:46 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36103637926655335


03/15/2021 22:02:47 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36963656094131175


03/15/2021 22:02:47 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36776747393524384


03/15/2021 22:02:48 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3744869256717142


03/15/2021 22:02:48 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36694792782119684


03/15/2021 22:02:49 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36960970577905355


03/15/2021 22:02:49 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.371662518683557


03/15/2021 22:02:49 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3709019987901385


03/15/2021 22:02:50 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37771517264288007


03/15/2021 22:02:50 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3684753974207289


03/15/2021 22:02:51 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37153759090269217


03/15/2021 22:02:51 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36820217604947186


03/15/2021 22:02:52 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3852839121273919


03/15/2021 22:02:52 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3691268885316039


03/15/2021 22:02:52 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36272535500007436


03/15/2021 22:02:53 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.3698808576129977


03/15/2021 22:02:53 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.37190842649302375


03/15/2021 22:02:54 - INFO - __main__ - ***** Evaluation Interval Hit *****


Macro-F1:  0.36907919912370213
Best Macro-F1:  0.3954782550915313
