In [2]:
from spanemo.learner import Trainer, EvaluateOnTest
from spanemo.model import SpanEmo
from spanemo.data_loader import DataClass
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import roc_auc_score
import json

# Single seed value handed to the NumPy / PyTorch RNG setup in the device cell below.
seed = 12345678

In [3]:
# Pick the compute device: the first CUDA GPU when one is available, else the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Seed unconditionally. Previously the seeds were only set inside the CUDA
# branch and the global torch generator was never seeded, so CPU runs were
# entirely unseeded and GPU runs still drew from an unseeded default generator.
np.random.seed(seed)
torch.manual_seed(seed)  # seeds the default generator on all devices

if device.type == 'cuda':
    print("Currently using GPU: {}".format(device))
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN autotuning for run-to-run determinism.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
else:
    print("WARNING: USING CPU")

Currently using GPU: cuda:0


In [4]:
def make_loaders(args, test=False):
    """Build the DataLoader(s) for the split(s) configured in ``args``.

    Args:
        args: mapping holding the ``*_path`` and ``*_batch_size`` entries for
            the requested split(s); it is also forwarded to ``DataClass``.
        test: when True, build only the test loader.

    Returns:
        A single unshuffled test loader when ``test`` is True; otherwise a
        ``(train_loader, val_loader)`` tuple in which only the training
        loader shuffles.
    """
    def _loader_for(path_key, batch_key, shuffle):
        # Dataset first, then the loader wrapping it; batch size is cast to int.
        return DataLoader(DataClass(args, args[path_key]),
                          batch_size=int(args[batch_key]),
                          shuffle=shuffle)

    if test:
        test_batches = _loader_for('test_path', 'test_batch_size', False)
        print('The number of Test batches: ', len(test_batches))
        return test_batches

    train_batches = _loader_for('train_path', 'train_batch_size', True)
    print('The number of training batches: ', len(train_batches))

    val_batches = _loader_for('val_path', 'val_batch_size', False)
    print('The number of validation batches: ', len(val_batches))

    return train_batches, val_batches

In [5]:
def make_model(args):
    """Instantiate a ``SpanEmo`` model from the hyperparameter mapping ``args``.

    Reads ``'output_dropout'``, ``'backbone'``, ``'loss_type'`` and
    ``'alpha_loss'`` from ``args`` and forwards them to the constructor as
    ``output_dropout``, ``backbone``, ``joint_loss`` and ``alpha``.
    """
    constructor_kwargs = {
        'output_dropout': args['output_dropout'],
        'backbone': args['backbone'],
        'joint_loss': args['loss_type'],
        'alpha': args['alpha_loss'],
    }
    return SpanEmo(**constructor_kwargs)


In [6]:
def pipeline(args, loaders=None):
    """Record the run configuration, build the model and train it.

    Args:
        args: hyperparameter mapping. It is dumped to JSON, passed to the
            loader/model factories and to ``Trainer.fit``; ``'max_epoch'``
            is read here and cast to ``int``.
        loaders: optional ``(train_loader, val_loader)`` pair. When ``None``
            the pair is built with ``make_loaders(args)``.

    Side effects:
        Writes ``configs/<timestamp>.json`` and runs training on the
        module-level ``device``.
    """
    # The timestamp is both the config file stem and the run name given to Trainer.
    filename = datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S")

    # Use a context manager so the config is flushed and the handle closed
    # before training starts; the handle was previously never closed, so the
    # JSON could be lost if training crashed or was interrupted.
    with open('configs/' + filename + '.json', 'a') as fw:
        json.dump(args, fw, sort_keys=True, indent=2)

    if loaders is None:
        train_data_loader, val_data_loader = make_loaders(args)
    else:
        train_data_loader, val_data_loader = loaders
    model = make_model(args)

    learn = Trainer(model, train_data_loader, val_data_loader, filename=filename)
    learn.fit(
        num_epochs=int(args['max_epoch']),
        args=args,
        device=device
    )

In [7]:
# Run configuration shared by the loader, model and training helpers.
hyperparams = dict(
    # Data splits.
    train_path='data/train.csv',
    val_path='data/val.csv',
    # Model backbone.
    backbone='bert-base-uncased',
    # Batch sizes read by make_loaders.
    train_batch_size=64,
    val_batch_size=128,
    # Model / loss settings read by make_model.
    output_dropout=0.1,
    loss_type='joint',
    alpha_loss=0.2,
    # Training length read by pipeline.
    max_epoch=20,
    # Not read in this notebook; forwarded via `args` to DataClass / Trainer.fit.
    max_length=128,
    ffn_lr=1e-4,
    bert_lr=2e-5,
)

In [8]:
# NOTE(review): always raises AssertionError — presumably a deliberate guard so
# that "Run All" stops before the training cells below; confirm before removing.
assert False

AssertionError: 

## Train

In [None]:
# Build the (train, val) loader pair once so it can be passed to pipeline() below.
loaders = make_loaders(hyperparams)

  self.tok = re.compile(r"({})".format("|".join(pipeline)))


Reading twitter_2018 - 1grams ...
Reading twitter_2018 - 2grams ...


  regexes = {k.lower(): re.compile(self.expressions[k]) for k, v in


Reading twitter_2018 - 1grams ...


PreProcessing dataset ...:   0%|          | 90/43410 [00:00<00:37, 1162.87it/s]


KeyboardInterrupt: 

In [None]:
# Train with the pre-built loaders; pipeline() also dumps the config under configs/.
pipeline(hyperparams, loaders=loaders)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Train_Loss,Val_Loss,F1-Macro,F1-Micro,JS,Time
0.3115,0.2038,0.4528,0.5725,0.5213,02:56
0.1987,0.1906,0.5175,0.6095,0.5737,02:56
0.187,0.1893,0.5275,0.6111,0.5743,02:56
0.1777,0.1905,0.5418,0.6131,0.5776,02:56
0.1686,0.1948,0.5349,0.6042,0.5693,02:56
0.1585,0.2006,0.5349,0.5925,0.5595,02:56
0.1478,0.2083,0.5267,0.5821,0.5464,02:56
0.1378,0.2164,0.5128,0.5815,0.551,02:56
0.1294,0.2197,0.5156,0.5799,0.5455,02:56
0.1225,0.2256,0.5015,0.5728,0.5433,02:56


epoch#:  1
Validation loss decreased (inf --> 0.203779).  Saving model ...
epoch#:  2
Validation loss decreased (0.203779 --> 0.190562).  Saving model ...
epoch#:  3
Validation loss decreased (0.190562 --> 0.189265).  Saving model ...
epoch#:  4
EarlyStopping counter: 1 out of 10
epoch#:  5
EarlyStopping counter: 2 out of 10
epoch#:  6
EarlyStopping counter: 3 out of 10
epoch#:  7
EarlyStopping counter: 4 out of 10
epoch#:  8
EarlyStopping counter: 5 out of 10
epoch#:  9
EarlyStopping counter: 6 out of 10
epoch#:  10
EarlyStopping counter: 7 out of 10
epoch#:  11
EarlyStopping counter: 8 out of 10
epoch#:  12
EarlyStopping counter: 9 out of 10
epoch#:  13
EarlyStopping counter: 10 out of 10
Early stopping


## Test

In [32]:
def test(args, model_path, loader=None):
    """Evaluate a saved checkpoint on the test split and return its predictions.

    Args:
        args: hyperparameter mapping used to build the model (and the test
            loader when ``loader`` is not supplied).
        model_path: checkpoint file name; it is looked up under ``models/``.
        loader: optional pre-built test loader. When ``None`` one is created
            with ``make_loaders(args, test=True)``.

    Returns:
        Whatever ``EvaluateOnTest.predict`` returns for the module-level ``device``.
    """
    test_data_loader = make_loaders(args, test=True) if loader is None else loader

    evaluator = EvaluateOnTest(make_model(args),
                               test_data_loader,
                               model_path='models/' + model_path)
    return evaluator.predict(device=device)

In [33]:
# Add the test-split settings that make_loaders(..., test=True) reads.
hyperparams.update(
    test_path='data/test.csv',
    test_batch_size=128,
)

In [34]:
# Build the test loader once so it can be passed to test() below.
test_loader = make_loaders(hyperparams, test=True)

Reading twitter_2018 - 1grams ...
Reading twitter_2018 - 2grams ...
Reading twitter_2018 - 1grams ...


PreProcessing dataset ...: 100%|██████████| 5427/5427 [00:04<00:00, 1157.57it/s]


The number of Test batches:  43


In [35]:
# Checkpoint is resolved under models/ by test(). The name starts with a run
# timestamp in the format pipeline() generates; the "_checkpoint.pt" suffix is
# presumably appended by Trainer — confirm against spanemo.learner.
preds = test(hyperparams, "2023-10-19-21:46:58_checkpoint.pt", loader=test_loader)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


F1-Macro: 0.5241 F1-Micro: 0.6233 JS: 0.5871 Time: 00:07


In [83]:
# Report ROC-AUC of the logits against the true labels under both averaging modes.
for auc_label, averaging in (("Macro AUC:", "macro"), ("Micro AUC:", "micro")):
    print(auc_label, roc_auc_score(preds["y_true"], preds["logits"], average=averaging))

Macro AUC: 0.9390444180771101
Micro AUC: 0.9642113212909524


In [82]:
# Column names applied, in order, to the label/prediction/logit arrays below.
label_names = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring",
    "confusion", "curiosity", "desire", "disappointment", "disapproval",
    "disgust", "embarrassment", "excitement", "fear", "gratitude", "grief",
    "joy", "love", "nervous", "optimism", "pride", "realization", "relief",
    "remorse", "sadness", "surprise", "neutral",
]

# Correlation method handed to DataFrame.corr for every matrix below.
correlation = "pearson"

# Wrap each array from `preds` in a labelled DataFrame.
y_true, y_pred, logits = (
    pd.DataFrame(preds[key], columns=label_names)
    for key in ("y_true", "y_pred", "logits")
)

# Label-by-label correlation matrices for each of the three frames.
y_true_corr, y_pred_corr, logits_corr = (
    frame.corr(correlation) for frame in (y_true, y_pred, logits)
)

In [None]:
# _ = sns.clustermap(logits_corr, annot=False)

In [None]:
# data = pd.read_csv("data/test.csv")

# wrongs = [(i[0], i[1].sum(), [(j, label_names[j]) for j, x in enumerate(i[1]) if x]) for i in enumerate(preds['y_true'] != preds['y_pred']) if i[1].any()]
# test_data = [(i, data['text'][i], [(j, label_names[j]) for j in range(28) if data[str(j)][i]]) for i in data.index]
# test_data = [test_data[i[0]] for i in wrongs]