In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "7"

import pandas as pd
import random
from dataclasses import dataclass
import numpy as np
import torch
# import seaborn as sns
import transformers
import json
import glob
from tqdm import tqdm
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaModel, RobertaTokenizer
from torchmetrics import MetricCollection
from torchmetrics.classification import Accuracy, AUROC, F1Score, Precision, Recall
from itertools import chain

# seed torch operations
SEED = 13
torch.manual_seed(SEED)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x7f7676cc0250>

In [2]:
# get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"using {device} device...")

using cuda device...


In [3]:
file_list = glob.glob("/opt/localdata/Data/bea/nlp/bmi550/project/chronic_pain_model_data/*.csv")
file_list

['/opt/localdata/Data/bea/nlp/bmi550/project/chronic_pain_model_data/dev.csv',
 '/opt/localdata/Data/bea/nlp/bmi550/project/chronic_pain_model_data/test.csv',
 '/opt/localdata/Data/bea/nlp/bmi550/project/chronic_pain_model_data/train.csv']

In [4]:
df_list = [pd.read_csv(f) for f in file_list]
df = pd.concat(df_list, axis=0).reset_index()
df

Unnamed: 0,index,tweet_id,text,label
0,0,1365360731055673345,new publication. chronic pain assessment is un...,0
1,1,1365571852245078018,today is a bad day for me. chronic pain is a b...,1
2,2,1365710497903960066,<hashtag> call for submissions\n\nif you’re a...,0
3,3,1365783013674606598,chronic pain does not <allcaps> have to contro...,0
4,4,1366344621786341381,call for participants for <user> phd research:...,0
...,...,...,...,...
3294,2105,1478736266682408961,"chronic pain and the self pity, depression tra...",0
3295,2106,1478743181357469697,pinpointing pain is not always easy as our bod...,0
3296,2107,1478787131451625477,“it is just amazing how chronic pain can paral...,0
3297,2108,1478801225399238656,anyone i know with autoimmune / chronic pain i...,1


In [5]:
df.drop_duplicates()

Unnamed: 0,index,tweet_id,text,label
0,0,1365360731055673345,new publication. chronic pain assessment is un...,0
1,1,1365571852245078018,today is a bad day for me. chronic pain is a b...,1
2,2,1365710497903960066,<hashtag> call for submissions\n\nif you’re a...,0
3,3,1365783013674606598,chronic pain does not <allcaps> have to contro...,0
4,4,1366344621786341381,call for participants for <user> phd research:...,0
...,...,...,...,...
3294,2105,1478736266682408961,"chronic pain and the self pity, depression tra...",0
3295,2106,1478743181357469697,pinpointing pain is not always easy as our bod...,0
3296,2107,1478787131451625477,“it is just amazing how chronic pain can paral...,0
3297,2108,1478801225399238656,anyone i know with autoimmune / chronic pain i...,1


In [6]:
df.label.value_counts(dropna=False)

label
0    2765
1     534
Name: count, dtype: int64

In [7]:
@dataclass
class FrameParams:
    df: pd.DataFrame
    class_name: str
    class_val: float


# function to get the set of unique patient ids in the dataframe
# then split based on the train/val/test proportion
def split_ids(id_col, test_prop, validation, seed):
    # get set of unique ids and convert to a list
    id_list = list(set(id_col))

    # shuffle id list
    random.Random(seed).shuffle(id_list)

    # get split lengths
    id_list_len = len(id_list)

    # get the length of indexes to add to the train/test sets
    train_prop = 1.0 - (2 * test_prop)
    train_len = int(train_prop * id_list_len)
    test_len = int(test_prop * id_list_len)

    # index set ids
    if validation:
        train_ids = id_list[:train_len]
        val_ids = id_list[train_len:train_len+test_len]

    else:
        train_ids = id_list[:train_len+test_len]
        val_ids = None

    test_ids = id_list[train_len+test_len:]

    print('total ids:', id_list_len)

    print('train ids: {}, prop: {:.3f}'.format(
        len(train_ids),
        len(train_ids) / id_list_len
    ))

    if validation:
        print('val ids: {}, prop: {:.3f}'.format(
            len(val_ids),
            len(val_ids) / id_list_len
        ))

    print('test ids: {}, prop: {:.3f}\n'.format(
        len(test_ids),
        len(test_ids) / id_list_len
    ))

    return train_ids, val_ids, test_ids

# function to index pos/neg dataframes by set patient ids and merge them
def index_dataframes(df_obj_list, ids, id_var):
    # zip pos/neg dataframes and ids
    components = zip([df_obj.df for df_obj in df_obj_list], ids)

    # index dataframes by ids for pos/neg
    df_list = [df[df[id_var].isin(ids)] for df, ids in components]

    # merge pos/neg dataframes
    out_df = pd.concat(df_list, axis=0)
    return out_df

# function to split a positive and negative dataframe into train/val/test
# then merge positive and negative for each
def split_n_dataframes(df_list, id_var: str = 'tweet_id',
                       test_prop: float = 0.2, seed: int = 13,
                       validation: bool = True, label_col: str = 'label'):
    # add label columns to dataframes
    for df_obj in df_list:
        df_obj.df.loc[:, 'class_label'] = df_obj.class_val

    # get empty list to put dataframe set IDs
    df_ids = []

    # get ids for each split dataframe
    for df_obj in df_list:
        train_ids, val_ids, test_ids = split_ids(
            df_obj.df[id_var],
            test_prop,
            validation,
            seed
        )
        df_ids.append([train_ids, val_ids, test_ids])

    # transpose list to get sublists of all train set IDs, val sets IDs, etc.
    trans_df_ids = [i for i in zip(*df_ids)]

    # prepare lists for indexing
    train_ids = trans_df_ids[0]
    val_ids = trans_df_ids[1]
    test_ids = trans_df_ids[2]

    # index split dataframes
    train_df = index_dataframes(df_list, train_ids, id_var)
    test_df = index_dataframes(df_list, test_ids, id_var)
    if validation:
        val_df = index_dataframes(df_list, val_ids, id_var)

    # shuffle dataframes
    train_df = train_df.sample(frac=1, random_state=seed).reset_index()
    test_df = test_df.sample(frac=1, random_state=seed).reset_index()
    if validation:
        val_df = val_df.sample(frac=1, random_state=seed).reset_index()
    else:
        val_df = None

    return train_df, val_df, test_df

In [8]:
# define constants
TEST_SIZE = 0.2
VALIDATION = True
SEED = 13

# split on label first
pos_df = df[df['label'] == 1]
neg_df = df[df['label'] == 0]

df_list = [FrameParams(pos_df, 'pos', 1.0), FrameParams(neg_df, 'neg', 0.0)]

# split each label df by username, then combine them into a single
# dataframe for each split
train_df, val_df, test_df = split_n_dataframes(
    df_list,
    test_prop=TEST_SIZE,
    validation=VALIDATION,
    seed=SEED
)

print('train size:', len(train_df))
print('val size:', len(val_df))
print('test size:', len(test_df))

total ids: 534
train ids: 320, prop: 0.599
val ids: 106, prop: 0.199
test ids: 108, prop: 0.202

total ids: 2765
train ids: 1659, prop: 0.600
val ids: 553, prop: 0.200
test ids: 553, prop: 0.200

train size: 1979
val size: 659
test size: 661


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_obj.df.loc[:, 'class_label'] = df_obj.class_val
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_obj.df.loc[:, 'class_label'] = df_obj.class_val


In [10]:
print(f'\ntrain distribution:\n{train_df.label.value_counts(dropna=False, normalize=True)}')
print(f'\nval distribution:\n{val_df.label.value_counts(dropna=False, normalize=True)}')
print(f'\ntest distribution:\n{test_df.label.value_counts(dropna=False, normalize=True)}')


train distribution:
label
0    0.838302
1    0.161698
Name: proportion, dtype: float64

val distribution:
label
0    0.83915
1    0.16085
Name: proportion, dtype: float64

test distribution:
label
0    0.836611
1    0.163389
Name: proportion, dtype: float64


In [11]:
print(f'\ntrain distribution:\n{train_df.label.value_counts(dropna=False, normalize=False)}')
print(f'\nval distribution:\n{val_df.label.value_counts(dropna=False, normalize=False)}')
print(f'\ntest distribution:\n{test_df.label.value_counts(dropna=False, normalize=False)}')


train distribution:
label
0    1659
1     320
Name: count, dtype: int64

val distribution:
label
0    553
1    106
Name: count, dtype: int64

test distribution:
label
0    553
1    108
Name: count, dtype: int64


In [10]:
# load roberta base as a tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base', truncation=True, do_lower_case=True)

In [11]:
class TweetDataset(Dataset):
    """
    class is very closely based on the huggingface tutorial implementation
    """
    def __init__(self, dataframe, tokenizer, max_len, id_col: str = 'tweet_id',
                 text_col: str = 'text', target_col: str = 'class_label'):
        self.tokenizer = tokenizer
        # self.data = dataframe
        self.tweet_id_list = list(dataframe[id_col])
        self.text_list = list(dataframe[text_col])
        self.label_list = list(dataframe[target_col])
        self.max_len = max_len

    def __len__(self):
        # get length of dataset (required for dataloader)
        return len(self.text_list)

    def __getitem__(self, idx):
        # extract text
        text = str(self.text_list[idx])

        # extract label
        label = self.label_list[idx]

        # tokenize text
        encoded_text = self.tokenizer.encode_plus(
            text,
            # add_special_tokens=True,
            truncation=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True
        )

        # unpack encoded text
        ids = encoded_text['input_ids']
        attention_mask = encoded_text['attention_mask']
        token_type_ids = encoded_text["token_type_ids"]

        # wrap outputs in dict
        out_dict = {
            'tweet_id_list': self.tweet_id_list,
            'id_tensor': torch.tensor(ids, dtype=torch.long),
            'mask_tensor': torch.tensor(attention_mask, dtype=torch.long),
            'token_type_tensor': torch.tensor(token_type_ids, dtype=torch.long),
            'label_tensor': torch.tensor(label, dtype=torch.float)
        }

        return out_dict

In [12]:
MAX_LEN = 256

# load dataframes into dataset objects
train_ds = TweetDataset(train_df, tokenizer, MAX_LEN)
val_ds = TweetDataset(val_df, tokenizer, MAX_LEN)
test_ds = TweetDataset(test_df, tokenizer, MAX_LEN)

In [13]:
def get_dataloader(dataset, batch_size, shuffle: bool = True,
                   pin_memory: bool = True, num_workers: int = 0,
                   prefetch_factor: int or None = None):
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        pin_memory=pin_memory,
        num_workers=num_workers,
        prefetch_factor=prefetch_factor
    )
    return dataloader

BATCH_SIZE = 128

# load datasets into loaders
train_loader = get_dataloader(train_ds, BATCH_SIZE)
val_loader = get_dataloader(val_ds, BATCH_SIZE)
test_loader = get_dataloader(test_ds, BATCH_SIZE)

In [14]:
class CustomRoberta(torch.nn.Module):
    """
    model subclass to define the RoBERTa architecture, also closely based on
    the huggingface tutorial implementation
    """
    def __init__(self, drop_percent, num_classes, pt_model_name: str = 'roberta-base'):
        super().__init__()
        self.base_model = RobertaModel.from_pretrained(pt_model_name)
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(drop_percent)
        self.classifier = torch.nn.Linear(768, num_classes)

    def forward(self, input_ids, attention_mask, token_type_ids):
        # get outputs from base model
        base_outputs = self.base_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids
        )
        # extract hidden state from roberta base outputs
        hidden_state = base_outputs[0]
        x = hidden_state[:, 0]

        # define the linear layer preceding the classifier
        # and apply ReLU activation to its outputs
        x = self.pre_classifier(x)
        x = torch.nn.ReLU()(x)

        # define the dropout layer and classifier
        # and apply Sigmoid activation to its outputs
        x = self.dropout(x)
        x = self.classifier(x)
        outputs = torch.nn.Sigmoid()(x)
        return outputs

In [15]:
# build the model and send it to the gpu
model = CustomRoberta(0.3, 1)
model.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


CustomRoberta(
  (base_model): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): La

In [16]:
def train_model(model, loader_dict, metric_collection, 
                criterion, optimizer, save_dir: str or None = None, 
                num_epochs: int = 25, monitor_metric: str = 'val_loss'):
    if save_dir is not None:
        # if save dir doesn't exist, make it
        Path(save_dir).mkdir(parents=True, exist_ok=True)
        model_save_path = os.path.join(save_dir, 'best_model_params.pth')
    
    # save base weights
    torch.save(model.state_dict(), model_save_path)

    # initialize the best metric based on what the monitor metric is
    # (and if it should be maximized or minimized)
    if monitor_metric.split('_')[-1] == 'loss':
        best_metric = np.inf
    else:
        best_metric = -np.inf

    # iterate over epochs
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch} {'-' * 40}")

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            # running_size = 0

            # select current data loader
            phase_loader = loader_dict[phase]
            phase_size = len(phase_loader)

            # iterate over data in current phase loader
            with tqdm(phase_loader, unit="batch", total=phase_size) as epoch_iter:
                for batch, data in enumerate(epoch_iter):
                    # unpack data dict
                    id_tensor = data['id_tensor'].to(device)
                    mask_tensor = data['mask_tensor'].to(device)
                    token_type_tensor = data['token_type_tensor'].to(device)
                    label_tensor = data['label_tensor'].to(device)
                    
                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(
                            id_tensor,
                            mask_tensor,
                            token_type_tensor
                        )
                        preds = torch.squeeze(outputs)
                        loss = criterion(preds, label_tensor)

                        # update running loss
                        running_loss += loss.item() #* label_tensor.size(0)
                        # running_size += label_tensor.size(0)

                        # update metric collection
                        metric_collection.update(preds, label_tensor)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # update metrics after each 10% chunk
                    # or if in val update on last batch
                    if ((phase == 'train') & (batch % (max(phase_size // 10, 1)) == 0)) |\
                    ((phase == 'val') & (batch == (phase_size - 1))):
                        phase_metrics = metric_collection.compute()

                        phase_metrics_dict = format_metrics_dict(
                            loss, #/ running_size, 
                            phase_metrics, 
                            phase
                        )
                        epoch_iter.set_postfix(phase_metrics_dict)
                        
                    

            # reset metric collection
            metric_collection.reset()
            
            # save the model weights if the current val monitor metric is the best so far
            if (save_dir is not None) & is_metric_better(monitor_metric, phase_metrics_dict, best_metric):
                best_metric = phase_metrics_dict[monitor_metric]
                
                print(f"saving model with best {monitor_metric} '{best_metric:.4f}'...")
                torch.save(model.state_dict(), model_save_path)

    # load best model weights and evaluate on test set
    model.load_state_dict(torch.load(model_save_path))
    id_list, pred_list, label_list = evaluate_model(model, loader_dict['test'], metric_collection, criterion)
    return id_list, pred_list, label_list

def evaluate_model(model, test_loader, metric_collection, criterion):
    running_loss = 0.0
    
    tweet_id_list = []
    pred_list = []
    label_list = []

    phase_size = len(test_loader)

    # iterate over data in current phase loader
    with tqdm(test_loader, unit="batch", total=phase_size) as epoch_iter:
        for batch, data in enumerate(epoch_iter):
            # unpack data dict
            batch_id_list = data['tweet_id_list']
            id_tensor = data['id_tensor'].to(device)
            mask_tensor = data['mask_tensor'].to(device)
            token_type_tensor = data['token_type_tensor'].to(device)
            label_tensor = data['label_tensor'].to(device)

            # zero the parameter gradients
            # optimizer.zero_grad()

            # forward
            # track history if only in train
            with torch.set_grad_enabled(False):
                outputs = model(
                    id_tensor,
                    mask_tensor,
                    token_type_tensor
                )
                preds = torch.squeeze(outputs)
                loss = criterion(preds, label_tensor)

                # update running loss
                running_loss += loss.item()

                # update metric collection
                metric_collection.update(preds, label_tensor)
                
                tweet_id_list += batch_id_list
                pred_list.append(preds.detach().cpu()) #.numpy())
                label_list.append(label_tensor.detach().cpu().numpy())

    phase_metrics = metric_collection.compute()

    phase_metrics_dict = format_metrics_dict(
        loss,
        phase_metrics, 
        'test'
    )

    # print metrics
    for k, v in phase_metrics_dict.items():
        print(f"{k}: {v:.4f}")
        
    return tweet_id_list, pred_list, label_list

def is_metric_better(monitor_metric, metrics_dict, best_eval):
    """
    function to determine if the monitor metric should be maximized or minimized
    """
    curr_eval = metrics_dict.get(monitor_metric)
    if curr_eval is None:
        return False
    
    if monitor_metric.split('_')[-1] == 'loss':
        return curr_eval < best_eval
    else:
        return curr_eval > best_eval
    
def format_metrics_dict(loss, metrics_dict, set_name: str):
    out_metrics_dict = {}
    out_metrics_dict[f'{set_name}_loss'] = loss.item()

    for k, v in metrics_dict.items():
        out_metrics_dict[f'{set_name}_{k}'] = v.item()

    return out_metrics_dict

In [17]:
# define metric collection
TASK_TYPE = 'binary'
NUM_CLASSES = 2

metric_collection = MetricCollection({
    'acc': Accuracy(task=TASK_TYPE, num_classes=NUM_CLASSES),
    'auc': AUROC(task=TASK_TYPE, num_classes=NUM_CLASSES),
    'prec': Precision(task=TASK_TYPE, num_classes=NUM_CLASSES),
    'rec': Recall(task=TASK_TYPE, num_classes=NUM_CLASSES),
    'f1': F1Score(task=TASK_TYPE, num_classes=NUM_CLASSES)
})

metric_collection.to(device)

MetricCollection(
  (acc): BinaryAccuracy()
  (auc): BinaryAUROC()
  (f1): BinaryF1Score()
  (prec): BinaryPrecision()
  (rec): BinaryRecall()
)

In [18]:
# define loss and optimizer
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.000005)

In [19]:
loader_dict = {'train': train_loader, 'val': val_loader, 'test': test_loader}

train_out_tuple = train_model(
    model, 
    loader_dict, 
    metric_collection, 
    criterion, 
    optimizer, 
    save_dir="./model_test_f1_4", 
    num_epochs=30, 
    monitor_metric='val_f1'
)


Epoch 0 ----------------------------------------


100%|██████████| 16/16 [00:45<00:00,  2.86s/batch, train_loss=0.638, train_acc=0.424, train_auc=0.508, train_f1=0.305, train_prec=0.18, train_rec=1]
100%|██████████| 6/6 [00:04<00:00,  1.32batch/s, val_loss=0.623, val_acc=0.839, val_auc=0.642, val_f1=0, val_prec=0, val_rec=0]


saving model with best val_f1 '0.0000'...

Epoch 1 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.425, train_acc=0.838, train_auc=0.529, train_f1=0, train_prec=0, train_rec=0]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.513, val_acc=0.839, val_auc=0.69, val_f1=0, val_prec=0, val_rec=0]



Epoch 2 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.397, train_acc=0.838, train_auc=0.659, train_f1=0, train_prec=0, train_rec=0]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.443, val_acc=0.839, val_auc=0.891, val_f1=0, val_prec=0, val_rec=0]



Epoch 3 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.327, train_acc=0.838, train_auc=0.872, train_f1=0, train_prec=0, train_rec=0]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.361, val_acc=0.839, val_auc=0.948, val_f1=0, val_prec=0, val_rec=0]



Epoch 4 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.467, train_acc=0.851, train_auc=0.925, train_f1=0, train_prec=0, train_rec=0]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.236, val_acc=0.924, val_auc=0.955, val_f1=0.737, val_prec=0.833, val_rec=0.66]


saving model with best val_f1 '0.7368'...

Epoch 5 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.142, train_acc=0.922, train_auc=0.923, train_f1=0.727, train_prec=0.889, train_rec=0.615]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.129, val_acc=0.918, val_auc=0.962, val_f1=0.761, val_prec=0.717, val_rec=0.811]


saving model with best val_f1 '0.7611'...

Epoch 6 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.174, train_acc=0.934, train_auc=0.958, train_f1=0.737, train_prec=0.667, train_rec=0.824]
100%|██████████| 6/6 [00:04<00:00,  1.34batch/s, val_loss=0.0383, val_acc=0.93, val_auc=0.964, val_f1=0.783, val_prec=0.783, val_rec=0.783]


saving model with best val_f1 '0.7830'...

Epoch 7 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.113, train_acc=0.941, train_auc=0.969, train_f1=0.846, train_prec=0.815, train_rec=0.88]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.149, val_acc=0.936, val_auc=0.97, val_f1=0.802, val_prec=0.802, val_rec=0.802]


saving model with best val_f1 '0.8019'...

Epoch 8 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.16, train_acc=0.951, train_auc=0.966, train_f1=0.789, train_prec=0.789, train_rec=0.789] 
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.486, val_acc=0.939, val_auc=0.971, val_f1=0.817, val_prec=0.795, val_rec=0.84]


saving model with best val_f1 '0.8165'...

Epoch 9 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.0634, train_acc=0.959, train_auc=0.97, train_f1=0.765, train_prec=0.812, train_rec=0.722] 
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.359, val_acc=0.936, val_auc=0.971, val_f1=0.796, val_prec=0.82, val_rec=0.774]



Epoch 10 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.0563, train_acc=0.962, train_auc=0.973, train_f1=0.85, train_prec=0.81, train_rec=0.895]
100%|██████████| 6/6 [00:04<00:00,  1.34batch/s, val_loss=0.253, val_acc=0.938, val_auc=0.974, val_f1=0.806, val_prec=0.81, val_rec=0.802]



Epoch 11 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.143, train_acc=0.97, train_auc=0.976, train_f1=0.936, train_prec=0.917, train_rec=0.957]  
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.355, val_acc=0.933, val_auc=0.975, val_f1=0.8, val_prec=0.772, val_rec=0.83]



Epoch 12 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.104, train_acc=0.971, train_auc=0.98, train_f1=0.914, train_prec=1, train_rec=0.842]  
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.0429, val_acc=0.93, val_auc=0.973, val_f1=0.796, val_prec=0.75, val_rec=0.849]



Epoch 13 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.167, train_acc=0.969, train_auc=0.976, train_f1=0.895, train_prec=0.81, train_rec=1] 
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.0701, val_acc=0.942, val_auc=0.973, val_f1=0.814, val_prec=0.847, val_rec=0.783]



Epoch 14 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.11, train_acc=0.974, train_auc=0.985, train_f1=0.955, train_prec=1, train_rec=0.913]  
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.346, val_acc=0.929, val_auc=0.973, val_f1=0.789, val_prec=0.752, val_rec=0.83]



Epoch 15 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.0891, train_acc=0.979, train_auc=0.982, train_f1=0.95, train_prec=0.95, train_rec=0.95]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.409, val_acc=0.932, val_auc=0.972, val_f1=0.8, val_prec=0.756, val_rec=0.849]



Epoch 16 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.0416, train_acc=0.981, train_auc=0.988, train_f1=0.9, train_prec=0.818, train_rec=1]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.2, val_acc=0.929, val_auc=0.971, val_f1=0.791, val_prec=0.748, val_rec=0.84]



Epoch 17 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.0427, train_acc=0.98, train_auc=0.986, train_f1=1, train_prec=1, train_rec=1] 
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.243, val_acc=0.93, val_auc=0.973, val_f1=0.795, val_prec=0.754, val_rec=0.84]



Epoch 18 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.0438, train_acc=0.982, train_auc=0.987, train_f1=0.98, train_prec=1, train_rec=0.96]
100%|██████████| 6/6 [00:04<00:00,  1.31batch/s, val_loss=0.383, val_acc=0.936, val_auc=0.972, val_f1=0.802, val_prec=0.802, val_rec=0.802]



Epoch 19 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.0701, train_acc=0.98, train_auc=0.992, train_f1=0.978, train_prec=1, train_rec=0.957] 
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.166, val_acc=0.939, val_auc=0.971, val_f1=0.808, val_prec=0.824, val_rec=0.792]



Epoch 20 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.103, train_acc=0.983, train_auc=0.99, train_f1=0.933, train_prec=0.933, train_rec=0.933]  
100%|██████████| 6/6 [00:04<00:00,  1.34batch/s, val_loss=0.183, val_acc=0.938, val_auc=0.971, val_f1=0.802, val_prec=0.822, val_rec=0.783]



Epoch 21 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.0606, train_acc=0.983, train_auc=0.989, train_f1=0.944, train_prec=1, train_rec=0.895]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.0129, val_acc=0.941, val_auc=0.973, val_f1=0.815, val_prec=0.819, val_rec=0.811]



Epoch 22 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.068, train_acc=0.982, train_auc=0.99, train_f1=1, train_prec=1, train_rec=1]  
100%|██████████| 6/6 [00:04<00:00,  1.34batch/s, val_loss=0.438, val_acc=0.942, val_auc=0.972, val_f1=0.826, val_prec=0.804, val_rec=0.849]


saving model with best val_f1 '0.8257'...

Epoch 23 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.0181, train_acc=0.985, train_auc=0.994, train_f1=1, train_prec=1, train_rec=1]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.217, val_acc=0.942, val_auc=0.971, val_f1=0.822, val_prec=0.815, val_rec=0.83]



Epoch 24 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.146, train_acc=0.985, train_auc=0.99, train_f1=0.914, train_prec=0.941, train_rec=0.889]  
100%|██████████| 6/6 [00:04<00:00,  1.34batch/s, val_loss=0.156, val_acc=0.941, val_auc=0.971, val_f1=0.813, val_prec=0.825, val_rec=0.802]



Epoch 25 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.0166, train_acc=0.986, train_auc=0.993, train_f1=1, train_prec=1, train_rec=1]
100%|██████████| 6/6 [00:04<00:00,  1.34batch/s, val_loss=0.18, val_acc=0.938, val_auc=0.971, val_f1=0.814, val_prec=0.783, val_rec=0.849]



Epoch 26 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.119, train_acc=0.986, train_auc=0.992, train_f1=0.947, train_prec=0.947, train_rec=0.947] 
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.0742, val_acc=0.938, val_auc=0.971, val_f1=0.811, val_prec=0.793, val_rec=0.83]



Epoch 27 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.0824, train_acc=0.986, train_auc=0.994, train_f1=0.944, train_prec=0.944, train_rec=0.944]
100%|██████████| 6/6 [00:04<00:00,  1.34batch/s, val_loss=0.281, val_acc=0.936, val_auc=0.972, val_f1=0.811, val_prec=0.776, val_rec=0.849]



Epoch 28 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.30s/batch, train_loss=0.0292, train_acc=0.984, train_auc=0.994, train_f1=0.977, train_prec=1, train_rec=0.955]
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.437, val_acc=0.927, val_auc=0.971, val_f1=0.789, val_prec=0.738, val_rec=0.849]



Epoch 29 ----------------------------------------


100%|██████████| 16/16 [00:36<00:00,  2.31s/batch, train_loss=0.103, train_acc=0.984, train_auc=0.995, train_f1=0.919, train_prec=0.85, train_rec=1] 
100%|██████████| 6/6 [00:04<00:00,  1.33batch/s, val_loss=0.0432, val_acc=0.932, val_auc=0.972, val_f1=0.796, val_prec=0.765, val_rec=0.83]
100%|██████████| 6/6 [00:04<00:00,  1.21batch/s]

test_loss: 0.2447
test_acc: 0.9213
test_auc: 0.9550
test_f1: 0.7759
test_prec: 0.7258
test_rec: 0.8333





In [20]:
model.load_state_dict(torch.load("/opt/localdata/Data/bea/nlp/bmi550/project/model_test_f1_2/best_model_params.pth"))

id_list, pred_list, label_list = evaluate_model(model, test_loader, metric_collection, criterion)

100%|██████████| 6/6 [00:04<00:00,  1.23batch/s]

test_loss: 0.3630
test_acc: 0.9198
test_auc: 0.9368
test_f1: 0.7759
test_prec: 0.7258
test_rec: 0.8333



