### Installing modules

In [None]:
# Install the Hugging Face `transformers` package; RobertaModel and RobertaTokenizer are imported from it below.
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


### Mounting *drive*

In [None]:
# Mount Google Drive at /content/drive; the dataset and image paths used later live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Imports

### Importing libraries and modules

In [None]:
import os
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from tqdm import tqdm
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from transformers import RobertaModel, RobertaTokenizer
from sklearn.metrics import f1_score, accuracy_score, classification_report

import re
import json
import random
import shutil
import argparse
import functools
import contextlib
import numpy as np
import pandas as pd
from collections import Counter

import time
import logging
from datetime import timedelta

# Set CUDA_LAUNCH_BLOCKING=1 in this process's environment.
# NOTE(review): presumably a debugging aid (synchronous kernel launches make CUDA errors
# point at the offending call, at a speed cost) — confirm it is still wanted for full training runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

### Importing data

In [None]:
# Load the review sheet, drop the annotation columns that are not used below, and keep
# only the first 1096 rows (presumably the rows that have a matching image — TODO confirm).
dataset = (
    pd.read_excel("/content/drive/MyDrive/MM_AMZN_RW_DATA/Domain_wise_datasets/fashion_data_webent.xlsx")
    .drop(columns=['Complaint Overall', 'Emotion Overall', 'Sentiment Overall','Emotion.0', 'Emotion.1', 'Emotion.2', 'Emotion.3', 'Comments'])
    .iloc[0:1096, :]
)
dataset.columns

Index(['Title', 'Review_S', 'Aspect_Term1', 'Comp_Analysis1', 'Aspect_Term2',
       'Comp_Analysis2', 'Aspect_Term3', 'Comp_Analysis3', 'Aspect_Term4',
       'Comp_Analysis4', 'Image_urls', 'Web_entities'],
      dtype='object')

### Defining hyperparameters

In [None]:
# Global configuration. The functions and classes defined later read these as
# module-level names (there is no `args` object), so this cell must run first.
batch_sz = 8  # batch size passed to every DataLoader
drop_img_percent=0.0  # not referenced elsewhere in this notebook as shown
dropout=0.1  # dropout probability applied to the image-token embeddings
embed_sz=300  # not referenced elsewhere in this notebook as shown
freeze_img=0  # truthy -> image encoder parameters get requires_grad=False
freeze_txt=0  # truthy -> transformer encoder parameters get requires_grad=False
gradient_accumulation_steps=24  # optimizer.step() runs once every this many batches
hidden=[]  # not referenced elsewhere in this notebook as shown
hidden_sz=768  # transformer hidden size; also the classifier input width
img_embed_pool_type="avg"  # "avg" -> AdaptiveAvgPool2d, anything else -> AdaptiveMaxPool2d
img_hidden_sz=2048  # feature width produced by the image encoder
include_bn=True  # not referenced elsewhere in this notebook as shown
lr=1e-4  # Adam learning rate
lr_factor=0.5  # ReduceLROnPlateau multiplicative factor
lr_patience=2  # ReduceLROnPlateau patience (epochs)
max_epochs=50  # upper bound on training epochs
max_seq_len=512  # sequence budget; the dataset subtracts num_image_embeds from it
model_name="mmbt"  # selects the text start token and whether images are batched
n_workers=2  # DataLoader worker processes
name="nameless"  # run name; appended to savedir
num_image_embeds=1  # number of pooled image vectors fed to the transformer
patience=10  # early-stopping patience (epochs without improvement)
savedir="/content/save_dir/"  # base directory for checkpoints and the log file
seed=42  # seed for the RNGs and the train/val/test splits
task="mmimdb"  # not referenced elsewhere in this notebook as shown
task_type= "multilabel"#, "classification"]
warmup=0.1  # not referenced elsewhere in this notebook as shown
weight_classes=1  # truthy -> BCE loss uses per-label pos_weight
n_classes = 7  # width of the classifier output layer

# NOTE(review): `return_dict` and `do_lower_case` are not obviously RobertaTokenizer
# options — confirm they have any effect here.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base", return_dict=True, do_lower_case=True)

## Data Preprocessing

### Getting data in lists

In [None]:
def get_title_review_comb(X_reviews):
    """Build one normalized text string per row from the title and review columns.

    Column 0 is treated as the title and column 1 as the review body. Only
    values that are `str` are used, so a missing (NaN) title or review is
    simply skipped. The title and review are joined with a single space so the
    last title word does not fuse with the first review word.

    The combined text is split on runs of non-word characters, lower-cased and
    re-joined with single spaces.

    Args:
        X_reviews: pandas DataFrame whose first two columns are title and review.

    Returns:
        list[str]: one cleaned string per row, in row order.
    """
    text_reviews = []
    for i in range(X_reviews.shape[0]):
        title, review = X_reviews.iloc[i, 0], X_reviews.iloc[i, 1]
        # Non-string cells (e.g. NaN for an empty spreadsheet cell) contribute nothing.
        parts = [part for part in (title, review) if isinstance(part, str)]
        combined = " ".join(parts)

        # Split first, then lower-case each word, dropping the empty strings
        # re.split leaves at the edges of the text.
        words = [word.lower() for word in re.split(r'\W+', combined) if word]
        text_reviews.append(' '.join(words))

    return text_reviews

def check_aspect(aspect, j):
    """Collect the aspect labels of row `j` across all aspect columns.

    Values whose string form is 'nan' (empty cells) are skipped, and the label
    'fabric' is mapped to 'quality'. Every column in `aspect` is visited, so
    the function works for any number of aspect columns.

    Args:
        aspect: sequence of columns, each an indexable sequence of labels.
        j: row index to read from every column.

    Returns:
        list: the labels of row `j`, in column order.
    """
    aspects = []
    for column in aspect:
        val = column[j]
        if str(val) == 'nan':
            continue
        # 'fabric' is folded into the 'quality' class.
        aspects.append('quality' if val == 'fabric' else val)
    return aspects


def get_labels(df):
    """Return, for every row of `df`, the list of its aspect labels.

    Reads the four `Aspect_Term1..4` columns and delegates the per-row
    filtering/mapping to `check_aspect`.
    """
    column_names = ['Aspect_Term1', 'Aspect_Term2', 'Aspect_Term3', 'Aspect_Term4']
    aspect = [list(df[column]) for column in column_names]
    return [check_aspect(aspect, row) for row in range(df.shape[0])]

def get_web_entities(dataset):
    """Parse the `Web_entities` column into a list of cleaned entity lists.

    For each cell, every character outside `a-zA-Z0-9,.)` is replaced by a
    space, the text is split on commas, whitespace inside each piece is
    collapsed, empty pieces are dropped and the rest are lower-cased.

    Returns:
        list[list[str]]: one list of entity strings per row.
    """
    def _clean(raw_entities):
        sanitized = re.sub("[^a-zA-Z0-9,.)]", " ", raw_entities)
        collapsed = (" ".join(piece.split()) for piece in sanitized.split(','))
        return [entity.lower() for entity in collapsed if entity != '']

    return [_clean(raw) for raw in list(dataset['Web_entities'])]

imgs_path = '/content/drive/MyDrive/MM_AMZN_RW_DATA/Domain_wise_images/images_new/'
# Keep the sorted listing under its own name so the builtin dir() is not shadowed.
image_files = sorted(os.listdir(imgs_path))
# Rotate the sorted listing: entries 965..1274 first, then entries 0..964.
# NOTE(review): presumably this re-orders the lexicographic file order to match the
# spreadsheet row order — confirm against the image naming scheme.
directory = image_files[965:1275] + image_files[:965]
directory = [imgs_path + file_name for file_name in directory]

images_path = directory[0:1096]
text_review = get_title_review_comb(dataset)
web_entities = get_web_entities(dataset)
aspect_labels = get_labels(dataset)

# All four modalities are indexed by the same row position, so they must line up.
assert len(aspect_labels) == len(text_review) == len(web_entities) == len(images_path), \
    "text, web entities, labels and image paths must have the same number of rows"

print(len(aspect_labels))
print(len(text_review))
print(len(web_entities))
print(len(images_path))

1096
1096
1096
1096


### Splitting dataset

In [None]:
def _as_object_array(rows):
    """Return `rows` as a 1-D object ndarray holding one Python object per row.

    `np.array(list_of_lists)` on ragged lists emits a VisibleDeprecationWarning
    on older NumPy and raises ValueError on NumPy >= 1.24; filling an object
    array element by element works on every version and never produces a 2-D array.
    """
    out = np.empty(len(rows), dtype=object)
    for idx, row in enumerate(rows):
        out[idx] = row
    return out


# First Split for Train and Test
text_train, text_test, web_ent_train, web_ent_test, img_train, img_test, y1_train, y1_test = train_test_split(
    np.array(text_review),
    _as_object_array(web_entities),
    np.array(images_path),
    _as_object_array(aspect_labels),
    test_size=0.1,
    random_state=seed,
    shuffle=True,
)
# Next split Train in to training and validation
text_tr, text_val, web_ent_tr, web_ent_val, img_tr, img_val, y1_tr, y1_val = train_test_split(
    text_train, web_ent_train, img_train, y1_train, test_size=0.2, random_state=seed, shuffle=True
)

# print(web_ent_tr.shape)
# print(web_ent_test.shape)
# print(web_ent_val.shape)

(788,)
(110,)
(198,)


  text_train,text_test, web_ent_train,web_ent_test, img_train,img_test, y1_train,y1_test  = train_test_split(np.array(text_review), np.array(web_entities), np.array(images_path),
  np.array(aspect_labels), test_size=0.1, random_state=seed, shuffle=True)


## Defining Functions and Classes

#### Classes and Utility functions


In [None]:
def set_seed(seed):
    """Seed Python, NumPy and PyTorch (CPU and CUDA) RNGs and make cuDNN deterministic."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    # Trade cuDNN autotuning for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def save_checkpoint(state, is_best, checkpoint_path, filename="checkpoint.pt"):
    """Serialize `state` to `checkpoint_path/filename`; also copy it to model_best.pt when `is_best`."""
    target = os.path.join(checkpoint_path, filename)
    torch.save(state, target)
    if not is_best:
        return
    shutil.copyfile(target, os.path.join(checkpoint_path, "model_best.pt"))


def load_checkpoint(model, path):
    """Load the weights stored under the "state_dict" key of the checkpoint at `path` into `model`."""
    stored_weights = torch.load(path)["state_dict"]
    model.load_state_dict(stored_weights)


def truncate_seq_pair(tokens_a, tokens_b, max_length):
    """Trim two token lists in place until their combined length fits `max_length`.

    One token at a time is removed from the end of the longer list; on a tie
    the token is taken from `tokens_b`.
    Copied from https://github.com/huggingface/pytorch-pretrained-BERT
    """
    while len(tokens_a) + len(tokens_b) > max_length:
        longer = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
        longer.pop()


def log_metrics(set_name, metrics, logger):
    """Log the mean loss, macro F1 and micro F1 of `metrics` at INFO level, prefixed by `set_name`."""
    template = "{}: Loss_mean_overall: {:.5f} | Macro F1 {:.5f} | Micro F1: {:.5f}"
    values = [metrics[key] for key in ("loss_mean_overall", "macro_f1", "micro_f1")]
    logger.info(template.format(set_name, *values))


@contextlib.contextmanager
def numpy_seed(seed, *addl_seeds):
    """Temporarily seed NumPy's global PRNG, restoring the previous state on exit.

    With `seed=None` the context is a no-op. When extra seeds are given, the
    effective seed is derived from the hash of the whole tuple.
    """
    if seed is not None:
        if addl_seeds:
            seed = int(hash((seed, *addl_seeds)) % 1e6)
        saved_state = np.random.get_state()
        np.random.seed(seed)
        try:
            yield
        finally:
            # Put the global generator back exactly where the caller left it.
            np.random.set_state(saved_state)
    else:
        yield

def get_labels_and_frequencies(aspects):
    """Count label occurrences.

    `aspects` is either a list of label lists (multilabel) or a flat list of
    labels; the shape is decided by the type of its first element.

    Returns:
        (labels, freqs): labels in first-seen order, and a Counter of occurrences.
    """
    label_freqs = Counter()
    rows = aspects if type(aspects[0]) == list else [aspects]
    for row in rows:
        label_freqs.update(row)
    return list(label_freqs), label_freqs

# Label vocabulary and per-label counts over the full dataset. `aspect_label` fixes the
# label -> index order used for the multi-hot targets and for the loss weights.
aspect_label, aspect_label_freqs = get_labels_and_frequencies(aspect_labels)
print(aspect_label_freqs)

Counter({'quality': 704, 'fit': 390, 'service': 296, 'price': 282, 'style': 240, 'color': 231, 'misc': 65})


In [None]:
class JsonlDataset(Dataset):
    """Dataset pairing review text and web entities with an image and a multi-hot label.

    Each item is a tuple `(sentence, segment, image, label)`:
      * sentence: LongTensor of token ids for `review </s> web-entities </s>`
        (the leading start token is dropped in `__getitem__`).
      * segment: tensor with value 1 for the review part and 2 for the
        web-entity part (0 is reserved for the image tokens).
      * image: the transformed RGB image.
      * label: float tensor of length `n_classes` with 1 at the index (in
        `aspect_label`) of each aspect of the sample.
    """

    def __init__(self, reviews, web_entities, images_path, labels, tokenizer, transforms):
        self.text_data = reviews
        self.web_entities = web_entities
        self.img_data = images_path
        self.aspect_data = labels
        self.tokenizer = tokenizer

        self.text_start_token = ["<s>"] if model_name != "mmbt" else ["</s>"]

        # Reserve room in the sequence budget for the image embeddings.
        self.max_seq_len = max_seq_len - num_image_embeds

        self.transforms = transforms

    def __len__(self):
        return len(self.text_data)

    def __getitem__(self, index):
        sent1 = self.tokenizer.tokenize(self.text_data[index])
        sent2 = self.tokenizer.tokenize(" ".join(self.web_entities[index]))
        # -3 leaves room for the start token and the two "</s>" separators.
        truncate_seq_pair(sent1, sent2, self.max_seq_len - 3)

        sentence = self.text_start_token + sent1 + ["</s>"] + sent2 + ["</s>"]
        segment = torch.cat(
                [torch.zeros(2 + len(sent1)), torch.ones(len(sent2) + 1)]
            )

        sentence = torch.LongTensor(self.tokenizer.convert_tokens_to_ids(sentence))

        # Multi-hot target sized from n_classes so it always matches the classifier
        # output width instead of a hard-coded 7.
        label = torch.zeros(n_classes)
        label[
              [aspect_label.index(tgt) for tgt in self.aspect_data[index]]
            ] = 1

        image = None
        if self.img_data[index]:
            image = Image.open(self.img_data[index]).convert("RGB")
        else:
            # No image path: fall back to a uniform grey placeholder image.
            image = Image.fromarray(128 * np.ones((256, 256, 3), dtype=np.uint8))
        image = self.transforms(image)

        # The first SEP is part of Image Token.
        segment = segment[1:]
        sentence = sentence[1:]
        # The first segment (0) is of images.
        segment += 1

        return sentence, segment, image, label



class LogFormatter:
    """Formatter that prefixes each record with level, wall-clock time and elapsed time.

    Elapsed time is measured from `start_time`, which is set at construction.
    Continuation lines of multi-line messages are indented to align under the
    first line's message text.
    """

    def __init__(self):
        self.start_time = time.time()

    def format(self, record):
        elapsed = timedelta(seconds=round(record.created - self.start_time))
        prefix = "%s - %s - %s" % (record.levelname, time.strftime("%x %X"), elapsed)
        # +3 accounts for the " - " that separates the prefix from the message.
        continuation_indent = " " * (len(prefix) + 3)
        body = record.getMessage().replace("\n", "\n" + continuation_indent)
        return "%s - %s" % (prefix, body)


def get_transforms():
    """Return the image pipeline: resize to 256, center-crop 224, to tensor, normalize per channel."""
    channel_mean = [0.46777044, 0.44531429, 0.40661017]
    channel_std = [0.12221994, 0.12145835, 0.14380469]
    steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    return transforms.Compose(steps)


def collate_fn(batch):
    """Pad a list of `(sentence, segment, image, label)` samples into batch tensors.

    Text ids, segment ids and the attention mask are zero-padded on the right
    to the longest sentence in the batch.

    Returns:
        (text_tensor, segment_tensor, mask_tensor, img_tensor, tgt1_tensor);
        `img_tensor` is None for model types that do not use images.
    """
    lens = [len(sample[0]) for sample in batch]
    padded_shape = (len(batch), max(lens))

    text_tensor = torch.zeros(*padded_shape, dtype=torch.long)
    segment_tensor = torch.zeros(*padded_shape, dtype=torch.long)
    mask_tensor = torch.zeros(*padded_shape, dtype=torch.long)

    uses_images = model_name in ["img", "concatbow", "concatbert", "mmbt"]
    img_tensor = torch.stack([sample[2] for sample in batch]) if uses_images else None

    # Multilabel case
    tgt1_tensor = torch.stack([sample[3] for sample in batch])

    for row_idx, sample in enumerate(batch):
        length = lens[row_idx]
        text_tensor[row_idx, :length] = sample[0]
        segment_tensor[row_idx, :length] = sample[1]
        mask_tensor[row_idx, :length] = 1

    return text_tensor, segment_tensor, mask_tensor, img_tensor, tgt1_tensor


def get_data_loaders():
    """Build the train, validation and test DataLoaders from the module-level splits.

    Only the training loader shuffles. All loaders share the batch size,
    worker count, image transforms and collate function.

    Returns:
        (train_loader, val_loader, test_loader)
    """
    image_transforms = get_transforms()

    def _make_loader(texts, entities, images, targets, shuffle):
        # One dataset + loader per split; everything but the data and shuffle flag is shared.
        split = JsonlDataset(texts, entities, images, targets, tokenizer, image_transforms)
        return DataLoader(
            split,
            batch_size=batch_sz,
            shuffle=shuffle,
            num_workers=n_workers,
            collate_fn=collate_fn,
        )

    train_loader = _make_loader(text_tr, web_ent_tr, img_tr, y1_tr, shuffle=True)
    val_loader = _make_loader(text_val, web_ent_val, img_val, y1_val, shuffle=False)
    test_loader = _make_loader(text_test, web_ent_test, img_test, y1_test, shuffle=False)

    return train_loader, val_loader, test_loader


def create_logger(filepath):
    """Configure the root logger to write to `filepath` and to the console.

    Handlers already attached to the root logger are closed and removed first,
    so calling this repeatedly (e.g. re-running a notebook cell) neither
    duplicates output nor leaks open log files.

    Args:
        filepath: path of the log file, opened in append mode.

    Returns:
        The root logger, with an extra `reset_time()` attribute that restarts
        the elapsed-time counter shown in each log line.
    """
    # create log formatter
    log_formatter = LogFormatter()

    # create file handler and set level to debug
    file_handler = logging.FileHandler(filepath, "a")
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(log_formatter)

    # create console handler and set level to info
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(log_formatter)

    # create logger and set level to debug
    logger = logging.getLogger()
    # Close old handlers before discarding them; simply rebinding the list would
    # leave previously opened log files open.
    while logger.handlers:
        logger.handlers.pop().close()
    logger.setLevel(logging.DEBUG)
    logger.propagate = False
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    # reset logger elapsed time
    def reset_time():
        log_formatter.start_time = time.time()

    logger.reset_time = reset_time

    # NOTE(review): vars() here dumps this function's local variables (handlers,
    # formatter, ...), not the hyperparameters — confirm whether that is intended.
    logger.info(
        "\n".join(
            "%s: %s" % (k, str(v))
            for k, v in sorted(dict(vars()).items(), key=lambda x: x[0])
        )
    )
    return logger


def get_criterion():
    """Return the multilabel loss (binary cross-entropy on logits).

    When `weight_classes` is truthy, each label gets a positive-class weight of
    `len(text_tr) / frequency`, so rarer labels weigh more. The weight tensor
    is placed on the GPU when one is available and on the CPU otherwise.
    """
    if not weight_classes:
        return nn.BCEWithLogitsLoss()

    freqs = [aspect_label_freqs[l] for l in aspect_label]
    label_weights = (torch.FloatTensor(freqs) / len(text_tr)) ** -1
    # Fall back to CPU instead of failing when CUDA is unavailable.
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return nn.BCEWithLogitsLoss(pos_weight=label_weights.to(target_device))



def get_scheduler(optimizer):
    """Return a ReduceLROnPlateau scheduler in "max" mode (the tracked metric should increase)."""
    plateau_options = dict(patience=lr_patience, verbose=True, factor=lr_factor)
    return optim.lr_scheduler.ReduceLROnPlateau(optimizer, "max", **plateau_options)

#### Modeling

In [None]:
class RobertaEncoder(nn.Module):
    """Text-only encoder: pretrained `roberta-base` returning the pooled sequence representation."""

    def __init__(self):
        super(RobertaEncoder, self).__init__()
        self.bert = RobertaModel.from_pretrained("roberta-base")

    def forward(self, txt, mask, segment):
        """Encode a batch of token ids.

        Args:
            txt: token ids.
            mask: attention mask for `txt`.
            segment: token type ids for `txt`.

        Returns:
            The pooled output of the model (element 1 of the model output).
        """
        # `output_all_encoded_layers` belongs to the old pytorch-pretrained-bert API and
        # is not accepted by transformers' RobertaModel. Indexing the output with [1]
        # yields the pooled output for both tuple and ModelOutput return types.
        outputs = self.bert(
            txt,
            token_type_ids=segment,
            attention_mask=mask,
        )
        return outputs[1]

class ImageEncoder(nn.Module):
    """ResNet-152 feature extractor that pools the final feature map into N image embeddings.

    N is the module-level `num_image_embeds`; the pooling type is chosen by
    `img_embed_pool_type` ("avg" for average pooling, otherwise max pooling).
    """

    # Output grid (rows, cols) of the adaptive pooling for each supported embedding count.
    _POOL_GRIDS = {
        1: (1, 1),
        2: (2, 1),
        3: (3, 1),
        4: (2, 2),
        5: (5, 1),
        6: (3, 2),
        7: (7, 1),
        8: (4, 2),
        9: (3, 3),
    }

    def __init__(self):
        super(ImageEncoder, self).__init__()
        # Fail fast: without this check an unsupported value would leave `self.pool`
        # undefined and only surface as an AttributeError inside forward().
        if num_image_embeds not in self._POOL_GRIDS:
            raise ValueError(
                "num_image_embeds must be one of %s, got %r"
                % (sorted(self._POOL_GRIDS), num_image_embeds)
            )

        model = torchvision.models.resnet152(pretrained=True)
        # Drop the last two children (global pooling and classifier head) to keep the feature map.
        modules = list(model.children())[:-2]
        self.model = nn.Sequential(*modules)

        pool_func = (
            nn.AdaptiveAvgPool2d
            if img_embed_pool_type == "avg"
            else nn.AdaptiveMaxPool2d
        )
        self.pool = pool_func(self._POOL_GRIDS[num_image_embeds])

    def forward(self, x):
        # Bx3x224x224 -> Bx2048x7x7 -> Bx2048xN -> BxNx2048
        out = self.pool(self.model(x))
        out = torch.flatten(out, start_dim=2)
        out = out.transpose(1, 2).contiguous()
        return out  # BxNx2048

class ImageBertEmbeddings(nn.Module):
    """Turns image features into transformer input embeddings framed by CLS/SEP tokens.

    The position, token-type and word embeddings and the LayerNorm are shared
    with the text embedding module passed in; only the linear projection from
    image features to the hidden size is new.
    """

    def __init__(self, embeddings):
        super(ImageBertEmbeddings, self).__init__()
        self.img_embeddings = nn.Linear(img_hidden_sz, hidden_sz)
        self.position_embeddings = embeddings.position_embeddings
        self.token_type_embeddings = embeddings.token_type_embeddings
        self.word_embeddings = embeddings.word_embeddings
        self.LayerNorm = embeddings.LayerNorm
        self.dropout = nn.Dropout(p=dropout)
        self.num_image_embeds = num_image_embeds
        self.tokenizer = tokenizer

    def forward(self, input_imgs, token_type_ids):
        """Embed image features.

        Args:
            input_imgs: image features, B x num_image_embeds x img_hidden_sz.
            token_type_ids: segment ids for the CLS + image + SEP positions.

        Returns:
            Embeddings of shape B x (num_image_embeds + 2) x hidden_sz.
        """
        bsz = input_imgs.size(0)
        seq_length = self.num_image_embeds + 2  # +2 for CLS and SEP Token
        # Build helper tensors on the same device as the input rather than assuming CUDA.
        device = input_imgs.device

        # Use the tokenizer's own special-token ids. The BERT-style strings "[CLS]" and
        # "[SEP]" are not in the RoBERTa vocabulary, so looking them up would map both
        # to the unknown-token id.
        # NOTE: checkpoints trained before this fix saw the unknown-token embedding here.
        cls_id = torch.tensor([self.tokenizer.cls_token_id], dtype=torch.long, device=device)
        cls_id = cls_id.unsqueeze(0).expand(bsz, 1)
        cls_token_embeds = self.word_embeddings(cls_id)

        sep_id = torch.tensor([self.tokenizer.sep_token_id], dtype=torch.long, device=device)
        sep_id = sep_id.unsqueeze(0).expand(bsz, 1)
        sep_token_embeds = self.word_embeddings(sep_id)

        imgs_embeddings = self.img_embeddings(input_imgs)
        token_embeddings = torch.cat(
            [cls_token_embeds, imgs_embeddings, sep_token_embeds], dim=1
        )

        position_ids = torch.arange(seq_length, dtype=torch.long, device=device)
        position_ids = position_ids.unsqueeze(0).expand(bsz, seq_length)
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)
        embeddings = token_embeddings + position_embeddings + token_type_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings


class MultimodalRobertaEncoder(nn.Module):
    """Joint image+text encoder built from the pieces of a pretrained `roberta-base`.

    Image embeddings (framed by CLS/SEP) are prepended to the text embeddings
    and the concatenation is run through the transformer encoder and pooler.
    The token-type embedding table is widened to three rows: 0 for image
    tokens and 1/2 for the two text segments.
    """

    def __init__(self, ):
        super(MultimodalRobertaEncoder, self).__init__()
        bert = RobertaModel.from_pretrained("roberta-base")
        self.txt_embeddings = bert.embeddings

        # Rows 0-1 are initialised from the pretrained token-type weights, row 2 from their mean.
        ternary_embeds = nn.Embedding(3, hidden_sz)
        ternary_embeds.weight.data[:2].copy_(
        bert.embeddings.token_type_embeddings.weight
            )
        ternary_embeds.weight.data[2].copy_(
                bert.embeddings.token_type_embeddings.weight.data.mean(dim=0)
            )
        self.txt_embeddings.token_type_embeddings = ternary_embeds

        self.img_embeddings = ImageBertEmbeddings(self.txt_embeddings)
        self.img_encoder = ImageEncoder()
        self.encoder = bert.encoder
        self.pooler = bert.pooler
        self.num_image_embeds = num_image_embeds

    def forward(self, input_txt, attention_mask, segment, input_img):
        """Encode a batch.

        Args:
            input_txt: padded text token ids, B x T.
            attention_mask: 1 for real text tokens, 0 for padding, B x T.
            segment: text segment ids, B x T.
            input_img: image batch fed to the image encoder.

        Returns:
            Pooled representation of the joint sequence.
        """
        bsz = input_txt.size(0)
        n_img_tokens = self.num_image_embeds + 2  # CLS + image embeddings + SEP
        # Create helper tensors on the input's device instead of assuming CUDA.
        device = input_txt.device

        # Image positions are always attended to.
        attention_mask = torch.cat(
            [
                torch.ones(bsz, n_img_tokens, dtype=torch.long, device=device),
                attention_mask,
            ],
            dim=1,
        )
        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
        extended_attention_mask = extended_attention_mask.to(
            dtype=next(self.parameters()).dtype
        )
        # Additive mask: 0 where attended, -10000 where masked out.
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        # Segment id 0 for every image position.
        img_tok = torch.zeros(bsz, n_img_tokens, dtype=torch.long, device=device)
        img = self.img_encoder(input_img)  # Bx3x224x224 -> BxNx2048
        img_embed_out = self.img_embeddings(img, img_tok)
        txt_embed_out = self.txt_embeddings(input_txt, segment)
        encoder_input = torch.cat([img_embed_out, txt_embed_out], 1)  # Bx(TEXT+IMG)xHID

        encoded_layers = self.encoder(
            encoder_input, extended_attention_mask)#, output_all_encoded_layers=False)

        return self.pooler(encoded_layers[-1])


class MultimodalBertClf(nn.Module):
    """Multimodal classifier: joint image+text encoder followed by one linear head of `n_classes` logits."""

    def __init__(self):
        super().__init__()
        self.enc = MultimodalRobertaEncoder()
        self.clf1 = nn.Linear(hidden_sz, n_classes)

    def forward(self, txt, mask, segment, img):
        """Return the raw (pre-sigmoid) logits for the batch."""
        pooled = self.enc(txt, mask, segment, img)
        return self.clf1(pooled)

#### Forward method

In [None]:
def model_forward(i_epoch, model, criterion, batch):
    """Run one batch through `model` and compute the loss.

    The batch tensors are moved to whatever device the model's parameters live
    on, so the function works on both CPU and GPU.

    Args:
        i_epoch: current epoch index (accepted for interface compatibility; unused).
        model: model exposing `enc.img_encoder` and `enc.encoder`.
        criterion: loss taking `(logits, targets)`.
        batch: `(txt, segment, mask, img, tgt1)` as produced by `collate_fn`.

    Returns:
        (loss, logits, targets) with logits and targets on the model's device.
    """
    txt, segment, mask, img, tgt1 = batch

    # Apply the freeze flags to the image and text encoders.
    for param in model.enc.img_encoder.parameters():
        param.requires_grad = not freeze_img
    for param in model.enc.encoder.parameters():
        param.requires_grad = not freeze_txt

    device = next(model.parameters()).device
    txt, img = txt.to(device), img.to(device)
    mask, segment = mask.to(device), segment.to(device)

    out1 = model(txt, mask, segment, img)

    tgt1 = tgt1.to(device)
    loss1 = criterion(out1, tgt1)

    return loss1, out1, tgt1

#### Evaluate and predict

In [None]:
def model_eval(i_epoch, data, model, criterion, store_preds=False):
    """Evaluate `model` over `data` without gradients.

    Predictions are sigmoid(logits) > 0.5. `store_preds` is accepted but unused.

    Returns:
        dict with "loss_mean_overall", "macro_f1" and "micro_f1".
    """
    batch_losses, batch_preds, batch_targets = [], [], []
    with torch.no_grad():
        for batch in data:
            loss, logits, targets = model_forward(i_epoch, model, criterion, batch)
            batch_losses.append(loss.item())

            probabilities = torch.sigmoid(logits).cpu().detach().numpy()
            batch_preds.append(probabilities > 0.5)
            batch_targets.append(targets.cpu().detach().numpy())

    y_true = np.vstack(batch_targets)
    y_pred = np.vstack(batch_preds)
    return {
        "loss_mean_overall": np.mean(batch_losses),
        "macro_f1": f1_score(y_true, y_pred, average="macro"),
        "micro_f1": f1_score(y_true, y_pred, average="micro"),
    }

def model_pred(i_epoch, data, model, criterion, store_preds=False):
    """Collect per-batch targets and thresholded predictions for `data`.

    Predictions are sigmoid(logits) > 0.5. `store_preds` is accepted but unused.

    Returns:
        (targets, predictions): two lists with one NumPy array per batch.
    """
    batch_losses, batch_preds, batch_targets = [], [], []
    with torch.no_grad():
        for batch in data:
            loss, logits, targets = model_forward(i_epoch, model, criterion, batch)
            batch_losses.append(loss.item())

            probabilities = torch.sigmoid(logits).cpu().detach().numpy()
            batch_preds.append(probabilities > 0.5)
            batch_targets.append(targets.cpu().detach().numpy())

        return (batch_targets, batch_preds)

## Initiating model

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

set_seed(seed)
# Append the run name only once, so re-running this cell does not produce
# .../nameless/nameless and silently write checkpoints to a different directory.
if os.path.basename(os.path.normpath(savedir)) != name:
    savedir = os.path.join(savedir, name)
os.makedirs(savedir, exist_ok=True)

train_loader, val_loader, test_loaders = get_data_loaders()

model = MultimodalBertClf()
criterion = get_criterion()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = get_scheduler(optimizer)

logger = create_logger("%s/logfile.log" % savedir)
logger.info(model)
model.to(device)

start_epoch, global_step, n_no_improve, best_metric = 0, 0, 0, -np.inf

# Resume from the last checkpoint of this run, if there is one.
if os.path.exists(os.path.join(savedir, "checkpoint.pt")):
    # map_location lets a checkpoint written on GPU be restored on the current device.
    checkpoint = torch.load(os.path.join(savedir, "checkpoint.pt"), map_location=device)
    start_epoch = checkpoint["epoch"]
    n_no_improve = checkpoint["n_no_improve"]
    best_metric = checkpoint["best_metric"]
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])
    scheduler.load_state_dict(checkpoint["scheduler"])

## Training

In [None]:
torch.cuda.empty_cache()
logger.info("Training..")
for i_epoch in range(start_epoch, max_epochs):
    train_losses = []
    model.train()
    optimizer.zero_grad()

    for batch in tqdm(train_loader, total=len(train_loader)):
        loss, _, _ = model_forward(i_epoch, model, criterion, batch)
        # Record the loss before it is scaled for gradient accumulation, so the
        # logged "Train Loss" is on the same scale as the validation loss.
        train_losses.append(loss.item())
        if gradient_accumulation_steps > 1:
            loss = loss / gradient_accumulation_steps

        loss.backward()
        global_step += 1
        # Step the optimizer only once every `gradient_accumulation_steps` batches.
        if global_step % gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

    model.eval()
    metrics = model_eval(i_epoch, val_loader, model, criterion)
    logger.info("Train Loss: {:.4f}".format(np.mean(train_losses)))
    log_metrics("Val", metrics, logger)

    # Validation micro F1 drives LR scheduling, best-model selection and early stopping.
    tuning_metric = metrics["micro_f1"]

    scheduler.step(tuning_metric)
    is_improvement = tuning_metric > best_metric
    if is_improvement:
        best_metric = tuning_metric
        n_no_improve = 0
    else:
        n_no_improve += 1

    save_checkpoint(
        {
            "epoch": i_epoch + 1,
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "scheduler": scheduler.state_dict(),
            "n_no_improve": n_no_improve,
            "best_metric": best_metric,
        },
        is_improvement,
        savedir,
    )

    if n_no_improve >= patience:
        logger.info("No improvement. Breaking out of loop.")
        break

INFO - 01/09/23 09:01:24 - 0:00:04 - Training..
100%|██████████| 99/99 [00:51<00:00,  1.94it/s]
INFO - 01/09/23 09:02:20 - 0:01:00 - Train Loss: 0.0411
INFO - 01/09/23 09:02:20 - 0:01:00 - Val: Loss_mean_overall: 0.94827 | Macro F1 0.27295 | Micro F1: 0.52525
100%|██████████| 99/99 [00:52<00:00,  1.89it/s]
INFO - 01/09/23 09:03:37 - 0:02:17 - Train Loss: 0.0399
INFO - 01/09/23 09:03:37 - 0:02:17 - Val: Loss_mean_overall: 0.93341 | Macro F1 0.36093 | Micro F1: 0.55517
100%|██████████| 99/99 [00:51<00:00,  1.93it/s]
INFO - 01/09/23 09:04:50 - 0:03:30 - Train Loss: 0.0382
INFO - 01/09/23 09:04:50 - 0:03:30 - Val: Loss_mean_overall: 0.83606 | Macro F1 0.50697 | Micro F1: 0.57072
100%|██████████| 99/99 [00:51<00:00,  1.94it/s]
INFO - 01/09/23 09:06:03 - 0:04:43 - Train Loss: 0.0350
INFO - 01/09/23 09:06:03 - 0:04:43 - Val: Loss_mean_overall: 0.78855 | Macro F1 0.53420 | Micro F1: 0.60556
100%|██████████| 99/99 [00:52<00:00,  1.89it/s]
INFO - 01/09/23 09:07:18 - 0:05:58 - Train Loss: 0.0308


Epoch 00017: reducing learning rate of group 0 to 5.0000e-05.


100%|██████████| 99/99 [00:51<00:00,  1.93it/s]
INFO - 01/09/23 09:22:41 - 0:21:21 - Train Loss: 0.0048
INFO - 01/09/23 09:22:41 - 0:21:21 - Val: Loss_mean_overall: 0.42673 | Macro F1 0.80813 | Micro F1: 0.85052
100%|██████████| 99/99 [00:51<00:00,  1.91it/s]
INFO - 01/09/23 09:23:56 - 0:22:36 - Train Loss: 0.0042
INFO - 01/09/23 09:23:56 - 0:22:36 - Val: Loss_mean_overall: 0.43363 | Macro F1 0.80786 | Micro F1: 0.84571
100%|██████████| 99/99 [00:51<00:00,  1.93it/s]
INFO - 01/09/23 09:25:01 - 0:23:41 - Train Loss: 0.0039
INFO - 01/09/23 09:25:01 - 0:23:41 - Val: Loss_mean_overall: 0.42378 | Macro F1 0.81236 | Micro F1: 0.85287
100%|██████████| 99/99 [00:51<00:00,  1.92it/s]
INFO - 01/09/23 09:26:15 - 0:24:55 - Train Loss: 0.0035
INFO - 01/09/23 09:26:15 - 0:24:55 - Val: Loss_mean_overall: 0.43979 | Macro F1 0.81081 | Micro F1: 0.85219
100%|██████████| 99/99 [00:51<00:00,  1.93it/s]
INFO - 01/09/23 09:27:19 - 0:25:59 - Train Loss: 0.0034
INFO - 01/09/23 09:27:19 - 0:25:59 - Val: Loss_m

Epoch 00025: reducing learning rate of group 0 to 2.5000e-05.


100%|██████████| 99/99 [00:50<00:00,  1.95it/s]
INFO - 01/09/23 09:31:48 - 0:30:28 - Train Loss: 0.0025
INFO - 01/09/23 09:31:48 - 0:30:28 - Val: Loss_mean_overall: 0.45596 | Macro F1 0.81481 | Micro F1: 0.85450
100%|██████████| 99/99 [00:51<00:00,  1.92it/s]
INFO - 01/09/23 09:33:02 - 0:31:42 - Train Loss: 0.0023
INFO - 01/09/23 09:33:02 - 0:31:42 - Val: Loss_mean_overall: 0.45388 | Macro F1 0.81353 | Micro F1: 0.85287
100%|██████████| 99/99 [00:50<00:00,  1.94it/s]
INFO - 01/09/23 09:34:06 - 0:32:46 - Train Loss: 0.0023
INFO - 01/09/23 09:34:06 - 0:32:46 - Val: Loss_mean_overall: 0.45890 | Macro F1 0.81263 | Micro F1: 0.84890
100%|██████████| 99/99 [00:50<00:00,  1.94it/s]
INFO - 01/09/23 09:35:10 - 0:33:50 - Train Loss: 0.0022
INFO - 01/09/23 09:35:10 - 0:33:50 - Val: Loss_mean_overall: 0.47046 | Macro F1 0.78372 | Micro F1: 0.84758


Epoch 00029: reducing learning rate of group 0 to 1.2500e-05.


100%|██████████| 99/99 [00:51<00:00,  1.92it/s]
INFO - 01/09/23 09:36:15 - 0:34:55 - Train Loss: 0.0021
INFO - 01/09/23 09:36:15 - 0:34:55 - Val: Loss_mean_overall: 0.47016 | Macro F1 0.78616 | Micro F1: 0.84988
100%|██████████| 99/99 [00:50<00:00,  1.95it/s]
INFO - 01/09/23 09:37:20 - 0:36:00 - Train Loss: 0.0021
INFO - 01/09/23 09:37:20 - 0:36:00 - Val: Loss_mean_overall: 0.46619 | Macro F1 0.78382 | Micro F1: 0.84624
100%|██████████| 99/99 [00:51<00:00,  1.93it/s]
INFO - 01/09/23 09:38:24 - 0:37:04 - Train Loss: 0.0021
INFO - 01/09/23 09:38:24 - 0:37:04 - Val: Loss_mean_overall: 0.46887 | Macro F1 0.78755 | Micro F1: 0.85052


Epoch 00032: reducing learning rate of group 0 to 6.2500e-06.


100%|██████████| 99/99 [00:51<00:00,  1.92it/s]
INFO - 01/09/23 09:39:29 - 0:38:09 - Train Loss: 0.0021
INFO - 01/09/23 09:39:29 - 0:38:09 - Val: Loss_mean_overall: 0.47201 | Macro F1 0.78710 | Micro F1: 0.84954
100%|██████████| 99/99 [00:51<00:00,  1.94it/s]
INFO - 01/09/23 09:40:33 - 0:39:13 - Train Loss: 0.0021
INFO - 01/09/23 09:40:33 - 0:39:13 - Val: Loss_mean_overall: 0.47514 | Macro F1 0.78608 | Micro F1: 0.84855
100%|██████████| 99/99 [00:50<00:00,  1.95it/s]
INFO - 01/09/23 09:41:38 - 0:40:18 - Train Loss: 0.0020
INFO - 01/09/23 09:41:38 - 0:40:18 - Val: Loss_mean_overall: 0.47650 | Macro F1 0.78801 | Micro F1: 0.84988


Epoch 00035: reducing learning rate of group 0 to 3.1250e-06.


100%|██████████| 99/99 [00:51<00:00,  1.92it/s]
INFO - 01/09/23 09:42:43 - 0:41:23 - Train Loss: 0.0021
INFO - 01/09/23 09:42:43 - 0:41:23 - Val: Loss_mean_overall: 0.47634 | Macro F1 0.79159 | Micro F1: 0.84988
INFO - 01/09/23 09:42:51 - 0:41:31 - No improvement. Breaking out of loop.


## Testing

In [None]:
# Evaluate the best checkpoint of this run on the held-out test split. The path is
# derived from `savedir` (where the training loop writes model_best.pt) instead of
# being hard-coded, so a stale checkpoint from another run is never loaded.
load_checkpoint(model, os.path.join(savedir, "model_best.pt"))
model.eval()
test_metrics = model_eval(
    np.inf, test_loaders, model, criterion, store_preds=False
)

print('test_metrics')
print()
test_metrics

test_metrics



{'loss_mean_overall': 0.5442859019551959,
 'macro_f1': 0.8334651599239125,
 'micro_f1': 0.8565400843881857}