# Multimodal

## Setup

### Imports

In [1]:
# Imports
import os
import numpy as np
import pandas as pd
import random
from tqdm.notebook import tqdm
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification, pipeline
from torch.utils.data import Dataset, DataLoader
from src.utils import *
from src.models import *
from src.process_reports import *
from src.train import train_mm, kfold_cv

%load_ext autoreload
%autoreload 2

### Set seed & device

In [2]:
# Seed the RNGs through the project helper so runs are repeatable.
set_seed(42)

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Trade a little float32 matmul precision for speed (see the torch docs for
# `set_float32_matmul_precision`).
torch.set_float32_matmul_precision('medium')

### Extract text feats from reports

In [3]:
%%script false --no-raise-error
# NOTE: the `%%script false` magic above hands this cell to the `false` command, so
# nothing below is executed; remove the magic to (re)extract the report features.

# load biobert model & tokenizer (one checkpoint name shared by both loaders)
biobert_name = 'dmis-lab/biobert-large-cased-v1.1-mnli'
tokenizer = AutoTokenizer.from_pretrained(biobert_name)
lm = AutoModel.from_pretrained(biobert_name)

data_dir = '/mnt/disks/ext/data/gdc/tcga/brca'
output_dir = 'data/report_feats'
# NOTE(review): output_dir is defined but never passed to extract_text_features;
# confirm where the helper writes its features before relying on this path.
extract_text_features(lm, tokenizer, data_dir)

## Task: Predict sTILs from WSIs & reports

### Load data

In [2]:
# Build the train / val / test dataloaders for the sTILs task.
bsz = 32  # batch size shared by the three loaders
root_dir = './'
data_file_stils = 'data/stils/data_stils.csv'

train_loader, val_loader, test_loader = create_dataloaders(
    'stils',
    data_file_stils,
    use_rand_splits=True,
    bsz=bsz,
)

size of train set: 557, val set: 69, test set: 70


### Train & eval

In [4]:
# %%script false --no-raise-error
# (uncomment the magic on the first line to skip this cell during a full re-run)

# init model
mode = 'img'  # input modalities: 'text', 'img', or 'multimodal'
# In this cell `split` is only used to name the checkpoint and TensorBoard run, so
# it has to describe the loaders actually being trained on. The sTILs loaders are
# created with use_rand_splits=True, hence 'rand' (the former 'def' label
# mislabelled the saved artifacts).
split = 'rand'   # dataset split: 'def' or 'rand'
model = Attention1DSTILRegressor(mode=mode)

# set training args
args = {
    'num_epochs': 100,
    'ckpt_name': f'ckpt_best_{mode}_{split}_split',
    'resume_ckpt': None,
    'tblog_name': f'best_{mode}_{split}_split',
}

# train model
model, trainer = train_mm(model, train_loader, val_loader, args)

# evaluate the trained model on the test set (last expression -> displayed as cell output)
trainer.test(model, test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type            | Params
-----------------------------------------------
0 | attention  | Sequential      | 262 K 
1 | regressor  | Sequential      | 2.0 K 
2 | loss       | MSELoss         | 0     
3 | train_corr | PearsonCorrCoef | 0     
4 | train_r2   | R2Score         | 0     
5 | val_corr   | PearsonCorrCoef | 0     
6 | val_r2     | R2Score         | 0     
7 | test_corr  | PearsonCorrCoef | 0     
8 | test_r2    | R2Score         | 0     
-----------------------------------------------
26

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_loss improved. New best score: 0.135
  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


training on device: cuda:0


Testing: 0it [00:00, ?it/s]

[{'test_corr_epoch': 0.049729831516742706,
  'test_r2_epoch': -1.7736320495605469}]

### K-fold CV

In [5]:
# %%script false --no-raise-error
# K-fold cross-validation of the sTILs regressor.

# data + model class handed to kfold_cv
dataset = MMSTILDataset()
model_class = Attention1DSTILRegressor

# settings
bsz = 64
mode = 'img'
target = 'stils'

# keyword arguments for the model constructor
model_args = dict(mode=mode, target=target)

# keyword arguments for the training loop
train_args = dict(
    bsz=bsz,
    k=5,
    num_epochs=100,
    patience=5,
    save_top_k=0,
    tblog_name=f'best_{mode}_kfold',
    enable_progress_bar=False,
)

# run k-fold CV; the result is indexed per fold by metric name below
res_kfold_cv = kfold_cv(model_class, dataset, model_args, train_args)

# average each reported test metric over the folds, rounded to 3 decimals
metrics = ['test_corr_epoch', 'test_r2_epoch']
avg_res = {
    metric_name: np.mean([fold_res[metric_name] for fold_res in res_kfold_cv]).round(3)
    for metric_name in metrics
}
print(f"avg res over {train_args['k']} folds: {avg_res}")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type            | Params
-----------------------------------------------
0 | attention  | Sequential      | 262 K 
1 | regressor  | Sequential      | 2.0 K 
2 | loss       | MSELoss         | 0     
3 | train_corr | PearsonCorrCoef | 0     
4 | train_r2   | R2Score         | 0     
5 | val_corr   | PearsonCorrCoef | 0     
6 | val_r2     | R2Score         | 0     
7 | test_corr  | PearsonCorrCoef | 0     
8 | test_r2    | R2Score         | 0     
-----------------------------------------------
264 K     Trainable params
0         Non-trainable params
264 K     Total params
1.058     Total estimated model params size (MB)


training fold 1/5


Metric val_loss improved. New best score: 0.114
Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 0.096
Metric val_loss improved by 0.021 >= min_delta = 0.0. New best score: 0.075
Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.056
Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.045
Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.042
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.042
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.042
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.042
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.042
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.042
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.041
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.041
Metric val_loss improved by 0.000 >= min

training on device: cpu


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type            | Params
-----------------------------------------------
0 | attention  | Sequential      | 262 K 
1 | regressor  | Sequential      | 2.0 K 
2 | loss       | MSELoss         | 0     
3 | train_corr | PearsonCorrCoef | 0     
4 | train_r2   | R2Score         | 0     
5 | val_corr   | PearsonCorrCoef | 0     
6 | val_r2     | R2Score         | 0     
7 | test_corr  | PearsonCorrCoef | 0     
8 | test_r2    | R2Score         | 0     
-----------------------------------------------
264 K     Trainable params
0         Non-trainable params
264 K     Total params
1.058     Total estimated model params size (MB)


training fold 2/5


Metric val_loss improved. New best score: 0.126
Metric val_loss improved by 0.027 >= min_delta = 0.0. New best score: 0.100
Metric val_loss improved by 0.029 >= min_delta = 0.0. New best score: 0.071
Metric val_loss improved by 0.020 >= min_delta = 0.0. New best score: 0.050
Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.040
Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.037
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.035
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.035
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.035
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.034
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.034
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.034
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.034
Metric val_loss improved by 0.000 >= min

training on device: cpu


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type            | Params
-----------------------------------------------
0 | attention  | Sequential      | 262 K 
1 | regressor  | Sequential      | 2.0 K 
2 | loss       | MSELoss         | 0     
3 | train_corr | PearsonCorrCoef | 0     
4 | train_r2   | R2Score         | 0     
5 | val_corr   | PearsonCorrCoef | 0     
6 | val_r2     | R2Score         | 0     
7 | test_corr  | PearsonCorrCoef | 0     
8 | test_r2    | R2Score         | 0     
-----------------------------------------------
264 K     Trainable params
0         Non-trainable params
264 K     Total params
1.058     Total estimated model params size (MB)


training fold 3/5


Metric val_loss improved. New best score: 0.124
Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.100
Metric val_loss improved by 0.027 >= min_delta = 0.0. New best score: 0.073
Metric val_loss improved by 0.023 >= min_delta = 0.0. New best score: 0.050
Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.038
Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.033
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.031
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.031
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.030
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.030
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.030
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.030
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.029
Metric val_loss improved by 0.000 >= min

training on device: cpu


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type            | Params
-----------------------------------------------
0 | attention  | Sequential      | 262 K 
1 | regressor  | Sequential      | 2.0 K 
2 | loss       | MSELoss         | 0     
3 | train_corr | PearsonCorrCoef | 0     
4 | train_r2   | R2Score         | 0     
5 | val_corr   | PearsonCorrCoef | 0     
6 | val_r2     | R2Score         | 0     
7 | test_corr  | PearsonCorrCoef | 0     
8 | test_r2    | R2Score         | 0     
-----------------------------------------------
264 K     Trainable params
0         Non-trainable params
264 K     Total params
1.058     Total estimated model params size (MB)


training fold 4/5


Metric val_loss improved. New best score: 0.133
Metric val_loss improved by 0.027 >= min_delta = 0.0. New best score: 0.106
Metric val_loss improved by 0.031 >= min_delta = 0.0. New best score: 0.075
Metric val_loss improved by 0.023 >= min_delta = 0.0. New best score: 0.052
Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 0.041
Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.037
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.035
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.034
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.033
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.033
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.033
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.032
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.032
Metric val_loss improved by 0.000 >= min

training on device: cpu


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type            | Params
-----------------------------------------------
0 | attention  | Sequential      | 262 K 
1 | regressor  | Sequential      | 2.0 K 
2 | loss       | MSELoss         | 0     
3 | train_corr | PearsonCorrCoef | 0     
4 | train_r2   | R2Score         | 0     
5 | val_corr   | PearsonCorrCoef | 0     
6 | val_r2     | R2Score         | 0     
7 | test_corr  | PearsonCorrCoef | 0     
8 | test_r2    | R2Score         | 0     
-----------------------------------------------
264 K     Trainable params
0         Non-trainable params
264 K     Total params
1.058     Total estimated model params size (MB)


training fold 5/5


Metric val_loss improved. New best score: 0.118
Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.094
Metric val_loss improved by 0.027 >= min_delta = 0.0. New best score: 0.067
Metric val_loss improved by 0.021 >= min_delta = 0.0. New best score: 0.046
Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 0.035
Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.031
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.029
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.029
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.029
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.028
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.028
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.028
Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.028
Metric val_loss improved by 0.000 >= min

training on device: cpu


avg res over 5 folds: {'test_corr_epoch': 0.457, 'test_r2_epoch': 0.184}


## Task: Predict subtype & grade from multimodal data

### Annotate subtype & grade from path reports

In [6]:
%%script false --no-raise-error
# NOTE: `%%script false` skips this cell; remove the magic to re-run the annotation.
# The output recorded below covers 1068 reports and took roughly 39 minutes.

# directory of distilled pathology reports and the chat model used to label them
reports_dir = 'data/reports_distilled'
lm_name = 'gpt-3.5-turbo'

# args for generation
gen_args = dict(max_tokens=200)

# To debug a single report instead of the whole directory:
#   prompt = create_zs_prompt('data/reports_distilled/TCGA-WT-AB41.txt')
#   out = gen_subtype_grade_zs(lm_name, prompt, api='openai', args=gen_args)

# classify every report in reports_dir and preview the first rows of the result
df_res = classify_reports_zs(lm_name, reports_dir, api='openai', args=gen_args)
df_res.head(10)

  0%|          | 0/1068 [00:00<?, ?it/s]

 21%|██        | 219/1068 [03:57<15:56,  1.13s/it]

error parsing output for TCGA-C8-A27A.txt...


 88%|████████▊ | 939/1068 [16:19<02:11,  1.02s/it]

HTTP error: 502 Server Error: Bad Gateway for url: https://api.openai.com/v1/chat/completions, retrying...


 99%|█████████▉| 1055/1068 [28:29<00:13,  1.06s/it]  

HTTP error: 502 Server Error: Bad Gateway for url: https://api.openai.com/v1/chat/completions, retrying...


100%|██████████| 1068/1068 [38:51<00:00,  2.18s/it] 


Unnamed: 0,case_id,region,localization,grade
13,TCGA-3C-AAAU,Lobular,Invasive,3
952,TCGA-3C-AALI,Ductal,Invasive,2
458,TCGA-3C-AALJ,Ductal,Invasive,3
701,TCGA-3C-AALK,ductal,invasive,
568,TCGA-4H-AAAK,Lobular,Invasive,2
420,TCGA-5L-AAT0,lobular,invasive,1
878,TCGA-5L-AAT1,lobular,invasive,1
341,TCGA-5T-A9QA,"Ductal (as the report mentions ""invasive ducta...","Invasive (as the report mentions ""invasive car...","3 (as the report mentions ""Histologic Grade"
1051,TCGA-A1-A0SB,other (adenoid cystic carcinoma),invasive,1
371,TCGA-A1-A0SD,Ductal,Invasive,Grade 2


### Train & eval model

### Set hparams

In [2]:
# batch size for dataloaders
bsz = 128

# emb dim of wsi feats
img_channels_in = 2048

# emb dim of report feats
text_channels_in = 1024

### Load data

In [3]:
# Create the dataset
root_dir = './'
# Named after the file it points to, so it no longer shadows the sTILs data path
# (`data_file_stils`) defined in the sTILs section of this notebook.
data_file_subtype_grade = 'data/data_subtype_grade.csv'
use_rand_splits = False # use random splits or predefined splits

# one MMDataset per split, built with identical arguments apart from the split name
train_data, val_data, test_data = (
    MMDataset(root_dir, data_file_subtype_grade, split_name, use_rand_splits)
    for split_name in ('train', 'val', 'test')
)

print(f'size of train set: {len(train_data)}, val set: {len(val_data)}, test set: {len(test_data)}')

# Create the dataloaders (only the training loader shuffles)
loader_kwargs = dict(batch_size=bsz, num_workers=12, collate_fn=MMDataset.mm_collate_fn)
train_loader = DataLoader(train_data, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_data, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_data, shuffle=False, **loader_kwargs)

size of train set: 855, val set: 107, test set: 107


### Train & eval

In [None]:
# init model
mode = 'multimodal'  # input modalities: 'text', 'img', or 'multimodal'
target = 'grade'  # prediction target: 'region', 'localization', or 'grade'
# Derive the split label from the flag the datasets were built with, so the
# checkpoint / TensorBoard names cannot drift from the data actually used.
split = 'rand' if use_rand_splits else 'def'   # dataset split: 'def' or 'rand'
model = Attention1DClassifier(mode=mode, target=target)

# set training args
args = {
    'num_epochs': 100,
    'ckpt_name': f'ckpt_best_{mode}_{split}_split',
    'resume_ckpt': None,
    'tblog_name': f'best_{mode}_{split}_split',
}

# train model
model, trainer = train_mm(model, train_loader, val_loader, args)

# evaluate the trained model on the test set (last expression -> displayed as cell output)
trainer.test(model, test_loader)

### K-fold CV

In [3]:
# %%script false --no-raise-error
# K-fold cross-validation of the subtype/grade classifier.
dataset = MMDataset(data_file='data/data_subtype_grade.csv')

# model class + constructor kwargs
model_class = Attention1DClassifier
mode = 'multimodal'
target = 'grade'
model_args = dict(mode=mode, target=target)

# keyword arguments for the training loop
train_args = dict(
    bsz=128,
    k=5,
    num_epochs=50,
    save_top_k=0,
    tblog_name=f'best_{mode}_kfoldcv',
    enable_progress_bar=False,
)

# run k-fold CV; the result is indexed per fold by metric name below
res_kfold_cv = kfold_cv(model_class, dataset, model_args, train_args)

# print avg results over k folds (rounded to 3 decimals)
metrics = ['test_acc_epoch']
avg_res = {
    metric_name: np.mean([fold_res[metric_name] for fold_res in res_kfold_cv]).round(3)
    for metric_name in metrics
}
print(f"avg CV res for {target} classification task using {mode} inputs over {train_args['k']} folds: {avg_res}")

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


training fold 1/5


You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type               | Params
--------------------------------------------------
0 | attention  | Sequential         | 278 K 
1 | classifier | Sequential         | 12.3 K
2 | loss       | CrossEntropyLoss   | 0     
3 | acc        | MulticlassAccuracy | 0     
--------------------------------------------------
291 K     Trainable params
0         Non-trainable params
291 K     Total params
1.165     Total estimated model params size (MB)
Metric val_loss improved. New best score: 1.198
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.196
Metric val_los

training on device: cpu


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type               | Params
--------------------------------------------------
0 | attention  | Sequential         | 278 K 
1 | classifier | Sequential         | 12.3 K
2 | loss       | CrossEntropyLoss   | 0     
3 | acc        | MulticlassAccuracy | 0     
--------------------------------------------------
291 K     Trainable params
0         Non-trainable params
291 K     Total params
1.165     Total estimated model params size (MB)


training fold 2/5


Metric val_loss improved. New best score: 1.261
Metric val_loss improved by 0.053 >= min_delta = 0.0. New best score: 1.208
Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 1.204
Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 1.194
Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 1.190
Metric val_loss improved by 0.021 >= min_delta = 0.0. New best score: 1.169
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.167
Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 1.151
Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 1.148
Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 1.132
Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 1.129
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.127
Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 1.115
Metric val_loss improved by 0.005 >= min

training on device: cpu


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type               | Params
--------------------------------------------------
0 | attention  | Sequential         | 278 K 
1 | classifier | Sequential         | 12.3 K
2 | loss       | CrossEntropyLoss   | 0     
3 | acc        | MulticlassAccuracy | 0     
--------------------------------------------------
291 K     Trainable params
0         Non-trainable params
291 K     Total params
1.165     Total estimated model params size (MB)


training fold 3/5


Metric val_loss improved. New best score: 1.256
Metric val_loss improved by 0.061 >= min_delta = 0.0. New best score: 1.195
Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 1.194
Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 1.184
Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 1.179
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.177
Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 1.160
Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 1.152
Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 1.145
Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 1.135
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.133
Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 1.128
Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 1.120
Metric val_loss improved by 0.000 >= min

training on device: cpu


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type               | Params
--------------------------------------------------
0 | attention  | Sequential         | 278 K 
1 | classifier | Sequential         | 12.3 K
2 | loss       | CrossEntropyLoss   | 0     
3 | acc        | MulticlassAccuracy | 0     
--------------------------------------------------
291 K     Trainable params
0         Non-trainable params
291 K     Total params
1.165     Total estimated model params size (MB)


training fold 4/5


Metric val_loss improved. New best score: 1.213
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.210
Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 1.191
Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 1.183
Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 1.171
Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 1.155
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.153
Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 1.139
Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 1.133
Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 1.117
Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 1.110
Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 1.098
Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 1.086
Metric val_loss improved by 0.003 >= min

training on device: cpu


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type               | Params
--------------------------------------------------
0 | attention  | Sequential         | 278 K 
1 | classifier | Sequential         | 12.3 K
2 | loss       | CrossEntropyLoss   | 0     
3 | acc        | MulticlassAccuracy | 0     
--------------------------------------------------
291 K     Trainable params
0         Non-trainable params
291 K     Total params
1.165     Total estimated model params size (MB)


training fold 5/5


Metric val_loss improved. New best score: 1.209
Metric val_loss improved by 0.031 >= min_delta = 0.0. New best score: 1.177
Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 1.170
Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 1.159
Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.158
Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.149
Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 1.139
Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 1.125
Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 1.107
Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 1.097
Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 1.083
Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 1.075
Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 1.067
Metric val_loss improved by 0.016 >= min

training on device: cpu


avg CV res for grade classification task using multimodal inputs over 5 folds: {'test_acc_epoch': 0.559}
