# Find the model

## Global configurations

In [1]:
import logging
import logging.config
import os
import sys
import warnings
from enum import auto
import pandas as pd
import numpy as np
# `display` lives in the public `IPython.display` module; importing it from
# `IPython.core.display` is deprecated and triggers a warning on import.
from IPython.display import display

# Project layout. The checkout location may be overridden through the
# TCRBERT_ROOT environment variable; the default is the original path.
rootdir = os.environ.get('TCRBERT_ROOT', '/home/hym/trunk/TCRBert')
workdir = '%s/notebook' % rootdir
datadir = '%s/data' % rootdir
srcdir = '%s/tcrbert' % rootdir
outdir = '%s/output' % rootdir

# Every relative path used below ('../config/...') is resolved from the
# notebook directory, so switch to it before touching any file.
os.chdir(workdir)

# Make the project package importable; these must precede the tcrbert imports.
sys.path.append(rootdir)
sys.path.append(srcdir)

from tcrbert.exp import Experiment
from tcrbert.predlistener import PredResultRecoder


# Display
# Use the canonical pandas option names; the dotted spellings
# ('display.max.rows') only resolved through pandas' regex matching of keys.
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 2000)

# Logger
# Silence all Python warnings from here on, then configure logging from the
# project's logging.conf and raise the 'tcrbert' logger to INFO.
warnings.filterwarnings('ignore')
logging.config.fileConfig('../config/logging.conf')
logger = logging.getLogger('tcrbert')
logger.setLevel(logging.INFO)

# Target experiment
# A single key drives both the configuration (re)load and the experiment
# lookup, so switching experiments means editing only this one line.
exp_key = 'testexp'
Experiment.load_exp_conf(exp_key, reload=True)
experiment = Experiment.from_key(exp_key)

exp_conf = experiment.exp_conf

# Show the configuration the experiment will actually run with.
display(exp_conf)

  from IPython.core.display import display


2023-11-22 17:00:33 [INFO]: Loaded exp_conf: {'title': 'testexp', 'description': 'testexp', 'paper': 'ASAP', 'model_config': '../config/bert-base/', 'outdir': '../output/testexp', 'train': {'pretrained_model': {'type': 'tape', 'location': '../config/bert-base/'}, 'data_parallel': True, 'backup': 'train.bak.{date}.tar.gz', 'rounds': [{'data': 'dash_vdjdb_mcpas', 'test_size': 0.2, 'batch_size': 256, 'n_epochs': 3, 'n_workers': 12, 'metrics': ['accuracy'], 'optimizer': {'type': 'adamw', 'lr': 0.0001}, 'train_bert_encoders': [-10, None], 'early_stopper': {'monitor': 'accuracy', 'patience': 1}, 'model_checkpoint': {'chk': 'train.{round}.model_{epoch}.chk', 'monitor': 'accuracy', 'save_best_only': True, 'period': 1}, 'result': 'train.{round}.result.json'}, {'data': 'iedb_sars2', 'test_size': 0.2, 'batch_size': 256, 'n_epochs': 3, 'n_workers': 12, 'metrics': ['accuracy'], 'optimizer': {'type': 'adamw', 'lr': 0.0001}, 'train_bert_encoders': [-6, None], 'early_stopper': {'monitor': 'accuracy', 

{'title': 'testexp',
 'description': 'testexp',
 'paper': 'ASAP',
 'model_config': '../config/bert-base/',
 'outdir': '../output/testexp',
 'train': {'pretrained_model': {'type': 'tape',
   'location': '../config/bert-base/'},
  'data_parallel': True,
  'backup': 'train.bak.{date}.tar.gz',
  'rounds': [{'data': 'dash_vdjdb_mcpas',
    'test_size': 0.2,
    'batch_size': 256,
    'n_epochs': 3,
    'n_workers': 12,
    'metrics': ['accuracy'],
    'optimizer': {'type': 'adamw', 'lr': 0.0001},
    'train_bert_encoders': [-10, None],
    'early_stopper': {'monitor': 'accuracy', 'patience': 1},
    'model_checkpoint': {'chk': 'train.{round}.model_{epoch}.chk',
     'monitor': 'accuracy',
     'save_best_only': True,
     'period': 1},
    'result': 'train.{round}.result.json'},
   {'data': 'iedb_sars2',
    'test_size': 0.2,
    'batch_size': 256,
    'n_epochs': 3,
    'n_workers': 12,
    'metrics': ['accuracy'],
    'optimizer': {'type': 'adamw', 'lr': 0.0001},
    'train_bert_encoders'

## Test training

In [2]:
from tcrbert.dataset import TCREpitopeSentenceDataset, CN
from collections import OrderedDict, Counter
from torch.utils.data import DataLoader

# Load the two evaluation datasets by key.
sh_ds = TCREpitopeSentenceDataset.from_key('shomuradova')
sh_df = sh_ds.df_enc
im_ds = TCREpitopeSentenceDataset.from_key('immunecode')

# Remove duplicated CDR3beta seqs with Shomuradova
# (vectorised membership test instead of scanning the array once per row).
im_ds.df_enc = im_ds.df_enc[
        ~im_ds.df_enc[CN.cdr3b].isin(sh_df[CN.cdr3b].values)
]
im_df = im_ds.df_enc

metrics = ['accuracy', 'f1', 'roc_auc']

# Train
# NOTE(review): the run recorded in this notebook's output failed inside
# train(), during validation of the second round, with a DataParallel
# "forward() missing 1 required positional argument: 'input_ids'" TypeError.
# Presumably the final, smaller validation batch could not be split across
# both GPUs -- confirm in the training code before relying on this cell.
experiment.train()

# Backup the train results
experiment.backup_train_results()

# Get best model and evaluate the model
model = experiment.load_eval_model()
eval_recoder = PredResultRecoder(output_attentions=True, output_hidden_states=True)
model.add_pred_listener(eval_recoder)

# Evaluate on each dataset in turn. `ds` was previously used without ever
# being defined, so this part of the cell raised NameError on a fresh kernel.
for ds in (sh_ds, im_ds):
    # One batch holding the whole dataset, in its original order.
    data_loader = DataLoader(ds, batch_size=len(ds), shuffle=False, num_workers=2)
    logger.info('Predicting for %s' % ds.name)
    model.predict(data_loader=data_loader, metrics=metrics)

2023-11-22 17:00:42 [INFO]: shomuradova dataset was loaded from ../output/shomuradova.data.csv, df.shape: (610, 9)
2023-11-22 17:00:42 [INFO]: immunecode dataset was loaded from ../output/immunecode.data.csv, df.shape: (742, 9)
2023-11-22 17:00:42 [INFO]: Begin train at 2023-11-22 17:00:42.619333
2023-11-22 17:00:42 [INFO]: Loading the TAPE pretrained model from ../config/bert-base/
2023-11-22 17:00:43 [INFO]: Using DataParallel model with 2 GPUs
2023-11-22 17:00:43 [INFO]: Start 2 train rounds of testexp at 2023-11-22 17:00:42.619333
2023-11-22 17:00:43 [INFO]: train_conf: {'pretrained_model': {'type': 'tape', 'location': '../config/bert-base/'}, 'data_parallel': True, 'backup': 'train.bak.{date}.tar.gz', 'rounds': [{'data': 'dash_vdjdb_mcpas', 'test_size': 0.2, 'batch_size': 256, 'n_epochs': 3, 'n_workers': 12, 'metrics': ['accuracy'], 'optimizer': {'type': 'adamw', 'lr': 0.0001}, 'train_bert_encoders': [-10, None], 'early_stopper': {'monitor': 'accuracy', 'patience': 1}, 'model_chec

Training in epoch 0/3: 100%|██████████| 79/79 [01:00<00:00,  1.31batch/s]
Validating in epoch 0/3: 100%|██████████| 20/20 [00:10<00:00,  1.82batch/s]

2023-11-22 17:01:56 [INFO]: [EvalScoreRecoder]: In epoch 0/3, loss train score: 0.6775637993329688, val score: 0.6418470144271851
2023-11-22 17:01:56 [INFO]: [EvalScoreRecoder]: In epoch 0/3, accuracy train score: 0.568565168702086, val score: 0.6159489329268293
2023-11-22 17:01:56 [INFO]: [EarlyStopper]: In epoch 0/3, accuracy score: 0.6159489329268293, best accuracy score: -inf;update best score to 0.6159489329268293
2023-11-22 17:01:56 [INFO]: [ModelCheckpoint]: Checkpoint at epoch 0: accuracy improved from -inf to 0.6159489329268293, saving model to ../output/testexp/train.0.model_0.chk



Training in epoch 1/3: 100%|██████████| 79/79 [00:59<00:00,  1.34batch/s]
Validating in epoch 1/3: 100%|██████████| 20/20 [00:10<00:00,  1.84batch/s]

2023-11-22 17:03:06 [INFO]: [EvalScoreRecoder]: In epoch 1/3, loss train score: 0.5968728804890113, val score: 0.5593952238559723
2023-11-22 17:03:06 [INFO]: [EvalScoreRecoder]: In epoch 1/3, accuracy train score: 0.6559282514708504, val score: 0.680201981707317
2023-11-22 17:03:06 [INFO]: [EarlyStopper]: In epoch 1/3, accuracy score: 0.680201981707317, best accuracy score: 0.6159489329268293;update best score to 0.680201981707317
2023-11-22 17:03:06 [INFO]: [ModelCheckpoint]: Checkpoint at epoch 1: accuracy improved from 0.6159489329268293 to 0.680201981707317, saving model to ../output/testexp/train.0.model_1.chk



Training in epoch 2/3: 100%|██████████| 79/79 [00:58<00:00,  1.35batch/s]
Validating in epoch 2/3: 100%|██████████| 20/20 [00:10<00:00,  1.84batch/s]

2023-11-22 17:04:16 [INFO]: [EvalScoreRecoder]: In epoch 2/3, loss train score: 0.5154932401602781, val score: 0.5430145025253296
2023-11-22 17:04:16 [INFO]: [EvalScoreRecoder]: In epoch 2/3, accuracy train score: 0.7241661002406846, val score: 0.707398056402439
2023-11-22 17:04:16 [INFO]: [EarlyStopper]: In epoch 2/3, accuracy score: 0.707398056402439, best accuracy score: 0.680201981707317;update best score to 0.707398056402439
2023-11-22 17:04:16 [INFO]: [ModelCheckpoint]: Checkpoint at epoch 2: accuracy improved from 0.680201981707317 to 0.707398056402439, saving model to ../output/testexp/train.0.model_2.chk





2023-11-22 17:04:16 [INFO]: [EvalScoreRecoder]: loss train socres: [0.6775637993329688, 0.5968728804890113, 0.5154932401602781], val scores: [0.6418470144271851, 0.5593952238559723, 0.5430145025253296]
2023-11-22 17:04:16 [INFO]: [EvalScoreRecoder]: accuracy train socres: [0.568565168702086, 0.6559282514708504, 0.7241661002406846], val scores: [0.6159489329268293, 0.680201981707317, 0.707398056402439]
2023-11-22 17:04:16 [INFO]: End training...
2023-11-22 17:04:16 [INFO]: 0 train round result: {'metrics': ['accuracy'], 'train.score': OrderedDict([('loss', [0.6775637993329688, 0.5968728804890113, 0.5154932401602781]), ('accuracy', [0.568565168702086, 0.6559282514708504, 0.7241661002406846])]), 'val.score': OrderedDict([('loss', [0.6418470144271851, 0.5593952238559723, 0.5430145025253296]), ('accuracy', [0.6159489329268293, 0.680201981707317, 0.707398056402439])]), 'n_epochs': 3, 'stopped_epoch': 2, 'monitor': 'accuracy', 'best_epoch': 2, 'best_score': 0.707398056402439, 'best_chk': '../

Training in epoch 0/3: 100%|██████████| 309/309 [03:35<00:00,  1.44batch/s]
Validating in epoch 0/3:  99%|█████████▊| 77/78 [00:42<00:00,  1.80batch/s]


TypeError: Caught TypeError in replica 1 on device 1.
Original Traceback (most recent call last):
  File "/home/hym/miniconda3/envs/TCRBert/lib/python3.8/site-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
  File "/home/hym/miniconda3/envs/TCRBert/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
TypeError: forward() missing 1 required positional argument: 'input_ids'
