Далее сравним две модели (TCR-BERT и ProtT5) на тестовой выборке и посмотрим, какая из них лучше справляется с задачей по метрикам F1-score, accuracy, precision и recall.

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
import re
import torch
from torch.utils.data import DataLoader

from datasets import Dataset

from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

# Select the compute device once for the whole notebook: CUDA when PyTorch
# reports it as available, otherwise fall back to the CPU.
if not torch.cuda.is_available():
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

from my_model import CustomModel, train_per_protein, load_model

There are 1 GPU(s) available.
We will use the GPU: Tesla V100S-PCIE-32GB


In [2]:
# Seed every random number generator the notebook touches so that a
# Restart & Run All reproduces the same numbers.
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
# Seed PyTorch as well (CPU generator and all CUDA devices). The CUDA call is
# documented as safe to make when no GPU is present.
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [7]:
def create_dataset(tokenizer, seqs, seqs_labels, max_length=25):
    """Tokenize sequences and wrap them, with their labels, in a `Dataset`.

    Args:
        tokenizer: Callable invoked as
            ``tokenizer(seqs, max_length=..., padding='longest', truncation=True)``;
            its result is passed to ``Dataset.from_dict``.
        seqs: Sequences to tokenize (passed to the tokenizer unchanged).
        seqs_labels: Labels stored in the ``"labels"`` column; expected to be
            aligned one-to-one with ``seqs``.
        max_length: Truncation length forwarded to the tokenizer. Defaults to
            25, the value this notebook previously hard-coded.

    Returns:
        The dataset built from the tokenizer output with an added
        ``"labels"`` column.
    """
    tokenized = tokenizer(seqs, max_length=max_length, padding='longest', truncation=True)
    dataset = Dataset.from_dict(tokenized)
    # add_column returns a new dataset, so the result has to be re-bound.
    dataset = dataset.add_column("labels", seqs_labels)

    return dataset

In [12]:
# Read the test sequences (`cdr3aa` column) and the reference labels
# (`type` column) from their CSV files, then display the sequences.
test_dataset, ans = (
    pd.read_csv(csv_path) for csv_path in ('test_dataset.csv', 'ans.csv')
)
test_dataset

Unnamed: 0,cdr3aa
0,C A S S L A G A G P Y N E Q F F
1,C A S S F Q G T S S Y Y T F
2,C A I G I A G G N E Q F F
3,C A S S L A A S G G R Y T Q Y F
4,C A L A S Y G N N R L A F
...,...
17218,C L L D R V S N Y Q L I W
17219,C A S S P Q G V T G E L F F
17220,C A S S L L P G G A S P D T Q Y F
17221,C A S S Q D R D L L Q E T Q Y F


In [13]:
# Display the reference labels read from ans.csv; the `type` column supplies
# the labels used when building the evaluation dataset.
ans

Unnamed: 0,type
0,1
1,1
2,1
3,1
4,0
...,...
17218,0
17219,1
17220,1
17221,1


In [4]:
# Load the first model from its checkpoint. With mod_type='TCR-bert' the
# result of load_model is bound to a single name here.
TCRbert = load_model("./TCR_bert_finetuned.pth", mod_type='TCR-bert', num_labels=2)
# NOTE(review): the attribute is spelled `maximun_len`. Presumably this matches
# the name the model class in my_model reads -- confirm it is not a typo for
# `maximum_len`, in which case this assignment would silently have no effect.
TCRbert.maximun_len = 25

# Load the second model; for this call load_model's result is unpacked into a
# (tokenizer, model) pair.
tokenizer, Prottrans = load_model("./PT5_GB1_finetuned.pth", num_labels=2, mixed=False)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at wukevin/tcr-bert and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([45, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([45]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of the model checkpoint at Rostlab/prot_t5_xl_uniref50 were not used when initializing T5EncoderModel: ['decoder.block.22.layer.2.DenseReluDense.wo.weight', 'decoder.block.1.layer.1.EncDecAttention.k.weight', 'decoder.block.4.layer.0.SelfAttention.o.weight', 'decoder.final_layer_norm.weight', 'decoder.block.14.layer.2.DenseReluDense.wo.weight', 'decoder.block.19.layer.1.layer_norm.weight', 'decoder.block.11.layer.0.layer_norm.weight', 'decoder.block.22.layer.1.En

ProtT5_Classfier
Trainable Parameter: 1209193474
ProtT5_LoRA_Classfier
Trainable Parameter: 3559426



## F1-score

In [5]:
# Score the TCRbert model on the test sequences.
TCRbert.to(device)
# predict() comes from my_model; assumed to return one predicted class label
# per input sequence, in input order -- TODO confirm.
pred_labels = TCRbert.predict(test_dataset['cdr3aa'])

# Pass the label column itself rather than the whole DataFrame, so y_true is an
# unambiguous 1-D vector (same column the Prottrans dataset is built from).
f1_sc_tcr = f1_score(ans['type'], pred_labels)
print('Total f1_score: %.5f' % f1_sc_tcr)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Total f1_score: 0.99990


In [21]:
# Score the Prottrans model on the test sequences, batch by batch.
Prottrans.to(device)

# Tokenize the sequences and attach the reference labels; tensors are
# materialised directly on the selected device.
test_set = create_dataset(tokenizer, list(test_dataset['cdr3aa']), list(ans['type']))
test_set = test_set.with_format("torch", device=device)

# shuffle=False keeps predictions in the same row order as `ans`.
test_dataloader = DataLoader(test_set, batch_size=8, shuffle=False)

Prottrans.eval()

pred_labels_2 = []
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        logits = Prottrans(input_ids, attention_mask=attention_mask).logits
        # Reduce each row of logits to its predicted class right away, so
        # `pred_labels_2` only ever holds labels (never raw logits).
        # Assumes logits is (batch, num_labels) -- TODO confirm.
        pred_labels_2.extend(logits.argmax(dim=-1).tolist())

# Pass the label column itself rather than the whole DataFrame as y_true.
f1_sc_prt = f1_score(ans['type'], pred_labels_2)
print('Total f1_score: %.5f' % f1_sc_prt)

Total f1_score: 0.99966


## Accuracy

In [22]:
# Accuracy of both models against the reference labels. The `type` column is
# passed explicitly so y_true is a 1-D label vector rather than a DataFrame.
acc_tcr = accuracy_score(ans['type'], pred_labels)
print('Total Accuracy TCRbert: %.5f' % acc_tcr)

acc_prt = accuracy_score(ans['type'], pred_labels_2)
print('Total Accuracy Prottrans: %.5f' % acc_prt)

Total Accuracy TCRbert: 0.99988
Total Accuracy Prottrans: 0.99959


## Precision

In [23]:
# Precision of both models against the reference labels. The `type` column is
# passed explicitly so y_true is a 1-D label vector rather than a DataFrame.
precision_tcr = precision_score(ans['type'], pred_labels)
precision_prttr = precision_score(ans['type'], pred_labels_2)

print('Total precision TCRbert: %.5f' % precision_tcr)
# Label fixed: this value is the Prottrans precision, not its accuracy.
print('Total precision Prottrans: %.5f' % precision_prttr)

Total precision TCRbert: 0.99990
Total Accuracy Prottrans: 0.99981


## Recall

In [25]:
# Recall of both models against the reference labels. The `type` column is
# passed explicitly so y_true is a 1-D label vector rather than a DataFrame.
recall_tcr = recall_score(ans['type'], pred_labels)
recall_prt = recall_score(ans['type'], pred_labels_2)

print('Total recall TCRbert: %.5f' % recall_tcr)
print('Total recall Prottrans: %.5f' % recall_prt)

Total recall TCRbert: 0.99990
Total recall Prottrans: 0.99951
