In [23]:
import argparse
from bertviz import model_view
import jsonlines
import os
import numpy as np
import pytorch_lightning as pl

from constants import ENTITY_START_MARKER, ENTITY_END_MARKER
from data_loader import  DrugSynergyDataModule, make_fixed_length
from model import RelationExtractor, load_model
from preprocess import create_dataset
from utils import construct_row_id_idx_mapping, set_seed, write_error_analysis_file

from streamlit_single_relation_app import classify_message

In [24]:
# Location of the model checkpoint handed to load_model. Set the
# DRUG_SYNERGY_CHECKPOINT environment variable to use a different copy; the
# original hard-coded location is kept as the fallback so existing runs are
# unaffected.
checkpoint_path = os.environ.get(
    "DRUG_SYNERGY_CHECKPOINT",
    "/home/vijay/drug-synergy-models/checkpoints_more_data_with_drug_tokens_paragraph_2024_multiclass",
)

In [25]:
# Load the model together with its tokenizer and metadata from the checkpoint.
# output_attentions=True is requested so attention weights can be inspected
# later in the notebook (presumably forwarded to the underlying transformer --
# TODO confirm in model.load_model).
model, tokenizer, metadata = load_model(checkpoint_path, output_attentions=True)

In [26]:
# Register the entity boundary markers with the tokenizer.
tokenizer.add_tokens([ENTITY_START_MARKER, ENTITY_END_MARKER])

# Read the drug list (lower-cased, whitespace-separated) inside a context
# manager so the file handle is closed immediately instead of being left for
# the garbage collector.
with open("drugs.txt") as drugs_file:
    drugs = drugs_file.read().lower().split()

# Kept as the final expression of the cell so its return value is displayed.
tokenizer.add_tokens(drugs)

2377

In [58]:
# Example abstract fed to the classifier. The two drugs whose relation is being
# classified are wrapped in <<m>> ... <</m>> markers. The text is stored as a
# tuple of fragments joined with no separator, so each fragment carries its own
# leading/trailing whitespace.
message_text = "".join((
    "The aims of this study were to determine the effects of (a) combining the epidermal growth factor",
    " receptor (EGFR) blocker (erlotinib) and the cyclooxygenase-2 inhibitor (celecoxib) on cell growth and apoptosis",
    " in human pancreatic cancer cell lines, (b) baseline EGFR expression on the potentiation of erlotinib-induced",
    " apoptosis by celecoxib, and (c) the effects of the combination on the expression of the COX-2, EGFR, HER-2/neu,",
    " and nuclear factor-kappaB (NF-kappaB). Baseline expression of EGFR was determined by Western blot analysis in",
    " five human pancreatic cancer cell lines. BxPC-3, PANC-1, and HPAC had high EGFR and MIAPaCa had low EGFR. Cells",
    " were grown in culture and treated with erlotinib (1 and 10 micromol/L), celecoxib (1 and 10 micromol/L), and the",
    " combination. Growth inhibition was evaluated using 3-(4,5-dimethylthiazol-2-yl)-2,5-diphenyltetrazolium bromide",
    " assay, and apoptosis was assayed by ELISA. Reverse transcriptase-PCR was used to evaluate COX-2 and EGFR mRNA.",
    " EGFR, COX-2, and HER-2/neu expression was determined by Western immunoblotting. Electrophoretic mobility shift",
    " assay was used to evaluate NF-kappaB activation. Growth inhibition and apoptosis were significantly ( P < 0.05 )",
    " higher in BxPC-3 , HPAC , and PANC-1 cells treated with <<m>> celecoxib <</m>> and <<m>> erlotinib <</m>> than cells ",
    " treated with either",
    " celecoxib or erlotinib or cisplatin . However, no potentiation in growth inhibition or apoptosis was observed in the MIAPaCa",
    " cell line with low expression of the EGFR. Significant down-regulation of COX-2 and EGFR expression was observed",
    " in the BxPC-3 and HPAC cells treated with the combination of erlotinib (1 micromol/L) and celecoxib (10 micromol/L)",
    " compared with celecoxib- or erlotinib-treated cells. celecoxib significantly down-regulated HER-2/neu expression in",
    " BxPC-3 and HPAC cell lines. Significant inhibition of NF-kappaB activation was observed in BxPC-3 and HPAC cell",
    " lines treated with erlotinib and celecoxib. (a) celecoxib can potentiate erlotinib-induced growth inhibition and",
    " apoptosis in pancreatic cell lines, (b) high baseline EGFR expression is a predictor of this potentiation, and (c)",
    " the down-regulation of EGFR, COX-2, and HER-2/neu expression and NF-kappaB inactivation contributes to the",
    " potentiation of erlotinib by celecoxib, suggesting an alternative to cisplatin.",
))

# Disabled alternative input (the dabrafenib / trametinib abstract). It is wrapped
# in a bare triple-quoted string rather than commented out, so nothing in it is
# executed and message_text is NOT reassigned here. Because the string is the last
# expression of the cell, the notebook echoes it as the cell's output.
# To switch to this example, remove the two ''' delimiter lines.
'''
message_text = "Various cutaneous side-effects have been reported with anti-melanoma systemic therapies. "+ \
"This study investigated the changes in melanocytic lesion pigmentation in patients on four different therapies."+ \
" ### methods We analysed the serial dermatoscopic photographs of atypical melanocytic lesions taken from patients"+ \
" with advanced metastatic melanoma on four different systemic therapies ( selective BRAF-inhibitor monotherapy ,"+ \
" <<m>> dabrafenib <</m>> combined with <<m>> trametinib <</m>> [ D&T ] , anti-programmed cell death protein 1 [ "+ \
"anti-PD1 ] therapies , and anti-PD1 combined with ipilimumab ) seen from February 2013 to May 2016 . We compared "+ \
"these changes with the melanocytic lesions of 10 control patients. ### results In the control group, 19% of naevi "+ \
"lightened, 64% did not change and 17% darkened. Only the BRAF inhibitor group showed more darkened lesions than "+ \
"controls (37%, P < 0.001). Meanwhile, there were more lightened naevi in the D&T therapy group (86%, P < 0.001) "+ \
"as well as the anti-PD1 and ipilimumab groups (59%, P < 0.001) than controls. Patients on anti-PD1 monotherapy "+ \
"had more lightened (49%) and fewer darkened naevi (9%) than controls, but differences were not significant. ###"+ \
" conclusions Our study showed that different anti-melanoma systemic therapies have different effects on the "+ \
"pigmentation of melanocytic lesions. BRAF inhibitor may have the propensity to cause darkening while D&T therapy"+ \
" and anti-PD1 caused lightening compared with controls. The findings emphasise the importance of regular "+ \
"dermatological monitoring in specialised clinics for patients on anti-melanoma systemic therapy. Clinicians "+ \
"should expect changes in the global pigmentation of melanocytic lesions but be suspicious of lesions with "+ \
"structural changes."
'''

'\nmessage_text = "Various cutaneous side-effects have been reported with anti-melanoma systemic therapies. "+ "This study investigated the changes in melanocytic lesion pigmentation in patients on four different therapies."+ " ### methods We analysed the serial dermatoscopic photographs of atypical melanocytic lesions taken from patients"+ " with advanced metastatic melanoma on four different systemic therapies ( selective BRAF-inhibitor monotherapy ,"+ " <<m>> dabrafenib <</m>> combined with <<m>> trametinib <</m>> [ D&T ] , anti-programmed cell death protein 1 [ "+ "anti-PD1 ] therapies , and anti-PD1 combined with ipilimumab ) seen from February 2013 to May 2016 . We compared "+ "these changes with the melanocytic lesions of 10 control patients. ### results In the control group, 19% of naevi "+ "lightened, 64% did not change and 17% darkened. Only the BRAF inhibitor group showed more darkened lesions than "+ "controls (37%, P < 0.001). Meanwhile, there were more lightened naevi in 

In [59]:
# Encode the marked-up text to token ids (return_tensors='pt' asks for PyTorch
# tensors), then map the first row of ids back to token strings. `tokens` is
# reused by later cells for indexing and for printing.
# NOTE(review): no truncation/max-length argument is passed here, whereas
# classify_message receives metadata.max_seq_length -- confirm that `tokens`
# lines up with the attention matrices for long inputs.
inputs = tokenizer.encode(message_text, return_tensors='pt')
tokens = tokenizer.convert_ids_to_tokens(inputs[0]) 

In [60]:
#tokens = make_fixed_length(tokens, metadata.max_seq_length, padding_value = "<PAD>")

In [61]:
# Run the classifier on the same text, passing the maximum sequence length from
# the checkpoint metadata. Returns the prediction summary and the attention
# weights. NOTE(review): `attention` is indexed per layer further down
# (attention[i]) -- confirm its exact structure against classify_message.
model_output, attention = classify_message(message_text, model, tokenizer, metadata.max_seq_length)

In [62]:
# Display the prediction summary returned by classify_message.
model_output

{'predicted label': 0,
 'relation probabilities': [0.7053, 0.0817, 0.0294, 0.1836]}

In [63]:
def only_keep_nth_head(attention_layer, head_idxs, seq_length):
    """Select a subset of heads and crop the last two axes of one attention tensor.

    Args:
        attention_layer: 4-D indexable array/tensor. Axis 1 is indexed with
            `head_idxs`; axes 2 and 3 are cropped to `seq_length`
            (presumably laid out as batch x heads x seq x seq -- TODO confirm).
        head_idxs: index (or list of indices) applied to axis 1.
        seq_length: number of leading positions to keep on axes 2 and 3.

    Returns:
        The indexed view/copy `attention_layer[:, head_idxs, :seq_length, :seq_length]`.
    """
    leading_positions = slice(None, seq_length)
    return attention_layer[:, head_idxs, leading_positions, leading_positions]

In [64]:
# For each of the first 12 entries of `attention`, keep heads 0-11 and crop both
# sequence axes to len(tokens). NOTE(review): both 12s are hard-coded (presumably
# the model's layer and head counts -- confirm against the loaded checkpoint).
attention_pruned = [only_keep_nth_head(attention[i], list(range(12)), len(tokens)) for i in range(12)]

In [66]:
def print_top_attended_words(attention_tensors, query_token_idx, layer_idx, attn_head_idx, num_words_to_show=10, token_list=None):
    """Print the tokens that receive the most attention from one query position.

    The weights are read from
    `attention_tensors[layer_idx][0][attn_head_idx][query_token_idx]`, i.e. the
    function's own arguments. Earlier revisions printed weights from the
    module-level `attention_pruned` / `start` variables, which silently ignored
    the arguments.

    Args:
        attention_tensors: sequence indexed by layer; each entry is indexed as
            [0][head][query][key] and must support `.detach()` (torch tensors).
        query_token_idx: position of the querying token.
        layer_idx: which entry of `attention_tensors` to read.
        attn_head_idx: which attention head to read.
        num_words_to_show: how many of the highest-weighted positions to print.
        token_list: token strings used for display. Defaults to the
            module-level `tokens` list when omitted.

    Returns:
        None. One line per token is written to stdout:
        "<token> (<index>)<tabs><weight>".
    """
    if token_list is None:
        # Backward-compatible fallback to the notebook-level token list.
        token_list = tokens

    # Attention weights from the query position to every other position.
    query_row = attention_tensors[layer_idx][0][attn_head_idx][query_token_idx].detach().cpu()

    # Sort descending by weight; convert to plain ints so they index lists and
    # format cleanly regardless of the numpy/torch versions installed.
    ranked_idxs = np.argsort(-np.asarray(query_row))[:num_words_to_show].tolist()

    for other_token_idx in ranked_idxs:
        word = f"{token_list[other_token_idx]} ({other_token_idx})"
        # Shorter labels need more tabs to keep the weight column aligned.
        if len(word) <= 7:
            num_tabs = 4
        elif len(word) >= 16:
            num_tabs = 2
        else:
            num_tabs = 3
        tabs = "\t" * num_tabs
        print(f"{word}{tabs}{query_row[other_token_idx].item()}")

TRANSFORMER_LAYER_IDX = 0
# NOTE(review): ATTENTION_HEAD_IDX is not referenced in this cell (the loop below
# walks every head); kept in case other cells rely on it.
ATTENTION_HEAD_IDX = 4
# Number of heads to iterate over (previously an inline literal in the loop).
NUM_ATTENTION_HEADS = 12

# Positions of every "<<m>>" token, i.e. the entity start markers in `tokens`.
start_token_idxs = [i for i, v in enumerate(tokens) if v == "<<m>>"]
if not start_token_idxs:
    # Fail with a clear message instead of an IndexError inside the loop.
    raise ValueError('No "<<m>>" entity marker found in tokens; nothing to inspect.')

# The query position is the first entity marker and does not depend on the head,
# so resolve it once before looping.
start = start_token_idxs[0]
for attention_head in range(NUM_ATTENTION_HEADS):
    print(f"Showing attention head {attention_head} in transformer layer {TRANSFORMER_LAYER_IDX}")
    print(f"Query entity: entity marker before \"{tokens[start+1]}\" ({start})")
    print("===================================================")
    print("Subword\t\t\t|\tAttention weight\n---------------------------------------------------")
    print_top_attended_words(attention_pruned, start, TRANSFORMER_LAYER_IDX, attention_head)
    print("\n\n")

Showing attention head 0 in transformer layer 0
Query entity: entity marker before "celecoxib" (300)
Subword			|	Attention weight
---------------------------------------------------
[CLS] (0)			0.06035729870200157
however (318)			0.01803034357726574
baseline (469)			0.017301596701145172
contributes (507)		0.015397525392472744
) (467)				0.010856201872229576
observed (328)			0.010762047953903675
) (482)				0.010375728830695152
nf (502)			0.01007224339991808
predictor (474)			0.009087681770324707
##b (505)			0.009011885151267052



Showing attention head 1 in transformer layer 0
Query entity: entity marker before "celecoxib" (300)
Subword			|	Attention weight
---------------------------------------------------
egfr (488)			0.01927526481449604
egfr (470)			0.018516073003411293
egfr (352)			0.017373019829392433
induced (455)			0.013868839479982853
regulation (486)		0.01262357085943222
egfr (341)			0.011522654443979263
immunoblotting (254)		0.011162650771439075
regulated (402)			0.011134881

# model_view(attention_pruned, tokens[:metadata.max_seq_length])