## Data Generation at computerlab1

## Setting up notebook

In [1]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

import torch
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl

from transformers import GPT2Tokenizer
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModelForSeq2SeqLM

from sentence_transformers import SentenceTransformer, util
from transformers import AdamW, T5ForConditionalGeneration, T5Tokenizer, get_linear_schedule_with_warmup

In [2]:
import re
import sys
import random
from random import choices
import os
from itertools import chain
from string import punctuation
import time
import argparse
import glob

import csv
import json

import traceback
import gc
from enum import Enum 

import numpy as np
import pandas as pd
from statistics import mean

import matplotlib.pyplot as plt


# from trl.gpt2 import GPT2HeadWithValueModel, respond_to_batch
# from trl.ppo import PPOTrainer
#from trl.core import build_bert_batch_from_txt

from IPython.core.display import Markdown,display, HTML, Latex
import qgrid

from verisci.covid import AbstractRetriever, RationaleSelector, LabelPredictor
from verisci.evaluate.lib.data import GoldDataset

from GPUtil import showUtilization as gpu_usage
import wandb

from tqdm.notebook import tqdm
tqdm.pandas()
import pickle

In [3]:
from transformers import AutoTokenizer, AutoModel

In [4]:
from datetime import datetime
cur_date_time = datetime.today().strftime('%Y_%m_%d_%H_%M')
log_dir = "../../logs/"
project_name = 'Scifact_paraphrase_T5_scifive_per_evo_general_concate_prev_tuned_data_sup_to_ref_threshold_no_filter_tech_v0'
version = '0.0.1'
log_file_dir_name = log_dir+project_name+'_'+version+'.log'

In [5]:
import logging
  
#Create and configure logger
logging.basicConfig(filename=log_file_dir_name,
                    level=logging.INFO,
                    format='%(message)s',
                    filemode='w')

log_file_fine_tune_callback = '../../logs/log_results.txt'

In [6]:
pd.set_option("display.precision", 2)

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
class ParaphraseTargetDirection(Enum):
    org_support_to_gen_refute = 0
    org_refute_to_gen_support = 1
    
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(37)    

In [9]:
PARAPHRASE_PROJECT_SETTINGS={
    'file_and_dirs': {
        'file_paraphrased_no_tune_all_model_full' : '../../dfs_generated/paraphrased/t5_no_fine_tune_generated_claim_all_model_df_full_1.pkl', # can be deleted
        'file_org_claims_by_scifact' : '../../dfs_generated/scifact/org_claim_ext_label_roberta_large_fever.pkl',
    },
    'config_scifact' : {
        'cls_model_name': '../../scifact/model/label_roberta_large_fever_scifact',
        'rationale_model_name': '../../scifact/model/rationale_roberta_large_fever_scifact',
        'loc_gold_ds_corpus' : '../../scifact/data/corpus.jsonl', 
        'loc_gold_ds_train' : '../../scifact/data/claims_train.jsonl', 
        'loc_gold_ds_dev' : '../../scifact/data/claims_dev.jsonl', 

    },
    
    
    'paraphrase_model' :
    {
        'list_potential_paraphrase_models' : 
            [
                {'model_name' : 'parasci_base_no_fine_tune' , 'model_path_or_url' : 'HelloRusk/t5-base-parasci', 'is_selected' : False},
                {'model_name' : 'parrot_base_no_fine_tune' , 'model_path_or_url' : 'prithivida/parrot_paraphraser_on_T5', 'is_selected' : False},
                {'model_name' : 'parrot_div_base_no_fine_tune' , 'model_path_or_url' : 'prithivida/parrot_paraphraser_on_T5', 'is_selected' : False},
                {'model_name' : 'pegasus_base_no_fine_tune' , 'model_path_or_url' : 'tuner007/pegasus_paraphrase', 'is_selected' : False},
                {'model_name' : 'paws_base_no_fine_tune' , 'model_path_or_url' : 'Vamsi/T5_Paraphrase_Paws', 'is_selected' : True},
                {'model_name' : 'tapaco_base_no_fine_tune' , 'model_path_or_url' : 'hetpandya/t5-base-tapaco', 'is_selected' : False},
                {'model_name' : 'sci_five_pubmed' , 'model_path_or_url' : 'razent/SciFive-large-Pubmed_PMC', 'is_selected' : False}
            ],
        't5_paraphrase_model_params':
        {
            'max_length':256,
            'do_sample':True,
            'top_k':50,
            'top_p': 0.99,
            'repetition_penalty':3.5,
            'early_stopping':True,
            'num_return_sequences':20
        }
    },
    
    'run_settings':
    {
        'PARAPHRASE_FT_TRAIN_SPLIT' : 0.1,
        'PARAPHRASE_FT_DATASET_DIRECTION' : ParaphraseTargetDirection.org_support_to_gen_refute,#ParaphraseTargetDirection.org_support_to_gen_refute,#ParaphraseTargetDirection.org_refute_to_gen_support,
        'NUM_OF_EPOCH_REQ_FT' : 2,
        'FILTER_BY' : 'TECH_TERMS',
        'SIMILARITY_THRESHOLD' : -100
        #'CUR_MODEL_NAME_PATHS' : (lambda: [_x['model_path_or_url'] for  _x in PARAPHRASE_PROJECT_SETTINGS['paraphrase_model']['list_potential_paraphrase_models'] if _x['is_selected'] == True])(),
    },
}

CUR_NO_OF_EPOCH_FT = 0

## Scifact Functinos

### Load data

In [10]:
def get_claim_label_from_jsonl(dataset_jsonl):
    claim_label_list_train = []


    for cur_claim in dataset_jsonl:
        claim_txt = cur_claim.claim

        for doc_id, evidence in cur_claim.evidence.items():

            ev_doc = cur_claim.release.corpus.get_document(doc_id)

            claim_label = evidence.label.name

            tmp_dic = {"claim" : claim_txt, "label" : claim_label}

            claim_label_list_train.append(tmp_dic)
    return claim_label_list_train

In [11]:
def get_claim_label_evidence_from_jsonl(dataset_jsonl, source):
    claim_label_list_train = []


    for cur_claim in dataset_jsonl:
        claim_txt = cur_claim.claim

        for doc_id, evidence in cur_claim.evidence.items():

            ev_doc = claim_train.release.corpus.get_document(doc_id)

            claim_label = evidence.label.name
            
            list_rationales = []
            for i, sents in enumerate(evidence.rationales):
                list_rationales = [sent for i, sent in enumerate(ev_doc.sentences) if i in sents]

            tmp_dic = {"claim" : claim_txt, "label" : claim_label, "list_rationales" :list_rationales, "source" :source}

            claim_label_list_train.append(tmp_dic)
    return claim_label_list_train

In [12]:
ds_train = GoldDataset(PARAPHRASE_PROJECT_SETTINGS['config_scifact']['loc_gold_ds_corpus'],
                       PARAPHRASE_PROJECT_SETTINGS['config_scifact']['loc_gold_ds_train'])
claim_train = ds_train.get_claim(39)
claim_train.pretty_print()

dic_train = get_claim_label_evidence_from_jsonl(ds_train, source = "train")

Example 39: A diminished ovarian reserve does not solely indicate infertility in an a priori non-infertile population.

Evidence sets:

####################

13497630: SUPPORTS
Set 0:
	- After adjusting for age, body mass index, race, current smoking status, and recent hormonal contraceptive use, women with low AMH values (<0.7 ng/mL [n = 84]) did not have a significantly different predicted probability of conceiving by 6 cycles of attempt (65%; 95% CI, 50%-75%) compared with women (n = 579) with normal values (62%; 95% CI, 57%-66%) or by 12 cycles of attempt (84% [95% CI, 70%-91%] vs 75% [95% CI, 70%-79%], respectively).
Set 1:
	- Women with high serum FSH values (>10 mIU/mL [n = 83]) did not have a significantly different predicted probability of conceiving after 6 cycles of attempt (63%; 95% CI, 50%-73%) compared with women (n = 654) with normal values (62%; 95% CI, 57%-66%) or after 12 cycles of attempt (82% [95% CI, 70%-89%] vs 75% [95% CI, 70%-78%], respectively).
Set 2:
	- Women

In [13]:
ds_valid = GoldDataset(PARAPHRASE_PROJECT_SETTINGS['config_scifact']['loc_gold_ds_corpus'],
                       PARAPHRASE_PROJECT_SETTINGS['config_scifact']['loc_gold_ds_dev'])
claim_valid = ds_valid.get_claim(42)
claim_valid.pretty_print()

dic_valid = get_claim_label_evidence_from_jsonl(ds_valid, source = "dev")

Example 42: A high microerythrocyte count raises vulnerability to severe anemia in homozygous alpha (+)- thalassemia trait subjects.

Evidence sets:

####################

18174210: REFUTES
Set 0:
	- Individuals homozygous for alpha(+)-thalassaemia have microcytosis and an increased erythrocyte count.
	- We estimated that the haematological profile in children homozygous for alpha(+)-thalassaemia reduces the risk of SMA during acute malaria compared to children of normal genotype (relative risk 0.52; 95% confidence interval [CI] 0.24-1.12, p = 0.09).   

Set 1:
	- CONCLUSIONS The increased erythrocyte count and microcytosis in children homozygous for alpha(+)-thalassaemia may contribute substantially to their protection against SMA.


In [14]:
df_claim_evid_label = pd.concat([pd.DataFrame(dic_train), pd.DataFrame(dic_valid)], ignore_index=True)

#df_claim_evid_label

In [15]:
df_claim_evid_label

Unnamed: 0,claim,label,list_rationales,source
0,1 in 5 million in UK have abnormal PrP positiv...,REFUTES,"[RESULTS Of the 32,441 appendix samples 16 wer...",train
1,32% of liver transplantation programs required...,SUPPORTS,[Policies requiring discontinuation of methado...,train
2,40mg/day dosage of folic acid and 2mg/day dosa...,SUPPORTS,[CONCLUSION Treatment with high doses of folic...,train
3,76-85% of people with severe mental disorder r...,SUPPORTS,[Although disorder severity was correlated wit...,train
4,A T helper 2 cell (Th2) environment impedes di...,REFUTES,"[Thus, in Lyn(-/-) mice, basophils and IgE aut...",train
...,...,...,...,...
768,Women with a higher birth weight are more like...,SUPPORTS,[Increased risk of breast cancer was noted wit...,dev
769,Women with a higher birth weight are more like...,SUPPORTS,[RESULTS We found that heavier birth weights w...,dev
770,aPKCz causes tumour enhancement by affecting g...,REFUTES,"[Taken together, this demonstrates that PKCζ i...",dev
771,cSMAC formation enhances weak ligand signalling.,SUPPORTS,[This conclusion was supported by experiments ...,dev


### Scifact Model

In [17]:
class ArgsScifact:
    def __init__(self, claim):
        self.claim = claim
        self.report_file = "../../scifact/results/covid/report" #not needed
        self.n_documents = 100
        self.rationale_selection_method = "topk"
        self.output_format = "markdown"
        self.rationale_threshold = 0.5
        self.label_threshold = 0.5
        self.keep_nei = False
        self.full_abstract = True
        self.verbose = True
        self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        
        ##
class PretrainedModelsForScifact:
    def __init__(self, args):
        if args.device is None:
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(args.device)
            
        #self.rationale_selection_model = '/home/qudratealahyratu/research/nlp/fact_checking/my_work/scifact/model/rationale_roberta_large_scifact'
        self.rationale_selection_model = PARAPHRASE_PROJECT_SETTINGS['config_scifact']['rationale_model_name']
        self.label_prediction_model = PARAPHRASE_PROJECT_SETTINGS['config_scifact']['cls_model_name']
        self.abstract_retriever = AbstractRetriever()
        self.rationale_selector = RationaleSelector(self.rationale_selection_model,
                                               args.rationale_selection_method,
                                               args.rationale_threshold,
                                               self.device)
        self.label_predictor = LabelPredictor(self.label_prediction_model,
                                         args.keep_nei,
                                         args.label_threshold,
                                         self.device)


In [18]:
args_sci = ArgsScifact("")

pretrained_models_config = PretrainedModelsForScifact(args_sci)

Some weights of the model checkpoint at ../../scifact/model/rationale_roberta_large_fever_scifact were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at ../../scifact/model/label_roberta_large_fever_scifact were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSe

In [19]:
log_failed_claim = []
def inference(args, pretraind_models_config):

    try:
#         if args.verbose:
#         print("Retrieving abstracts. inference > ", args.claim)
        results = pretraind_models_config.abstract_retriever(args.claim, k=args.n_documents)
        if len(results) == 0:
            temp_dic = {'failed_in' : 'abstract retrival', 'claim': args.claim}
            log_failed_claim.append(temp_dic)
            return []
        
        #print("abstract_retriever >> ", results)

#         if args.verbose:
#             print("Selecting rationales. inference > ", args.claim)
        results = pretraind_models_config.rationale_selector(args.claim, results)
        if len(results) == 0:
            temp_dic = {'failed_in' : 'Rationale selection', 'claim': args.claim}
            log_failed_claim.append(temp_dic)
            return []
        
#         if args.verbose:
#             print("Label predictions. inference > ", args.claim)
        results = pretraind_models_config.label_predictor(args.claim, results)

        if len(results) == 0:
            temp_dic = {'failed_in' : 'Label Prediction', 'claim': args.claim}
            log_failed_claim.append(temp_dic)
            return []
        
        results.sort(key=lambda r: r['label_confidence'], reverse=True)
        return results
    except Exception as e:
        print("Exception :: Inference cant retrive info for >> ", args.claim)
        print(sys.exc_info()[0])
        print(traceback.format_exc())
        temp_dic = {'failed_in' : sys.exc_info()[0], 'claim': args.claim}
        log_failed_claim.append(temp_dic)
        return []


In [20]:
def write_result(result, full_abstract):
    all_msg = ""
    all_msg = f"#### [{result['title']}]({result['url']}) \n"
    #print(msg, file=f)
    #all_msg = all_msg+msg
    ev_scores = [f"{x:0.2f}" for x in result["evidence_confidence"]]
    ev_scores = ", ".join(ev_scores)
    if result['label'].lower() == "support":
        msg = f"🟩 **Decision** : {result['label']} (score={result['label_confidence']:0.2f}, evidence scores={ev_scores})\n"
    elif result['label'].lower() == "refute":
        msg = f"🟥 **Decision** : {result['label']} (score={result['label_confidence']:0.2f}, evidence scores={ev_scores})\n"
    else:
        msg = f"⏺ **Decision** : {result['label']} (score={result['label_confidence']:0.2f}, evidence scores={ev_scores})\n"
    #print(msg, file=f)
    all_msg = all_msg+msg 
    
    for i, line in enumerate(result["abstract"]):
        # If we're showing the full abstract, show evidence in green.
        if full_abstract:
            if result['label'].lower() == "support":
                msg = (f"- <span style='color:green'>{line}</span>"
                       if i in result["evidence"]
                       else f"- {line}")
            elif result['label'].lower() == "refute":
                msg = (f"- <span style='color:red'>{line}</span>"
                       if i in result["evidence"]
                       else f"- {line}")                
            #print(msg, file=f)
            all_msg = all_msg+msg + " \n"
        else:
            if i in result["evidence"]:
                msg = f"- {line}"
                #print(msg, file=f)
                all_msg = all_msg+msg + " \n" 
    
    #print(file=f)
    #print(40 * "-", file=f)
    #print(file=f)
    all_msg = all_msg+msg 
    return all_msg + "\n"

In [21]:
def export(args, results):
    all_msg = ""
    claim = args.claim
    #report_file = args.report_file
    #f = open(f"{report_file}.md", "w")
    msg = f"### Claim \n > **{claim}** \n "
    #print(msg, file=f)
    #print(file=f)
    all_msg = all_msg +msg
    
    #support_confs = [], refute_confs = []
    confs = []
    for result in results:
        if result['label'].lower() == "support":
            tmp_dic = {'label' : 'Support', 'label_confidence' : result["label_confidence"], "no_of_evidence" : len(result['evidence_confidence'])}
            confs.append(tmp_dic)
        elif result['label'].lower() == "refute":
            tmp_dic = {'label' : 'Refute', 'label_confidence' : -result["label_confidence"], "no_of_evidence" : len(result['evidence_confidence'])}
            confs.append(tmp_dic)
        
    
    tpm_df = pd.DataFrame(confs)
    #HTML(tpm_df.style.bar(align='mid', color=['#d65f5f', '#5fba7d']))
    display(HTML(tpm_df.style.bar(subset=["label_confidence"], align='mid', color=['#ffa1a1', '#bfffcf']).render()))
    
    msg = "### Evidence \n "
    all_msg = all_msg +msg
    for result in results:
        cur_msg = write_result(result, args.full_abstract)
        all_msg = all_msg +cur_msg+"\n"

    return all_msg

In [22]:
claim_to_check = "ART substantially reduces infectiveness of HIV-positive people."#df_claim_evid_label.iloc[18, :]["claim"]
args_sci = ArgsScifact(claim_to_check)

#pretrained_models_config = pretrained_models_for_scifact(args_sci)

results_raw = inference(args_sci, pretrained_models_config)

if results_raw!= []:
    result_md = export(args_sci, results_raw)
    #result_md = export(args_sci, results_raw)
    display(Markdown(result_md))



Unnamed: 0,label,label_confidence,no_of_evidence
0,Support,0.69,3
1,Support,0.64,3
2,Support,0.54,3


### Claim 
 > **ART substantially reduces infectiveness of HIV-positive people.** 
 ### Evidence 
 #### [Autonomous Targeting of Infectious Superspreaders Using Engineered Transmissible Therapies](https://api.semanticscholar.org/10.1371/journal.pcbi.1002015) 
🟩 **Decision** : SUPPORT (score=0.69, evidence scores=0.11, 0.06, 0.01)
- Infectious disease treatments, both pharmaceutical and vaccine, face three universal challenges: the difficulty of targeting treatments to high-risk ‘superspreader’ populations who drive the great majority of disease spread, behavioral barriers in the host population (such as poor compliance and risk disinhibition), and the evolution of pathogen resistance. 
- Here, we describe a proposed intervention that would overcome these challenges by capitalizing upon Therapeutic Interfering Particles (TIPs) that are engineered to replicate conditionally in the presence of the pathogen and spread between individuals — analogous to ‘transmissible immunization’ that occurs with live-attenuated vaccines (but without the potential for reversion to virulence). 
- Building on analyses of HIV field data from sub-Saharan Africa, we construct a multi-scale model, beginning at the single-cell level, to predict the effect of TIPs on individual patient viral loads and ultimately population-level disease prevalence. 
- <span style='color:green'>Our results show that a TIP, engineered with properties based on a recent HIV gene-therapy trial, could stably lower HIV/AIDS prevalence by ∼30-fold within 50 years and could complement current therapies.</span> 
- <span style='color:green'>In contrast, optimistic antiretroviral therapy or vaccination campaigns alone could only lower HIV/AIDS prevalence by <2-fold over 50 years.</span> 
- The TIP's efficacy arises from its exploitation of the same risk factors as the pathogen, allowing it to autonomously penetrate superspreader populations, maintain efficacy despite behavioral disinhibition, and limit viral resistance. 
- <span style='color:green'>While demonstrated here for HIV, the TIP concept could apply broadly to many viral infectious diseases and would represent a new paradigm for disease control, away from pathogen eradication but toward robust disease suppression.</span> 
- <span style='color:green'>While demonstrated here for HIV, the TIP concept could apply broadly to many viral infectious diseases and would represent a new paradigm for disease control, away from pathogen eradication but toward robust disease suppression.</span>

#### [HIV: Biology to Treatment](https://api.semanticscholar.org/10.1007/978-981-32-9898-9_7) 
🟩 **Decision** : SUPPORT (score=0.64, evidence scores=0.67, 0.01, 0.37)
- AIDS is one of the most dreaded diseases of the twenty-first century caused by human immunodeficiency virus (HIV). 
- <span style='color:green'>Recently, there are reports which show decline in new infections due to better access to anti-retroviral drugs.</span> 
- <span style='color:green'>Still on a daily basis, ~2356 new HIV infections are being reported globally.</span> 
- New treatments and anti-HIV drugs are being continuously developed with the aim to control and cure AIDS. 
- The anti-HIV drugs that are in use usually target HIV entry and replication inside the host cells. 
- <span style='color:green'>However, these drugs are only partially effective in slowing the rate of HIV replication.</span> 
- Nevertheless, the virus manages to replicate at much slower rates even when anti-retroviral treatment is ongoing. 
- The HIV seropositives who are on anti-retroviral treatment for long periods of time are now developing different kinds of other complications including neuroAIDS. 
- The latest development in HIV therapy is a novel kind of bone marrow transplantation from donors who have a homozygous mutation in CCR5 gene. 
- The latest development in HIV therapy is a novel kind of bone marrow transplantation from donors who have a homozygous mutation in CCR5 gene.

#### [Human Immunodeficiency Virus-Associated Diarrhea: Still an Issue in the Era of Antiretroviral Therapy](https://api.semanticscholar.org/10.1007/s10620-015-3615-y) 
🟩 **Decision** : SUPPORT (score=0.54, evidence scores=0.01, 0.99, 0.02)
- <span style='color:green'>Over half of patients with human immunodeficiency virus (HIV) experience diarrhea that contributes negatively to quality of life and adherence to antiretroviral therapy (ART).</span> 
- Opportunistic infectious agents that cause diarrhea in patients with HIV span the array of protozoa, fungi, viruses, and bacteria. 
- <span style='color:green'>With global use of ART, the incidence of diarrhea because of opportunistic infections has decreased; however, the incidence of noninfectious diarrhea has increased.</span> 
- <span style='color:green'>The etiology of noninfectious diarrhea in patients with HIV is multifactorial and includes ART-associated diarrhea and gastrointestinal damage related to HIV infection (i.e., HIV enteropathy).</span> 
- A basic algorithm for the diagnosis of diarrhea in patients with HIV includes physical examination, a review of medical history, assessment of HIV viral load and CD4+ T cell count, stool microbiologic assessment, and endoscopic evaluation, if needed. 
- For patients with negative diagnostic results, the diagnosis of noninfectious diarrhea may be considered. 
- Pharmacologic options for the treatment of noninfectious diarrhea are primarily supportive; however, the use of many unapproved agents is based on unstudied and anecdotal information. 
- In addition, these agents can be associated with treatment-limiting adverse events (AEs), such as drug–drug interactions with ART regimens, abuse liability, and additional gastrointestinal AEs. 
- Currently, crofelemer, an antisecretory agent, is the only therapy approved in the USA for the symptomatic relief of noninfectious diarrhea in patients with HIV on ART. 
- Currently, crofelemer, an antisecretory agent, is the only therapy approved in the USA for the symptomatic relief of noninfectious diarrhea in patients with HIV on ART.



In [21]:
#print(pretraind_models_config)

## Load paraphrased data by Other models

In [29]:
#cur_model_outcome_dir = './results/paraphrased/paws/cumulative_threshold_07/'
cur_model_outcome_dir = '../../dfs_generated/paraphrased/existing_literature_methods/scifacti_as_black_box_model/'
cur_models_all_files =  glob.glob(os.path.join(cur_model_outcome_dir, '*.pkl'))

In [30]:
list_cur_itr_model_dfs = []
for cur_model_cur_file_name in cur_models_all_files:
    tmp_df = pd.read_pickle(cur_model_cur_file_name)
    #tmp_df['file_name'] = os.path.basename(cur_model_cur_file_name)
    list_cur_itr_model_dfs.append(tmp_df)

In [31]:
df_cur_itr_model_all_res = pd.concat(list_cur_itr_model_dfs, axis=0, ignore_index=True)

In [35]:
df_cur_itr_model_all_res

Unnamed: 0,org_claim,gen_claim,num_of_query,attack_result,attack_method
0,A T helper 2 cell (Th2) environment impedes di...,A T helper 2 cell (Th2) environment impedes di...,1,SkippedAttackResult,checklist_ribeiro_2020
1,A breast cancer patient's capacity to metaboli...,A breast cancer patient's capacity to metaboli...,1,SkippedAttackResult,checklist_ribeiro_2020
2,A diminished ovarian reserve does not solely i...,A diminished ovarian reserve does not solely i...,1,SkippedAttackResult,checklist_ribeiro_2020
3,ALDH1 expression is associated with poorer pro...,ALDH1 expression is associated with poorer pro...,1,SkippedAttackResult,checklist_ribeiro_2020
4,AMP-activated protein kinase (AMPK) activation...,AMP-activated protein kinase (AMPK) activation...,1,SkippedAttackResult,checklist_ribeiro_2020
...,...,...,...,...,...
961,There is an inverse relationship between hip f...,There is an inverse relationship between hip f...,1,SkippedAttackResult,bae_garg_2019
962,Thigh-length graduated compression stockings (...,Thigh-length graduated compression stockings (...,1,SkippedAttackResult,bae_garg_2019
963,Tirasemtiv has no effect on fast-twitch muscle.,Tirasemtiv has no effect on fast-twitch muscle.,1,SkippedAttackResult,bae_garg_2019
964,Transplanted human glial progenitor cells are ...,Transplanted human glial progenitor cells are ...,1,SkippedAttackResult,bae_garg_2019


In [41]:
print(df_cur_itr_model_all_res['num_of_query'].value_counts())
print(df_cur_itr_model_all_res['attack_result'].value_counts())
print(df_cur_itr_model_all_res['attack_method'].value_counts())

1    966
Name: num_of_query, dtype: int64
SkippedAttackResult    966
Name: attack_result, dtype: int64
checklist_ribeiro_2020    322
TextFoolerJin2019         322
bae_garg_2019             322
Name: attack_method, dtype: int64


In [42]:
df_cur_itr_model_all_res[df_cur_itr_model_all_res['org_claim'] == df_cur_itr_model_all_res['gen_claim']]

Unnamed: 0,org_claim,gen_claim,num_of_query,attack_result,attack_method
0,A T helper 2 cell (Th2) environment impedes di...,A T helper 2 cell (Th2) environment impedes di...,1,SkippedAttackResult,checklist_ribeiro_2020
1,A breast cancer patient's capacity to metaboli...,A breast cancer patient's capacity to metaboli...,1,SkippedAttackResult,checklist_ribeiro_2020
2,A diminished ovarian reserve does not solely i...,A diminished ovarian reserve does not solely i...,1,SkippedAttackResult,checklist_ribeiro_2020
3,ALDH1 expression is associated with poorer pro...,ALDH1 expression is associated with poorer pro...,1,SkippedAttackResult,checklist_ribeiro_2020
4,AMP-activated protein kinase (AMPK) activation...,AMP-activated protein kinase (AMPK) activation...,1,SkippedAttackResult,checklist_ribeiro_2020
...,...,...,...,...,...
961,There is an inverse relationship between hip f...,There is an inverse relationship between hip f...,1,SkippedAttackResult,bae_garg_2019
962,Thigh-length graduated compression stockings (...,Thigh-length graduated compression stockings (...,1,SkippedAttackResult,bae_garg_2019
963,Tirasemtiv has no effect on fast-twitch muscle.,Tirasemtiv has no effect on fast-twitch muscle.,1,SkippedAttackResult,bae_garg_2019
964,Transplanted human glial progenitor cells are ...,Transplanted human glial progenitor cells are ...,1,SkippedAttackResult,bae_garg_2019


## Paraphrase Analysis

In [26]:
def get_stat_of_original_claim(row_org_claim):
    claim =  row_org_claim["org_claim"]
    logging.info("#### \n\n>>> Original claim >>> ")
    logging.info(claim)

    args_sci = ArgsScifact(claim)
    dic_info = {}
    
    dic_info["org_claim"] = row_org_claim["org_claim"]
    dic_info["num_of_query"] = row_org_claim["num_of_query"]
    dic_info["attack_result"] = row_org_claim["attack_result"]
    dic_info["attack_method"] = row_org_claim["attack_method"]
    dic_info["org_count_support"] = 0
    dic_info["org_count_refute"] = 0
    dic_info["org_list_supported_ids"] = []
    dic_info["org_list_refuted_ids"] = []
    dic_info["org_list_supported_confidence"] = []
    dic_info["org_list_refuted_confidence"] = []
    dic_info["org_list_supported_confidence_mean"] = 0.0
    dic_info["org_list_refuted_confidence_mean"] = 0.0
    dic_info["org_comment"] = ""
    
    try:
        results_raw_org = inference(args_sci, pretrained_models_config)  
        
        if results_raw_org == []:
            dic_info["org_comment"] = "no result"
            
        else:
            list_supported_ids = [cur_result['id'] for cur_result in results_raw_org if cur_result['label'] == 'SUPPORT']
            list_refuted_ids = [cur_result['id'] for cur_result in results_raw_org if cur_result['label'] == 'REFUTE']
            list_supported_label_confidence =  [cur_result['label_confidence'] for cur_result in results_raw_org if cur_result['label'] == 'SUPPORT']
            list_refuted_label_confidence =  [cur_result['label_confidence'] for cur_result in results_raw_org if cur_result['label'] == 'REFUTE']
            
            
            dic_info["org_count_support"] = len(list_supported_ids)
            dic_info["org_count_refute"] = len(list_refuted_ids)
            dic_info["org_list_supported_ids"] = list_supported_ids
            dic_info["org_list_refuted_ids"] = list_refuted_ids
            dic_info["org_list_supported_confidence"] = list_supported_label_confidence
            dic_info["org_list_refuted_confidence"] = list_refuted_label_confidence
            if len(list_supported_label_confidence) > 0:
                dic_info["org_list_supported_confidence_mean"] = mean(list_supported_label_confidence)
            if len(list_refuted_label_confidence) > 0:
                dic_info["org_list_refuted_confidence_mean"] = mean(list_refuted_label_confidence)
            dic_info["org_comment"] = "success"
            
    except Exception as e:
        dic_info["org_comment"] = "exception : "+e
        logging.info(">>> Exception original claim >>> ") 
        logging.info(claim) 
        logging.info(e)
    
    finally:
        return dic_info

In [27]:
def get_results_by_gen_claim(gen_claim, dic_original_claim_info):
    logging.info(":: Generated claim :: ")
    logging.info(gen_claim)
    #print(gen_claim)
    
    args_gen = ArgsScifact(gen_claim)
    gen_dic_info = {}
    
    gen_dic_info["gen_claim"] = gen_claim
    gen_dic_info["gen_count_support"] = 0
    gen_dic_info["gen_count_refute"] = 0
    gen_dic_info["gen_list_supported_ids"] = []
    gen_dic_info["gen_list_refuted_ids"] = []
    gen_dic_info["gen_list_supported_confidence"] = []
    gen_dic_info["gen_list_refuted_confidence"] = []
    gen_dic_info["gen_list_supported_confidence_mean"] = 0.0
    gen_dic_info["gen_list_refuted_confidence_mean"] = 0.0
    gen_dic_info["gen_comment"] = ""    
    
    gen_dic_info["common_all"] = 0
    gen_dic_info["common_support_refute"] = 0
    gen_dic_info["common_refute_support"] = 0
    gen_dic_info["common_support_support"] = 0
    gen_dic_info["common_refute_refute"] = 0
    
    try:
        results_raw_gen = inference(args_gen, pretrained_models_config)  

        if results_raw_gen == []:
            gen_dic_info["gen_comment"] = "no result"
            
        else:
            list_supported_ids = [cur_result['id'] for cur_result in results_raw_gen if cur_result['label'] == 'SUPPORT']
            list_refuted_ids = [cur_result['id'] for cur_result in results_raw_gen if cur_result['label'] == 'REFUTE']
            list_supported_label_confidence =  [cur_result['label_confidence'] for cur_result in results_raw_gen if cur_result['label'] == 'SUPPORT']
            list_refuted_label_confidence =  [cur_result['label_confidence'] for cur_result in results_raw_gen if cur_result['label'] == 'REFUTE']
            
            
            gen_dic_info["gen_count_support"] = len(list_supported_ids)
            gen_dic_info["gen_count_refute"] = len(list_refuted_ids)
            gen_dic_info["gen_list_supported_ids"] = list_supported_ids
            gen_dic_info["gen_list_refuted_ids"] = list_refuted_ids
            gen_dic_info["gen_list_supported_confidence"] = list_supported_label_confidence
            gen_dic_info["gen_list_refuted_confidence"] = list_refuted_label_confidence
            if len(list_supported_label_confidence) > 0 :
                gen_dic_info["gen_list_supported_confidence_mean"] = mean(list_supported_label_confidence)
            if len(list_refuted_label_confidence) > 0:
                gen_dic_info["gen_list_refuted_confidence_mean"] = mean(list_refuted_label_confidence)
            gen_dic_info["gen_comment"] = "success"      
            
            
            common_all = (set(gen_dic_info["gen_list_supported_ids"]) | set(gen_dic_info["gen_list_refuted_ids"])) & \
                (set(dic_original_claim_info["org_list_supported_ids"]) | set(dic_original_claim_info["org_list_refuted_ids"]))
            
            common_support_refute = set(dic_original_claim_info["org_list_supported_ids"]) & set(gen_dic_info["gen_list_refuted_ids"])
            common_refute_support = set(dic_original_claim_info["org_list_refuted_ids"]) & set(gen_dic_info["gen_list_supported_ids"])
            common_support_support = set(dic_original_claim_info["org_list_supported_ids"]) & set(gen_dic_info["gen_list_supported_ids"])
            common_refute_refute = set(dic_original_claim_info["org_list_refuted_ids"]) & set(gen_dic_info["gen_list_refuted_ids"])
            
            gen_dic_info["common_all"] = len(common_all)
            gen_dic_info["common_support_refute"] = len(common_support_refute)
            gen_dic_info["common_refute_support"] = len(common_refute_support)
            gen_dic_info["common_support_support"] = len(common_support_support)
            gen_dic_info["common_refute_refute"] = len(common_refute_refute)
            
            gen_dic_info["gen_comment"] = "success" 
            
    except Exception as e:
        dic_info["gen_comment"] = "exception : "+e
        logging.info(">>> Exception gen claim >>> ") 
        logging.info(claim) 
        logging.info(e)
        
    finally:
        return gen_dic_info           

In [28]:
df_cur_itr_model_all_res_selected = df_cur_itr_model_all_res.iloc[:, :].copy()

In [29]:
result_as_dict = []


for index_df, cur_row in tqdm(df_cur_itr_model_all_res_selected.iloc[:,:].iterrows(), total=len(df_cur_itr_model_all_res_selected)):
    cur_res = {}

    dic_info_org_claim = get_stat_of_original_claim(cur_row)
    #cur_res["org_claim_info"] = dic_info_org_claim    

    
    #for cur_paraphrased_sent in list_paraphrased_claims_with_sim_threshold:
    cur_paraphrased_sent = cur_row['gen_claim']
    cur_dic_paraphraased_claim_info = get_results_by_gen_claim(cur_paraphrased_sent, dic_info_org_claim)

    
    cur_merged_dict = {**dic_info_org_claim, **cur_dic_paraphraased_claim_info}
    result_as_dict.append(cur_merged_dict)   
    
# result_as_dict = []
# for cur_claim in list_results_fine_tuned:
#     #print(cur_claim.keys())
#     for cur_gen_paraphrased_claim in cur_claim[dic_key_sentence_info]:
#         cur_merged_dict = {**cur_claim["org_claim_info"], **cur_gen_paraphrased_claim}
#         result_as_dict.append(cur_merged_dict)    

  0%|          | 0/2320 [00:00<?, ?it/s]



abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning empty list 
abstract selector ::  returning 

In [30]:
df_paraphrased_detal = pd.DataFrame(result_as_dict)

In [31]:
fle_dataframe_to_save = 'df_all_existing_method_paraphrased_detail'

In [32]:
with open( '../../dfs_generated/paraphrased/existing_literature_methods/'+fle_dataframe_to_save+'_concat_prev.pkl', 'wb') as fp:
    pickle.dump(df_paraphrased_detal, fp)

## Calculate similarity score

In [33]:
fle_dataframe_to_save = 'df_all_existing_method_paraphrased_detail'

fle_dataframe_to_save = '../../dfs_generated/paraphrased/existing_literature_methods/'+fle_dataframe_to_save+'_concat_prev.pkl'


In [34]:
pd.read_pickle(fle_dataframe_to_save)

Unnamed: 0,org_claim,num_of_query,attack_result,attack_method,org_count_support,org_count_refute,org_list_supported_ids,org_list_refuted_ids,org_list_supported_confidence,org_list_refuted_confidence,...,gen_list_supported_confidence,gen_list_refuted_confidence,gen_list_supported_confidence_mean,gen_list_refuted_confidence_mean,gen_comment,common_all,common_support_refute,common_refute_support,common_support_support,common_refute_refute
0,1 in 5 million in UK have abnormal PrP positiv...,1,SkippedAttackResult,TextFoolerJin2019,0,0,[],[],[],[],...,[],[],0.00,0.0,no result,0,0,0,0,0
1,32% of liver transplantation programs required...,219,FailedAttackResult,TextFoolerJin2019,0,0,[],[],[],[],...,[],[],0.00,0.0,no result,0,0,0,0,0
2,40mg/day dosage of folic acid and 2mg/day dosa...,1,SkippedAttackResult,TextFoolerJin2019,0,0,[],[],[],[],...,[],[],0.00,0.0,no result,0,0,0,0,0
3,76-85% of people with severe mental disorder r...,329,FailedAttackResult,TextFoolerJin2019,0,1,[],[e8fhlo37.00012],[],[0.6389865875244141],...,[],[],0.00,0.0,no result,0,0,0,0,0
4,A T helper 2 cell (Th2) environment impedes di...,1,SkippedAttackResult,TextFoolerJin2019,2,0,"[xdnh9tos.00001, 1ac14ga6]",[],"[0.9057600498199463, 0.6250912547111511]",[],...,"[0.9057600498199463, 0.6250912547111511]",[],0.77,0.0,success,2,0,0,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2315,Women with a higher birth weight are more like...,21,FailedAttackResult,bae_garg_2019,1,1,[upqaahwu],[0knkkibl.00019],[0.5015872716903687],[0.8610731959342957],...,[0.5538117289543152],[],0.55,0.0,success,1,0,0,1,0
2316,Women with a higher birth weight are more like...,21,FailedAttackResult,bae_garg_2019,1,1,[upqaahwu],[0knkkibl.00019],[0.5015872716903687],[0.8610731959342957],...,[0.5538117289543152],[],0.55,0.0,success,1,0,0,1,0
2317,aPKCz causes tumour enhancement by affecting g...,1,SkippedAttackResult,bae_garg_2019,0,0,[],[],[],[],...,[],[],0.00,0.0,no result,0,0,0,0,0
2318,cSMAC formation enhances weak ligand signalling.,126,FailedAttackResult,bae_garg_2019,1,0,[5no746n6.00039],[],[0.6769531965255737],[],...,[],[],0.00,0.0,no result,0,0,0,0,0


In [37]:
df_paraphrased_detal['attack_method'].value_counts()

TextFoolerJin2019         774
checklist_ribeiro_2020    773
bae_garg_2019             773
Name: attack_method, dtype: int64

In [43]:
df_selected_instances = df_paraphrased_detal.copy()

In [42]:
import tensorflow.compat.v1 as tf
import tensorflow_hub as hub

from numpy import dot
from numpy.linalg import norm

#### USE

In [45]:
def start_use_session(module):
    with tf.Graph().as_default():
        sentences = tf.placeholder(tf.string)
        embed = hub.load(module)
        embeddings = embed(sentences)
        session = tf.train.MonitoredSession()
    return lambda x: session.run(embeddings, {sentences: x})

In [46]:
def get_use_similarity(cur_row, model_use):
    """
    computes the USE embeddings of each sentence and its similarity with its corresponding pair
    Args:
        sentence_pairs(dict): dictionary of lists with the similarity type as key and a list of two sentences as value
    
    Returns:
        similarities(dict): dictionary with similarity type as key and the similarity measure as value
    """
    embed_org_claim = model_use([cur_row['org_claim']])[0]
    embed_gen_claim = model_use([cur_row['gen_claim']])[0] 
    similarities = dot(embed_org_claim, embed_gen_claim)/(norm(embed_org_claim)* norm(embed_gen_claim))
    return similarities

In [47]:
embed_url = 'https://tfhub.dev/google/universal-sentence-encoder/4'
model_use = start_use_session(embed_url)

df_selected_instances['sim_use'] = df_selected_instances.apply(lambda cur_row: get_use_similarity(cur_row, model_use), axis=1)

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


### Bert Base

In [48]:
from transformers import BertTokenizer, BertModel

In [49]:
def get_bert_similarity(cur_row, tokenizer_bert, model_bert):
    tok_org = tokenizer_bert(cur_row['org_claim'], return_tensors='pt')
    tok_gen = tokenizer_bert(cur_row['gen_claim'], return_tensors='pt')
    embed_org = model_bert(**tok_org).last_hidden_state[0][0].detach().numpy()
    embed_gen = model_bert(**tok_gen).last_hidden_state[0][0].detach().numpy()
    similarities = dot(embed_org, embed_gen)/(norm(embed_org)* norm(embed_gen)) # computes the average of all the tokens' last_hidden_state
    return similarities    

In [50]:
tokenizer_bert_base = BertTokenizer.from_pretrained('bert-base-uncased')
model_bert_base = BertModel.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [51]:
df_selected_instances['sim_bert_base'] = df_selected_instances.apply(lambda cur_row: get_bert_similarity(cur_row, tokenizer_bert_base, 
                                                                                                        model_bert_base), axis=1)

### Bert mean Base

In [52]:
def get_bert_similarity_mean(cur_row, tokenizer_bert, model_bert):
    tok_org = tokenizer_bert(cur_row['org_claim'], return_tensors='pt')
    tok_gen = tokenizer_bert(cur_row['gen_claim'], return_tensors='pt')
    embed_org = np.mean(model_bert(**tok_org).last_hidden_state[0].detach().numpy(), axis=0)
    embed_gen = np.mean(model_bert(**tok_gen).last_hidden_state[0].detach().numpy(), axis=0)
    similarities = dot(embed_org, embed_gen)/(norm(embed_org)* norm(embed_gen)) 
    return similarities 

In [53]:
df_selected_instances['sim_bert_base_mean'] = df_selected_instances.apply(lambda cur_row: get_bert_similarity_mean(cur_row, tokenizer_bert_base, 
                                                                                                        model_bert_base), axis=1)

### BioBert

- https://github.com/dmis-lab/biobert
- Trained on :
    - PubMed abstracts [https://pubmed.ncbi.nlm.nih.gov/]
    - PubMed Central [https://www.ncbi.nlm.nih.gov/pmc/]

> We used the original vocabulary of BERT_BASE for the following reasons: (i) compatibility of BioBERT with BERT, which allows BERT pre-trained on general domain corpora to be re-used, and makes it easier to interchangeably use existing models based on BERT and BioBERT and (ii) any new words may still be represented and fine-tuned for the biomedical domain using the original WordPiece vocabulary of BERT. - BioBeRt

![](https://miro.medium.com/max/2400/0*uJ-K6nXkPf9M9KuP)

In [57]:
model_bio_bert_base = AutoModel.from_pretrained('dmis-lab/biobert-v1.1')
tokenizer_bio_bert_base = AutoTokenizer.from_pretrained('dmis-lab/biobert-v1.1')

Downloading:   0%|          | 0.00/433M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [58]:
df_selected_instances['sim_bert_bio_base_mean'] = df_selected_instances.apply(lambda cur_row: get_bert_similarity_mean(cur_row, tokenizer_bio_bert_base, 
                                                                                                        model_bio_bert_base), axis=1)

### BlueBERT

- Biomedical Language Understanding Evaluation (BLUE) benchmark for 5 domain-specific tasks from 10 different datasets.

- &check; NER
- &check; Rrelation extraction
- &check; Rentence similarity

- **Pretrained with**:
    - `BERT_BASE` 
    - `BERT_LARGE`
- **4 version of BlueBert Model**:
    - `BERT_BASE `+ `PubMed`
    - `BERT_BASE `+ `PubMed` + `MIMIC III`
    - `BERT_LARGE` + `PubMed`
    - `BERT_LARGE` + `PubMed` + `MIMIC III`
- BlueBERT is **fine-tuned** on the same three tasks as BioBERT

In [59]:
model_blue_bert_basePM = AutoModel.from_pretrained('bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12')
tokenizer_blue_bert_basePM = AutoTokenizer.from_pretrained('bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12')

Downloading:   0%|          | 0.00/313 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/441M [00:00<?, ?B/s]

Some weights of the model checkpoint at bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12 were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [60]:
df_selected_instances['sim_blue_bert_basePM'] = df_selected_instances.apply(lambda cur_row: get_bert_similarity_mean(cur_row, tokenizer_blue_bert_basePM, 
                                                                                                        model_blue_bert_basePM), axis=1)

### Clinical BERT and Bio_Clinical BERT

- Developed by MIT
- `BERT_BASE` and `BioBERT-Base v1.0` (+ `PubMed` 200K + `PMC` 270K)
- **Pretrained with**:
    - `MIMIC III` (all or partial) (V1)
    - Bio+Clinical BERT (uses all MIMIC III data) (V2)
    - Bio+Discharge Summary BERT (uses only the discharge summaries in MIMIC III) (V3)
    - BERT_BASE are Clinical BERT (all of MIMIC III dataset) (V4)
    -  Discharge Summary BERT (only discharge summaries) (V5)
- **Fine Tune** :
    - ??

- **3 out of 5 version are fine tuned for**:
    - &check; NER
    - &check; Rrelation extraction
    - &check; Rentence similarity    

In [61]:
model_bert_bio_clinical_base = AutoModel.from_pretrained('emilyalsentzer/Bio_ClinicalBERT')
tokenizer_bert_bio_clinical_base = AutoTokenizer.from_pretrained('emilyalsentzer/Bio_ClinicalBERT')

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [62]:
df_selected_instances['sim_bert_bio_clinical_base'] = df_selected_instances.apply(lambda cur_row: get_bert_similarity_mean(cur_row, tokenizer_bert_bio_clinical_base, 
                                                                                                        model_bert_bio_clinical_base), axis=1)

In [63]:
df_selected_instances.columns

Index(['org_claim', 'num_of_query', 'attack_result', 'attack_method',
       'org_count_support', 'org_count_refute', 'org_list_supported_ids',
       'org_list_refuted_ids', 'org_list_supported_confidence',
       'org_list_refuted_confidence', 'org_list_supported_confidence_mean',
       'org_list_refuted_confidence_mean', 'org_comment', 'gen_claim',
       'gen_count_support', 'gen_count_refute', 'gen_list_supported_ids',
       'gen_list_refuted_ids', 'gen_list_supported_confidence',
       'gen_list_refuted_confidence', 'gen_list_supported_confidence_mean',
       'gen_list_refuted_confidence_mean', 'gen_comment', 'common_all',
       'common_support_refute', 'common_refute_support',
       'common_support_support', 'common_refute_refute', 'sim_use',
       'sim_bert_base', 'sim_bert_base_mean', 'sim_bert_bio_base_mean',
       'sim_blue_bert_basePM', 'sim_bert_bio_clinical_base'],
      dtype='object')

In [64]:
df_selected_instances

Unnamed: 0,org_claim,num_of_query,attack_result,attack_method,org_count_support,org_count_refute,org_list_supported_ids,org_list_refuted_ids,org_list_supported_confidence,org_list_refuted_confidence,...,common_support_refute,common_refute_support,common_support_support,common_refute_refute,sim_use,sim_bert_base,sim_bert_base_mean,sim_bert_bio_base_mean,sim_blue_bert_basePM,sim_bert_bio_clinical_base
0,1 in 5 million in UK have abnormal PrP positiv...,1,SkippedAttackResult,TextFoolerJin2019,0,0,[],[],[],[],...,0,0,0,0,1.00,1.00,1.00,1.00,1.00,1.00
1,32% of liver transplantation programs required...,219,FailedAttackResult,TextFoolerJin2019,0,0,[],[],[],[],...,0,0,0,0,0.88,0.94,0.92,0.95,0.94,0.96
2,40mg/day dosage of folic acid and 2mg/day dosa...,1,SkippedAttackResult,TextFoolerJin2019,0,0,[],[],[],[],...,0,0,0,0,1.00,1.00,1.00,1.00,1.00,1.00
3,76-85% of people with severe mental disorder r...,329,FailedAttackResult,TextFoolerJin2019,0,1,[],[e8fhlo37.00012],[],[0.6389865875244141],...,0,0,0,0,0.55,0.85,0.83,0.85,0.83,0.91
4,A T helper 2 cell (Th2) environment impedes di...,1,SkippedAttackResult,TextFoolerJin2019,2,0,"[xdnh9tos.00001, 1ac14ga6]",[],"[0.9057600498199463, 0.6250912547111511]",[],...,0,0,2,0,1.00,1.00,1.00,1.00,1.00,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2315,Women with a higher birth weight are more like...,21,FailedAttackResult,bae_garg_2019,1,1,[upqaahwu],[0knkkibl.00019],[0.5015872716903687],[0.8610731959342957],...,0,0,1,0,0.94,1.00,0.99,1.00,0.99,0.99
2316,Women with a higher birth weight are more like...,21,FailedAttackResult,bae_garg_2019,1,1,[upqaahwu],[0knkkibl.00019],[0.5015872716903687],[0.8610731959342957],...,0,0,1,0,0.94,1.00,0.99,1.00,0.99,0.99
2317,aPKCz causes tumour enhancement by affecting g...,1,SkippedAttackResult,bae_garg_2019,0,0,[],[],[],[],...,0,0,0,0,1.00,1.00,1.00,1.00,1.00,1.00
2318,cSMAC formation enhances weak ligand signalling.,126,FailedAttackResult,bae_garg_2019,1,0,[5no746n6.00039],[],[0.6769531965255737],[],...,0,0,0,0,0.51,0.97,0.91,0.96,0.92,0.92


In [72]:
fle_dataframe_to_save = 'df_all_existing_method_paraphrased_with_sim_score'

with open( '../../dfs_generated/paraphrased/existing_literature_methods/'+fle_dataframe_to_save+'.pkl', 'wb') as fp:
    pickle.dump(df_selected_instances, fp)

In [65]:
def get_dataframes_by_majority_org_claim(df_all_paraphrased_org_claim):
    df_all_paraphrased_org_success = df_all_paraphrased_org_claim[df_all_paraphrased_org_claim['org_comment'] == 'success']
    
    # Select claims with majority
    df_paraphrased_org_support_major = df_all_paraphrased_org_success[
        df_all_paraphrased_org_success['org_count_support'] > df_all_paraphrased_org_success['org_count_refute']
    ]

    
    df_paraphrased_org_refute_major = df_all_paraphrased_org_success[
        df_all_paraphrased_org_success['org_count_support'] < df_all_paraphrased_org_success['org_count_refute']
    ]
    
    return df_paraphrased_org_support_major, df_paraphrased_org_refute_major, df_all_paraphrased_org_success


In [66]:
def get_df_succesfully_attacked_claim(df_paraphrased_support_major, df_paraphrased_refute_major):
    df_org_refute_gen_support = df_paraphrased_refute_major[
    df_paraphrased_refute_major['gen_count_support'] > df_paraphrased_refute_major['gen_count_refute']
    ]

    df_org_support_gen_refute = df_paraphrased_support_major[
        df_paraphrased_support_major['gen_count_support'] < df_paraphrased_support_major['gen_count_refute']
    ]
    
    return df_org_support_gen_refute, df_org_refute_gen_support



In [67]:
df_paraphrased_org_support_major, df_paraphrased_org_refute_major, df_all_paraphrased_cur_model_org_success= get_dataframes_by_majority_org_claim(df_selected_instances)

df_org_support_gen_refute, df_org_refute_gen_support = get_df_succesfully_attacked_claim(df_paraphrased_org_support_major, df_paraphrased_org_refute_major)



In [75]:
df_org_support_gen_refute['success_type'] = 'org_support_gen_refute'
df_org_refute_gen_support['success_type'] = 'org_refute_gen_support'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [69]:
df_org_support_gen_refute.shape

(22, 34)

In [89]:
df_org_support_gen_refute['attack_method'].value_counts()

bae_garg_2019             13
TextFoolerJin2019          8
checklist_ribeiro_2020     1
Name: attack_method, dtype: int64

In [90]:
df_org_refute_gen_support['attack_method'].value_counts()

bae_garg_2019        12
TextFoolerJin2019     7
Name: attack_method, dtype: int64

In [71]:
df_all_paraphrased_cur_model_org_success.shape

(1188, 34)

In [None]:
df_org_support_gen_refute, 

In [76]:
fle_dataframe_to_save = 'df_all_existing_method_paraphrased_attack_sup_to_ref_with_sim_score'

with open( '../../dfs_generated/paraphrased/existing_literature_methods/'+fle_dataframe_to_save+'.pkl', 'wb') as fp:
    pickle.dump(df_org_support_gen_refute, fp)

In [78]:
fle_dataframe_to_save = 'df_all_existing_method_paraphrased_attack_ref_to_sup_with_sim_score'

with open( '../../dfs_generated/paraphrased/existing_literature_methods/'+fle_dataframe_to_save+'.pkl', 'wb') as fp:
    pickle.dump(df_org_refute_gen_support, fp)

In [83]:
df_all_success_attack = pd.concat([df_org_support_gen_refute, 
                           df_org_refute_gen_support], axis =0)

In [85]:
df_all_success_attack

Unnamed: 0,org_claim,num_of_query,attack_result,attack_method,org_count_support,org_count_refute,org_list_supported_ids,org_list_refuted_ids,org_list_supported_confidence,org_list_refuted_confidence,...,common_refute_support,common_support_support,common_refute_refute,sim_use,sim_bert_base,sim_bert_base_mean,sim_bert_bio_base_mean,sim_blue_bert_basePM,sim_bert_bio_clinical_base,success_type
183,Having a main partner improves HIV outcomes.,44,SuccessfulAttackResult,TextFoolerJin2019,2,0,"[5qa2z9jj.00028, yxne28f0.00016]",[],"[0.8619446158409119, 0.683100163936615]",[],...,0,0,0,0.78,0.97,0.92,0.95,0.96,0.97,org_support_gen_refute
206,IL-10 production by monocytes inhibits CD4 + T...,112,FailedAttackResult,TextFoolerJin2019,3,0,"[z66yixzw.00003, xpc4yizb.00023, umn83919.00023]",[],"[0.9660091400146484, 0.7713584303855896, 0.581...",[],...,0,0,0,0.93,0.97,0.98,1.0,0.99,0.99,org_support_gen_refute
265,Lice attenuated SIV vaccines induce a stronger...,260,FailedAttackResult,TextFoolerJin2019,11,2,"[3qyn3ka5.00021, 61wibso4.00020, ss0g0er0.0002...","[ks7s0uhf.00007, zumq3tu0]","[0.9840624928474426, 0.9577320218086243, 0.938...","[0.5396791696548462, 0.5368397235870361]",...,0,0,0,0.81,0.93,0.94,0.98,0.96,0.98,org_support_gen_refute
508,The risk of cancer rises with level of alcohol...,43,SuccessfulAttackResult,TextFoolerJin2019,2,0,"[3gj5afvx, 2qr4c3dg.00032]",[],"[0.959775447845459, 0.8129923343658447]",[],...,0,0,0,0.83,0.96,0.95,0.96,0.93,0.97,org_support_gen_refute
578,Angiotensin converting enzyme inhibitors are a...,86,SuccessfulAttackResult,TextFoolerJin2019,5,3,"[e3zx2kh5.00006, 240jc7l4.00002, g3t2u0gf, gys...","[xsutuwn0.00009, 4r23a72a.00054, x1kankxd]","[0.728293776512146, 0.7071807384490967, 0.6744...","[0.8785445690155029, 0.8742775917053223, 0.652...",...,0,1,0,0.74,0.97,0.95,0.95,0.93,0.98,org_support_gen_refute
610,Chenodeosycholic acid treatment increases whol...,144,FailedAttackResult,TextFoolerJin2019,1,0,[wbjuz9yp.00019],[],[0.5970988273620605],[],...,0,0,0,0.54,0.94,0.94,0.97,0.93,0.98,org_support_gen_refute
619,Cytochrome c is released from the mitochondria...,77,FailedAttackResult,TextFoolerJin2019,6,0,"[lxwnp228, n9yq8enz.00017, s6kpewt6.00017, vzv...",[],"[0.9057084321975708, 0.805146336555481, 0.6127...",[],...,0,0,0,0.7,0.95,0.96,0.98,0.98,0.98,org_support_gen_refute
624,Enhanced early production of inflammatory chem...,234,FailedAttackResult,TextFoolerJin2019,4,1,"[bdm7yvn1.00008, je9t4i6q.00003, l5eeurgd.0000...",[jn8dh4vk.00033],"[0.9057894945144653, 0.8406556248664856, 0.826...",[0.5971629619598389],...,0,0,0,0.84,0.93,0.93,0.95,0.94,0.97,org_support_gen_refute
1120,Participants who quit smoking reduce lung canc...,4,FailedAttackResult,checklist_ribeiro_2020,1,0,[02lsd9p6.00029],[],[0.7352290749549866],[],...,0,0,0,0.94,1.0,1.0,1.0,0.99,1.0,org_support_gen_refute
1574,Active caspase-11 protein promotes pyroptosis.,120,FailedAttackResult,bae_garg_2019,1,0,[blbaqbo7.00032],[],[0.7216055393218994],[],...,0,0,0,0.47,0.9,0.96,0.98,0.96,0.99,org_support_gen_refute


In [86]:
fle_dataframe_to_save = 'df_all_existing_method_paraphrased_success_attack_with_sim_score'

with open( '../../dfs_generated/paraphrased/existing_literature_methods/'+fle_dataframe_to_save+'.pkl', 'wb') as fp:
    pickle.dump(df_all_success_attack, fp)

In [91]:
fle_dataframe_to_save = 'df_all_existing_method_paraphrased_success_attack_with_sim_score_clm'

with open( '../../dfs_generated/paraphrased/existing_literature_methods/'+fle_dataframe_to_save+'.pkl', 'wb') as fp:
    pickle.dump(df_all_success_attack[['org_claim', 'gen_claim', 'success_type','attack_method',
                                      'sim_use','sim_bert_base', 'sim_bert_base_mean', 
                                       'sim_bert_bio_base_mean','sim_blue_bert_basePM', 
                                       'sim_bert_bio_clinical_base']], fp)

In [92]:
df_all_success_attack[['org_claim', 'gen_claim', 'success_type','attack_method',
                                      'sim_use','sim_bert_base', 'sim_bert_base_mean', 
                                       'sim_bert_bio_base_mean','sim_blue_bert_basePM', 
                                       'sim_bert_bio_clinical_base']].to_csv('../../dfs_generated/paraphrased/existing_literature_methods/'+fle_dataframe_to_save+'.csv')

In [94]:
df_all_success_attack['attack_method'].value_counts()

bae_garg_2019             25
TextFoolerJin2019         15
checklist_ribeiro_2020     1
Name: attack_method, dtype: int64

In [95]:
df_all_success_attack[['org_claim', 'gen_claim', 'success_type','attack_method',
                                      'sim_use','sim_bert_base', 'sim_bert_base_mean', 
                                       'sim_bert_bio_base_mean','sim_blue_bert_basePM', 
                                       'sim_bert_bio_clinical_base']]

Unnamed: 0,org_claim,gen_claim,success_type,attack_method,sim_use,sim_bert_base,sim_bert_base_mean,sim_bert_bio_base_mean,sim_blue_bert_basePM,sim_bert_bio_clinical_base
183,Having a main partner improves HIV outcomes.,Having a main sidekick improves HIV outcomes.,org_support_gen_refute,TextFoolerJin2019,0.78,0.97,0.92,0.95,0.96,0.97
206,IL-10 production by monocytes inhibits CD4 + T...,IL-10 manufactures by monocytes inhibits CD4 +...,org_support_gen_refute,TextFoolerJin2019,0.93,0.97,0.98,1.0,0.99,0.99
265,Lice attenuated SIV vaccines induce a stronger...,Lice attenuated VIS vaccine engendered a harsh...,org_support_gen_refute,TextFoolerJin2019,0.81,0.93,0.94,0.98,0.96,0.98
508,The risk of cancer rises with level of alcohol...,The risk of cancer rises with level of cocktai...,org_support_gen_refute,TextFoolerJin2019,0.83,0.96,0.95,0.96,0.93,0.97
578,Angiotensin converting enzyme inhibitors are a...,Angiotensin change biochemical inhibitors are ...,org_support_gen_refute,TextFoolerJin2019,0.74,0.97,0.95,0.95,0.93,0.98
610,Chenodeosycholic acid treatment increases whol...,Chenodeosycholic amino treatment increased who...,org_support_gen_refute,TextFoolerJin2019,0.54,0.94,0.94,0.97,0.93,0.98
619,Cytochrome c is released from the mitochondria...,Cytochrome jim is unblocked from the mitochond...,org_support_gen_refute,TextFoolerJin2019,0.7,0.95,0.96,0.98,0.98,0.98
624,Enhanced early production of inflammatory chem...,Rising early produces of inflammatory chemokin...,org_support_gen_refute,TextFoolerJin2019,0.84,0.93,0.93,0.95,0.94,0.97
1120,Participants who quit smoking reduce lung canc...,Participants who quit smoking reduce lung canc...,org_support_gen_refute,checklist_ribeiro_2020,0.94,1.0,1.0,1.0,0.99,1.0
1574,Active caspase-11 protein promotes pyroptosis.,Active caspase-11 channels preventing pyroptosis.,org_support_gen_refute,bae_garg_2019,0.47,0.9,0.96,0.98,0.96,0.99
