# Leveraging LLMs for Abstract Screening

In [1]:
import json
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, BertModel

from openai import OpenAI
import os
from dotenv import load_dotenv



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the three training splits and the dev split. Every file is parsed the
# same way: first line is the header, no rows skipped.
train_10, train_20, train_40, dev = (
    pd.read_csv(csv_path, header=0, skiprows=0)
    for csv_path in (
        './Dataset/CD008874_train_10_12302024.csv',
        './Dataset/CD008874_train_20_12302024.csv',
        './Dataset/CD008874_train_40_12302024.csv',
        './Dataset/CD008874_dev_200_12302024.csv',
    )
)


In [3]:
def GetInput(row):
    output = '""" ### Title '
    output += row['title']
    output += ' ### Abstract '
    output += row['abstract']
    output += '"""'
    return output

In [4]:

# Add the delimited title/abstract text as an 'Input' column on every split.
for split_df in (train_10, train_20, train_40, dev):
    split_df['Input'] = split_df.apply(GetInput, axis=1)


In [5]:
# checkpoint name shared by the tokenizer and the encoder (uncased BERT)
transformer_name = "bert-base-uncased"

# pick the GPU when one is visible, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# tokenizer matching the checkpoint
tokenizer = AutoTokenizer.from_pretrained(transformer_name)

# pretrained encoder, moved straight onto the selected device
model = BertModel.from_pretrained(transformer_name).to(device)

In [6]:
def getBertCls(text):
  '''
  this function takes the following input:
  text to be represented by the BERT CLS token
  and gives the following output:
  a 1-D numpy array: the final-layer hidden state at the first
  ([CLS]) position

  The input is truncated to 512 *tokens* by the tokenizer. Slicing the raw
  string to 512 characters (the previous behaviour) discarded most of a
  typical abstract long before the model's length limit was reached.
  '''
  tok_text = tokenizer(text,
                       truncation=True,
                       max_length=512,
                       return_tensors='pt').to(device)
  # inference only: skip building the autograd graph
  with torch.no_grad():
    mod_output = model(**tok_text)
  # batch of one -> take row 0 of the [CLS] slice
  return mod_output.last_hidden_state[:, 0, :].cpu().numpy()[0]

In [7]:
# store the CLS embedding of each document's 'Input' text in 'BERT_cls'
for split_df in (train_10, train_20, train_40, dev):
    split_df['BERT_cls'] = split_df['Input'].apply(getBertCls)


In [8]:
def calCosSim(emb1, emb2):
  '''
  return the cosine similarity of the
  2 input numpy array

  emb1 @ emb2.T divided by the product of the two norms.
  If either input has zero norm the similarity is returned as 0
  instead of NaN, so a degenerate vector can never win an argmax.
  '''
  dot = emb1 @ emb2.T
  denom = np.linalg.norm(emb1) * np.linalg.norm(emb2)
  if denom == 0:
    # a zero-norm input makes the ratio 0/0; report "no similarity"
    return dot * 0.0
  # out-of-place division: in-place `/=` fails on integer arrays
  return dot / denom

In [9]:
def GetPosExample_train10(row):
    """Pick the most similar included record from train_10 for `row`.

    Candidates are train_10 rows with Include_cont == 1 whose PMID differs
    from the row's own PMID. Returns the 'Input' text of the candidate whose
    'BERT_cls' has the highest calCosSim with the row's 'BERT_cls'.
    """
    is_included = train_10['Include_cont'] == 1
    is_other_record = train_10['PMID'] != row['PMID']
    candidates = train_10[is_included & is_other_record]

    query_emb = row['BERT_cls']
    scores = candidates['BERT_cls'].apply(lambda cand_emb: calCosSim(query_emb, cand_emb))

    best_pos = np.argmax(scores)
    return candidates.iloc[best_pos]['Input']

In [10]:
def GetPosExample_train20(row):
    """Pick the most similar included record from train_20 for `row`.

    Candidates are train_20 rows with Include_cont == 1 whose PMID differs
    from the row's own PMID. Returns the 'Input' text of the candidate whose
    'BERT_cls' has the highest calCosSim with the row's 'BERT_cls'.
    """
    is_included = train_20['Include_cont'] == 1
    is_other_record = train_20['PMID'] != row['PMID']
    candidates = train_20[is_included & is_other_record]

    query_emb = row['BERT_cls']
    scores = candidates['BERT_cls'].apply(lambda cand_emb: calCosSim(query_emb, cand_emb))

    best_pos = np.argmax(scores)
    return candidates.iloc[best_pos]['Input']

In [11]:
def GetPosExample_train40(row):
    """Pick the most similar included record from train_40 for `row`.

    Candidates are train_40 rows with Include_cont == 1 whose PMID differs
    from the row's own PMID. Returns the 'Input' text of the candidate whose
    'BERT_cls' has the highest calCosSim with the row's 'BERT_cls'.
    """
    is_included = train_40['Include_cont'] == 1
    is_other_record = train_40['PMID'] != row['PMID']
    candidates = train_40[is_included & is_other_record]

    query_emb = row['BERT_cls']
    scores = candidates['BERT_cls'].apply(lambda cand_emb: calCosSim(query_emb, cand_emb))

    best_pos = np.argmax(scores)
    return candidates.iloc[best_pos]['Input']

In [12]:
# For each training-set size, attach the nearest included exemplar to the
# training split itself and then to the dev split.
for train_df, example_col, pick_example in (
    (train_10, 'PosExample_10', GetPosExample_train10),
    (train_20, 'PosExample_20', GetPosExample_train20),
    (train_40, 'PosExample_40', GetPosExample_train40),
):
    train_df[example_col] = train_df.apply(pick_example, axis=1)
    dev[example_col] = dev.apply(pick_example, axis=1)

In [13]:
# Load variables from a .env file, then build the OpenAI client from the
# OPENAI_API_KEY environment variable.
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [14]:
# System-prompt template shared by the getRelScore_* functions, which append a
# one-shot "### Example" section to it before each request.
# - A trailing backslash inside the literal joins the next source line onto the
#   current one, so those sentences reach the model as a single paragraph.
# - In the cells visible here the string is only concatenated, never passed
#   through str.format or an f-string, so the doubled braces in the output
#   template are sent to the model literally as "{{" and "}}".
prompt = """

## Instruction

You will take the role of a scientific research reviewer to evaluate titles and abstracts for further assessment \
concerning the systematic review study titled \
"Airway physical examination tests for detection of difficult airway management in apparently normal adult patients" \
Please generate a relevance score for the title and abstract in triple quotes based on the following inclusion criteria. \
The relevance score should be between 0 (completely irrelevant) to 100 (perfectly relevant). \
You will need to review the abstract and apply the inclusion criteria step by step to determine if it meets the requirements. \
Please think through your decision-making process and explain each step.

### Inclusion Criteria

1. Study Type: Full-text diagnostic test accuracy studies.

2. Index Tests (one or more):
   - Mallampati test
   - Modified Mallampati test
   - Wilson risk score
   - Thyromental distance
   - Sternomental distance
   - Mouth opening test
   - Upper lip bite test

3. Target Condition: Difficult airway, defined by:
   - Difficult face mask ventilation
   - Difficult laryngoscopy
   - Difficult tracheal intubation
   - Failed intubation

4. Participants:
   - Adult patients
   - No obvious airway abnormalities
   - Standard laryngoscope used
   - Standard tracheal tube used

### Process

For each abstract, follow this thought process:

1. **Step 1: Study Type** – Is the study a diagnostic test accuracy study?
2. **Step 2: Index Test(s)** – Does the abstract mention one or more of the index tests listed above?
3. **Step 3: Target Condition** – Does the abstract involve a target condition of difficult airway, as defined by the criteria?
4. **Step 4: Participants** – Are the participants adult patients without obvious airway abnormalities, \
and were standard laryngoscopy and tracheal tube used?

After evaluating the abstract based on these steps, determine if it should be included in the review.

### Output

Expected output format:

{{
    "RelevanceScore": int from 0 to 100,
    "Reason": "Brief explanation of the Relevance Score based on inclusion criteria"
}}

"""


In [15]:
def getRelScore_10(row):
    """Ask gpt-4o to score one record, using its train_10 exemplar as a one-shot example.

    Parameters
    ----------
    row : DataFrame row exposing 'Input' (text to score) and 'PosExample_10'
        (exemplar text appended to the system prompt).

    Returns
    -------
    str or None
        The raw message content of the first completion choice, or None when
        the request fails.

    NOTE(review): 'PosExample_10' holds an 'Input' value, which GetInput
    already wraps in triple quotes, so the delimiters are doubled in the
    prompt. Left unchanged so prompts stay comparable with earlier runs.
    """
    import warnings  # local import keeps this cell self-contained

    txt = row['Input']
    example = row['PosExample_10']
    prompt_used = prompt + f'\n\n### Example \n\n Below is an example delimited by triple quotes that should definitely be included: \n """{example}"""'
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": prompt_used},
                {"role": "user", "content": txt},
            ],
        )
        return completion.choices[0].message.content
    except Exception as exc:
        # Catch Exception only: a bare `except:` also swallows
        # KeyboardInterrupt, which makes a long scoring run impossible to stop.
        warnings.warn(
            f"getRelScore_10 failed for PMID {row.get('PMID', '<unknown>')}: {exc!r}"
        )
        return None


In [16]:
# Score the 10-example training split and then the dev split with exemplars
# drawn from train_10; the raw model reply is stored in 'Results_10'.
for split_df in (train_10, dev):
    split_df['Results_10'] = split_df.apply(getRelScore_10, axis=1)

In [17]:
def getRelScore_20(row):
    """Ask gpt-4o to score one record, using its train_20 exemplar as a one-shot example.

    Parameters
    ----------
    row : DataFrame row exposing 'Input' (text to score) and 'PosExample_20'
        (exemplar text appended to the system prompt).

    Returns
    -------
    str or None
        The raw message content of the first completion choice, or None when
        the request fails.

    NOTE(review): 'PosExample_20' holds an 'Input' value, which GetInput
    already wraps in triple quotes, so the delimiters are doubled in the
    prompt. Left unchanged so prompts stay comparable with earlier runs.
    """
    import warnings  # local import keeps this cell self-contained

    txt = row['Input']
    example = row['PosExample_20']
    prompt_used = prompt + f'\n\n### Example \n\n Below is an example delimited by triple quotes that should definitely be included: \n """{example}"""'
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": prompt_used},
                {"role": "user", "content": txt},
            ],
        )
        return completion.choices[0].message.content
    except Exception as exc:
        # Catch Exception only: a bare `except:` also swallows
        # KeyboardInterrupt, which makes a long scoring run impossible to stop.
        warnings.warn(
            f"getRelScore_20 failed for PMID {row.get('PMID', '<unknown>')}: {exc!r}"
        )
        return None

In [18]:
# Score the 20-example training split and then the dev split with exemplars
# drawn from train_20; the raw model reply is stored in 'Results_20'.
for split_df in (train_20, dev):
    split_df['Results_20'] = split_df.apply(getRelScore_20, axis=1)

In [19]:
def getRelScore_40(row):
    """Ask gpt-4o to score one record, using its train_40 exemplar as a one-shot example.

    Parameters
    ----------
    row : DataFrame row exposing 'Input' (text to score) and 'PosExample_40'
        (exemplar text appended to the system prompt).

    Returns
    -------
    str or None
        The raw message content of the first completion choice, or None when
        the request fails.

    NOTE(review): 'PosExample_40' holds an 'Input' value, which GetInput
    already wraps in triple quotes, so the delimiters are doubled in the
    prompt. Left unchanged so prompts stay comparable with earlier runs.
    """
    import warnings  # local import keeps this cell self-contained

    txt = row['Input']
    example = row['PosExample_40']
    prompt_used = prompt + f'\n\n### Example \n\n Below is an example delimited by triple quotes that should definitely be included: \n """{example}"""'
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": prompt_used},
                {"role": "user", "content": txt},
            ],
        )
        return completion.choices[0].message.content
    except Exception as exc:
        # Catch Exception only: a bare `except:` also swallows
        # KeyboardInterrupt, which makes a long scoring run impossible to stop.
        warnings.warn(
            f"getRelScore_40 failed for PMID {row.get('PMID', '<unknown>')}: {exc!r}"
        )
        return None

In [20]:
# Score the 40-example training split and then the dev split with exemplars
# drawn from train_40; the raw model reply is stored in 'Results_40'.
for split_df in (train_40, dev):
    split_df['Results_40'] = split_df.apply(getRelScore_40, axis=1)

In [21]:

def _extract_reply_field(res, key):
    """Return `key` from the JSON object embedded in a model reply, else "Error".

    Candidate JSON texts are tried in order:
      1. the span from the first '{' to the last '}' (plain JSON, JSON inside
         a code fence, or JSON preceded/followed by prose);
      2. that span with successive outer brace layers peeled off (the model
         sometimes echoes the "{{ ... }}" template literally);
      3. the two fixed-offset slices used historically, res[1:-1] and
         res[8:-4], so every reply that parsed before still parses.
    """
    if not isinstance(res, str):
        # e.g. None from a failed API call
        return "Error"

    candidates = []
    start, end = res.find('{'), res.rfind('}')
    if start != -1 and end > start:
        span = res[start:end + 1]
        candidates.append(span)
        inner = span[1:-1].strip()
        # each pass shortens `inner`, so the loop always terminates
        while inner.startswith('{') and inner.endswith('}'):
            candidates.append(inner)
            inner = inner[1:-1].strip()
    candidates.extend([res[1:-1], res[8:-4]])

    for text in candidates:
        try:
            parsed = json.loads(text)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass
            continue
        if isinstance(parsed, dict) and key in parsed:
            return parsed[key]
    return "Error"


def parseScore(res):
    """Extract "RelevanceScore" from a raw model reply.

    Returns the decoded JSON value, or the string "Error" when the reply is
    not a string or no JSON object containing the key can be found.
    """
    return _extract_reply_field(res, "RelevanceScore")


def parseReason(res):
    """Extract "Reason" from a raw model reply.

    Returns the decoded JSON value, or the string "Error" when the reply is
    not a string or no JSON object containing the key can be found.
    """
    return _extract_reply_field(res, "Reason")


In [22]:
# Split the raw replies into score and reason columns, then persist the split.
for out_col, parse in (('RelevanceScore', parseScore), ('Reason', parseReason)):
    train_10[out_col] = train_10['Results_10'].apply(parse)
train_10.to_csv('./Dataset/DynamicOneShotCoT_Training_10_12302024.csv', index=False)

In [23]:
# Split the raw replies into score and reason columns, then persist the split.
for out_col, parse in (('RelevanceScore', parseScore), ('Reason', parseReason)):
    train_20[out_col] = train_20['Results_20'].apply(parse)
train_20.to_csv('./Dataset/DynamicOneShotCoT_Training_20_12302024.csv', index=False)


In [24]:

# Split the raw replies into score and reason columns, then persist the split.
for out_col, parse in (('RelevanceScore', parseScore), ('Reason', parseReason)):
    train_40[out_col] = train_40['Results_40'].apply(parse)
train_40.to_csv('./Dataset/DynamicOneShotCoT_Training_40_12302024.csv', index=False)

In [25]:

# The dev split was scored three times (once per exemplar pool); parse each
# raw-reply column into its own score/reason pair before saving.
for raw_col, score_col, reason_col in (
    ('Results_10', 'RelevanceScore_10', 'Reason_10'),
    ('Results_20', 'RelevanceScore_20', 'Reason_20'),
    ('Results_40', 'RelevanceScore_40', 'Reason_40'),
):
    dev[score_col] = dev[raw_col].apply(parseScore)
    dev[reason_col] = dev[raw_col].apply(parseReason)

dev.to_csv('./Dataset/DynamicOneShotCoT_dev_12302024.csv', index=False)