# Leveraging LLMs for Abstract Screening

In [1]:
import json
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, BertModel

from openai import OpenAI
import os
from dotenv import load_dotenv



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Parser settings shared by every split: the first line of each CSV is the
# header row and no leading lines are skipped.
csv_options = {'header': 0, 'skiprows': 0}

# Training splits (the numeric suffix is the split name used throughout the notebook).
train_10 = pd.read_csv(
    '/workspaces/LLMAbstractScreen/Dataset/12302024/CD008874_train_10_12302024.csv',
    **csv_options,
)
train_20 = pd.read_csv(
    '/workspaces/LLMAbstractScreen/Dataset/12302024/CD008874_train_20_12302024.csv',
    **csv_options,
)
train_40 = pd.read_csv(
    '/workspaces/LLMAbstractScreen/Dataset/12302024/CD008874_train_40_12302024.csv',
    **csv_options,
)

# Records that will be screened by the model.
test = pd.read_csv(
    '/workspaces/LLMAbstractScreen/Dataset/01042025/CD008874_test_01042025.csv',
    **csv_options,
)

In [3]:
def GetInput(row):
    output = '""" ### Title '
    output += row['title']
    output += ' ### Abstract '
    output += row['abstract']
    output += '"""'
    return output

In [4]:

# Row-wise: render each record's title and abstract into the 'Input' text.
train_10['Input'] = train_10.apply(GetInput, axis=1)
train_20['Input'] = train_20.apply(GetInput, axis=1)
train_40['Input'] = train_40.apply(GetInput, axis=1)
test['Input'] = test.apply(GetInput, axis=1)


In [5]:
# Checkpoint name shared by the tokenizer and the encoder.
transformer_name = "bert-base-uncased"

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer matching the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(transformer_name)

# Pretrained BERT encoder, placed on the selected device.
model = BertModel.from_pretrained(transformer_name).to(device)

In [6]:
def getBertCls(text):
  '''
  Embed a piece of text as the BERT [CLS] vector.

  input:
    text: string to embed; only its first 1500 characters are used
  output:
    1-D numpy array holding the final-layer hidden state at position 0
    (the first token) for this text

  Relies on the notebook-level `tokenizer`, `model` and `device`.
  '''
  # truncation=True caps the token sequence at the tokenizer's configured
  # maximum length, so a 1500-character slice that tokenizes into more
  # pieces than the model accepts no longer crashes the forward pass.
  tok_text = tokenizer(text[:1500],
                       truncation=True,
                       return_tensors='pt').to(device)
  # Inference only: skip autograd bookkeeping to save memory and time.
  with torch.no_grad():
    mod_output = model(**tok_text,
                       output_hidden_states=True)
  # hidden_states[-1] is the output of the last layer.
  last_hidden_states = mod_output.hidden_states[-1]
  # Take token position 0 for the single sequence in the batch.
  return last_hidden_states[:,0,:].cpu().detach().numpy()[0]

In [7]:
# Store the [CLS] embedding of every record's 'Input' text in 'BERT_cls',
# for each training split and for the test set.
for split_frame in (train_10, train_20, train_40, test):
    split_frame['BERT_cls'] = split_frame['Input'].apply(getBertCls)

In [8]:
def calCosSim(emb1, emb2):
  '''
  Cosine similarity between two numpy arrays: their product
  (emb1 @ emb2.T) divided by the product of their norms.
  '''
  norm_product = np.linalg.norm(emb1) * np.linalg.norm(emb2)
  return (emb1 @ emb2.T) / norm_product

In [9]:
def GetPosExample_train10(row):
    """Return the 'Input' text of the train_10 positive most similar to `row`.

    Candidates are the train_10 records with ``Include_cont == 1`` whose PMID
    differs from the row's PMID; similarity is `calCosSim` between the row's
    ``BERT_cls`` vector and each candidate's ``BERT_cls`` vector.
    """
    query_vec = row['BERT_cls']
    is_included = train_10['Include_cont'] == 1
    is_other_record = train_10['PMID'] != row['PMID']
    candidates = train_10[is_included & is_other_record].copy()
    candidates['Sim'] = candidates['BERT_cls'].apply(
        lambda vec: calCosSim(query_vec, vec)
    )
    # Position (not label) of the highest similarity, hence iloc below.
    best_position = np.argmax(candidates['Sim'])
    return candidates.iloc[best_position]['Input']

In [10]:
def GetPosExample_train20(row):
    """Return the 'Input' text of the train_20 positive most similar to `row`.

    Candidates are the train_20 records with ``Include_cont == 1`` whose PMID
    differs from the row's PMID; similarity is `calCosSim` between the row's
    ``BERT_cls`` vector and each candidate's ``BERT_cls`` vector.
    """
    query_vec = row['BERT_cls']
    is_included = train_20['Include_cont'] == 1
    is_other_record = train_20['PMID'] != row['PMID']
    candidates = train_20[is_included & is_other_record].copy()
    candidates['Sim'] = candidates['BERT_cls'].apply(
        lambda vec: calCosSim(query_vec, vec)
    )
    # Position (not label) of the highest similarity, hence iloc below.
    best_position = np.argmax(candidates['Sim'])
    return candidates.iloc[best_position]['Input']

In [11]:
def GetPosExample_train40(row):
    """Return the 'Input' text of the train_40 positive most similar to `row`.

    Candidates are the train_40 records with ``Include_cont == 1`` whose PMID
    differs from the row's PMID; similarity is `calCosSim` between the row's
    ``BERT_cls`` vector and each candidate's ``BERT_cls`` vector.
    """
    query_vec = row['BERT_cls']
    is_included = train_40['Include_cont'] == 1
    is_other_record = train_40['PMID'] != row['PMID']
    candidates = train_40[is_included & is_other_record].copy()
    candidates['Sim'] = candidates['BERT_cls'].apply(
        lambda vec: calCosSim(query_vec, vec)
    )
    # Position (not label) of the highest similarity, hence iloc below.
    best_position = np.argmax(candidates['Sim'])
    return candidates.iloc[best_position]['Input']

In [12]:
# For every test record, store the most similar positive example drawn from
# each training split in its own column.
for example_column, example_finder in (
    ('PosExample_10', GetPosExample_train10),
    ('PosExample_20', GetPosExample_train20),
    ('PosExample_40', GetPosExample_train40),
):
    test[example_column] = test.apply(example_finder, axis=1)

In [13]:
# Load variables from a .env file, then build the OpenAI client from the
# OPENAI_API_KEY environment variable (None when the variable is unset).
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [14]:
# System-prompt text shared by the getRelScore_* functions, which append a
# positive example to it before each request.
# A backslash at the end of a line inside the literal joins that line with the
# next one, so no newline is emitted there.
# NOTE(review): in the visible cells this string is only concatenated, never
# passed through str.format, so the doubled braces in the "Output" section
# reach the model literally as "{{" / "}}" — confirm whether that is intended.
# NOTE(review): the "Evaluation Process" list is numbered 2-4 while its items
# are labelled Step 1-3 — confirm whether that is intended.
prompt = """

### Instruction

You will take the role of a scientific research reviewer to evaluate titles and abstracts for further assessment. \
Please generate a relevance score for the title and abstract in triple quotes based on the following inclusion criteria. \
When generating the relevance score, please think step by step to determine if the title and abstract satisfy the inclusion criteria. \
Try to include all potentially relevant papers for further screening. \
The output should be binary, 1 being to include and 0 being to exclude.


### Inclusion Criteria

1. Studies including any of the Index Tests for the diagnosis of the Target Condition.

2. Index Tests (at least one of the following):
   - Mallampati test
   - Modified Mallampati test
   - Wilson risk score
   - Thyromental distance
   - Sternomental distance
   - Mouth opening test
   - Upper lip bite test

3. Target Condition: difficult airway, with at least one of the following reference standards:
   - Difficult face mask ventilation
   - Difficult laryngoscopy
   - Difficult tracheal intubation
   - Failed intubation

4. Participants:
   - Adult patients without abnormalities associated with difficult airway
   - Having laryngoscopy and the trachea intubated


### Evaluation Process

Please think through the following process

2. **Step 1: Index Test** – Does the abstract mention one or more of the index tests listed above?
3. **Step 2: Target Condition** – Does the abstract involve a target condition of difficult airway, as defined by the criteria?
4. **Step 3: Participants** – Are the participants adult patients without abnormalities associated with difficult airway? \
Did they have laryngoscopy and the trachea intubated?

After evaluating the abstract based on these steps, determine if it should be included in the review.

### Output

Expected output format:

{{
    "Relevance Score": int 1/0,
    "Reason": "Brief explanation of the Relevance Score based on inclusion criteria"
}}

"""


In [15]:
def getRelScore_10(row):
    """Ask gpt-4o to screen one record, using its train_10 positive example.

    The system message is the shared `prompt` followed by the record's
    ``PosExample_10`` text; the user message is the record's ``Input`` text.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``'Input'`` and ``'PosExample_10'``.

    Returns
    -------
    str or None
        Content of the first completion choice, or ``None`` when the request
        raised an exception (a ``RuntimeWarning`` describing it is issued).
    """
    import warnings  # local import so this cell does not depend on the import cell

    txt = row['Input']
    example = row['PosExample_10']
    # NOTE(review): `example` comes from an 'Input' column that GetInput already
    # wraps in triple quotes, so this adds a second pair of delimiters. The text
    # is left unchanged to keep the prompt identical — confirm whether intended.
    prompt_used = prompt + f'\n\n### Example \n\n Below is an example delimited by triple quotes that should definitely be included: \n """{example}"""'
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": prompt_used},
                {"role": "user", "content": txt},
            ],
        )
        return completion.choices[0].message.content
    except Exception as exc:
        # Catch Exception rather than everything: a bare except would also
        # swallow KeyboardInterrupt, making a long apply() run impossible to stop.
        warnings.warn(f"getRelScore_10 request failed: {exc!r}", RuntimeWarning)
        return None


In [16]:
def getRelScore_20(row):
    """Ask gpt-4o to screen one record, using its train_20 positive example.

    The system message is the shared `prompt` followed by the record's
    ``PosExample_20`` text; the user message is the record's ``Input`` text.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``'Input'`` and ``'PosExample_20'``.

    Returns
    -------
    str or None
        Content of the first completion choice, or ``None`` when the request
        raised an exception (a ``RuntimeWarning`` describing it is issued).
    """
    import warnings  # local import so this cell does not depend on the import cell

    txt = row['Input']
    example = row['PosExample_20']
    # NOTE(review): `example` comes from an 'Input' column that GetInput already
    # wraps in triple quotes, so this adds a second pair of delimiters. The text
    # is left unchanged to keep the prompt identical — confirm whether intended.
    prompt_used = prompt + f'\n\n### Example \n\n Below is an example delimited by triple quotes that should definitely be included: \n """{example}"""'
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": prompt_used},
                {"role": "user", "content": txt},
            ],
        )
        return completion.choices[0].message.content
    except Exception as exc:
        # Catch Exception rather than everything: a bare except would also
        # swallow KeyboardInterrupt, making a long apply() run impossible to stop.
        warnings.warn(f"getRelScore_20 request failed: {exc!r}", RuntimeWarning)
        return None

In [17]:
def getRelScore_40(row):
    """Ask gpt-4o to screen one record, using its train_40 positive example.

    The system message is the shared `prompt` followed by the record's
    ``PosExample_40`` text; the user message is the record's ``Input`` text.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``'Input'`` and ``'PosExample_40'``.

    Returns
    -------
    str or None
        Content of the first completion choice, or ``None`` when the request
        raised an exception (a ``RuntimeWarning`` describing it is issued).
    """
    import warnings  # local import so this cell does not depend on the import cell

    txt = row['Input']
    example = row['PosExample_40']
    # NOTE(review): `example` comes from an 'Input' column that GetInput already
    # wraps in triple quotes, so this adds a second pair of delimiters. The text
    # is left unchanged to keep the prompt identical — confirm whether intended.
    prompt_used = prompt + f'\n\n### Example \n\n Below is an example delimited by triple quotes that should definitely be included: \n """{example}"""'
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": prompt_used},
                {"role": "user", "content": txt},
            ],
        )
        return completion.choices[0].message.content
    except Exception as exc:
        # Catch Exception rather than everything: a bare except would also
        # swallow KeyboardInterrupt, making a long apply() run impossible to stop.
        warnings.warn(f"getRelScore_40 request failed: {exc!r}", RuntimeWarning)
        return None

In [18]:
# Query the model once per test record for each example source and keep the
# raw reply text (or None on failure) in the matching 'Results_*' column.
for result_column, scorer in (
    ('Results_10', getRelScore_10),
    ('Results_20', getRelScore_20),
    ('Results_40', getRelScore_40),
):
    test[result_column] = test.apply(scorer, axis=1)

In [19]:

def _parseField(res, field):
    """Pull `field` out of the JSON object contained in a model reply.

    Candidate substrings are tried in order and the first one that decodes to
    JSON and contains `field` wins:

    1. ``res[1:-1]``  - the reply wrapped in one extra pair of braces
    2. ``res[8:-4]``  - the reply wrapped in a ```` ```json ```` code fence
    3. ``res``        - the reply is already plain JSON
    4. the text from the first ``{`` to the last ``}`` (ignores surrounding
       prose, fences or whitespace)
    5. candidate 4 with one outer pair of braces removed

    Candidates 1 and 2 are the original slices, tried first so every reply
    that parsed before still yields the same value.

    Returns the field's value, or the string ``"Error"`` when `res` is not a
    string or no candidate can be decoded.
    """
    if not isinstance(res, str):
        # e.g. None stored when the API request failed
        return "Error"

    first_brace = res.find('{')
    last_brace = res.rfind('}')
    braced = res[first_brace:last_brace + 1] if 0 <= first_brace < last_brace else ''

    for candidate in (res[1:-1], res[8:-4], res, braced, braced[1:-1]):
        try:
            return json.loads(candidate)[field]
        except (ValueError, TypeError, LookupError):
            # invalid JSON, JSON that is not an object, or missing field
            continue
    return "Error"


def parseScore(res):
    """Return the "Relevance Score" value from a model reply, or "Error"."""
    return _parseField(res, "Relevance Score")


def parseReason(res):
    """Return the "Reason" value from a model reply, or "Error"."""
    return _parseField(res, "Reason")


In [20]:

# Split every raw reply column into its parsed score and reason columns.
for split_suffix in ('10', '20', '40'):
    raw_replies = test[f'Results_{split_suffix}']
    test[f'RelevanceScore_{split_suffix}'] = raw_replies.apply(parseScore)
    test[f'Reason_{split_suffix}'] = raw_replies.apply(parseReason)


OSError: Cannot save file into a non-existent directory: '/workspaces/LLMAbstractScreening/Dataset/01042025'

In [21]:
# Write the test frame, including all result columns, to CSV without the index.
test.to_csv(
    '/workspaces/LLMAbstractScreen/Dataset/01042025/Ensemble_test_01052025_binary.csv',
    index=False,
)