# Leveraging LLMs for Abstract Screening

In [33]:
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, BertModel

from openai import OpenAI
import os
from dotenv import load_dotenv



In [None]:
# Load the abstracts to be screened and the labelled example abstracts.
Dataset, Examples = (
    pd.read_csv('./Dataset/Abstracts_CD008874_20241012_sample.csv'),
    pd.read_csv('./Dataset/Abstracts_CD008874_20241012_examples.csv'),
)

In [5]:
# Build the text that gets embedded and sent to the LLM: "<title> <abstract>".
# Missing titles/abstracts are replaced by '' first; otherwise `str + NaN`
# yields NaN for the whole row and the downstream tokenizer call fails on it.
Dataset['Title/Abstract'] = (
    Dataset['title'].fillna('') + ' ' + Dataset['abstract'].fillna('')
)
Examples['Title/Abstract'] = (
    Examples['title'].fillna('') + ' ' + Examples['abstract'].fillna('')
)

In [6]:
# name of the pretrained checkpoint shared by tokenizer and model
transformer_name = "bert-base-uncased"

# pick the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BERT tokenizer (uncased) matching the checkpoint above
tokenizer = AutoTokenizer.from_pretrained(transformer_name)

# pretrained BERT model, moved straight onto the selected device
model = BertModel.from_pretrained(transformer_name).to(device)

In [7]:
def getBertCls(text):
  '''
  Represent a text by the BERT [CLS] embedding.

  input:
  text -- the string to be represented
  output:
  a 1-D numpy array: the last-layer hidden state at the first
  ([CLS]) position for this text

  Uses the module-level `tokenizer`, `model` and `device`.
  '''
  # Truncate at the token level. Slicing the raw string (text[:512]) cut the
  # input to 512 *characters*, which discards most of a typical abstract;
  # the 512 limit applies to the number of tokens the model can take.
  tok_text = tokenizer(text,
                       truncation=True,
                       max_length=512,
                       return_tensors='pt').to(device)
  # Inference only: skip building the autograd graph to save memory/time.
  with torch.no_grad():
    mod_output = model(**tok_text,
                       output_hidden_states=True)
  last_hidden_states = mod_output.hidden_states[-1]
  # [:, 0, :] selects the first token of every sequence; [0] takes the
  # single sequence in the batch.
  return last_hidden_states[:, 0, :].cpu().detach().numpy()[0]

In [None]:
# add a column holding the [CLS] representation of every document,
# for both the abstracts to screen and the labelled examples
for frame in (Dataset, Examples):
    frame['BERT_cls'] = frame['Title/Abstract'].apply(getBertCls)

# labelled examples marked as not included (Include_cont == 0)
is_excluded = Examples['Include_cont'] == 0
NegExamples = Examples[is_excluded].copy()

In [10]:
def calCosSim(emb1, emb2):
  '''
  Return the cosine similarity of the 2 input numpy arrays.

  emb1, emb2 -- numpy arrays (the callers in this file pass 1-D
                embedding vectors)

  The result is the dot product divided by the product of the two
  norms. If either input has zero norm the similarity is undefined;
  0 is returned in that case instead of NaN, so that a degenerate
  embedding cannot win a later argmax.
  '''
  dot = emb1 @ emb2.T
  denom = np.linalg.norm(emb1) * np.linalg.norm(emb2)
  if denom == 0:
    # keep the shape/type of `dot`, but as a float zero
    return dot * 0.0
  # Out-of-place division: an in-place `/=` fails when `dot` is an
  # integer-dtype array.
  return dot / denom

In [21]:
def GetPosExample(row):
    '''
    Return the 'Title/Abstract' text of the included example
    (Include_cont == 1) whose BERT_cls embedding has the highest
    cosine similarity to the embedding of `row`.

    row -- a row of the dataset carrying a 'BERT_cls' entry
    '''
    query_vec = row['BERT_cls']
    is_included = Examples['Include_cont'] == 1
    candidates = Examples[is_included].copy()

    def _similarity_to_query(example_vec):
        return calCosSim(query_vec, example_vec)

    candidates['Sim'] = candidates['BERT_cls'].apply(_similarity_to_query)
    best_pos = np.argmax(candidates['Sim'])
    return candidates.iloc[best_pos]['Title/Abstract']

In [22]:
# for every abstract, store the text of its most similar included example
Dataset['PosExample'] = Dataset.apply(GetPosExample, axis='columns')


In [30]:
def GetNegExample(row):
    '''
    Return the 'Title/Abstract' text of the excluded example
    (Include_cont == 0) whose BERT_cls embedding has the highest
    cosine similarity to the embedding of `row`.

    row -- a row of the dataset carrying a 'BERT_cls' entry
    '''
    query_vec = row['BERT_cls']
    excluded = Examples[Examples['Include_cont'] == 0].copy()

    def _similarity_to_query(example_vec):
        return calCosSim(query_vec, example_vec)

    excluded['Sim'] = excluded['BERT_cls'].apply(_similarity_to_query)
    scores = excluded['Sim']
    closest = excluded.iloc[np.argmax(scores)]
    return closest['Title/Abstract']

In [31]:
# for every abstract, store the text of its most similar excluded example
# (this previously called GetPosExample, so the 'NegExample' column was just
# a copy of the positive examples)
Dataset['NegExample'] = Dataset.apply(GetNegExample, axis=1)

In [35]:
# read the .env file into the environment, then build the OpenAI client
# from the OPENAI_API_KEY environment variable
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [None]:

# System prompt for the screening model: review title, scoring instruction
# (1 - 100, threshold 70) and the inclusion criteria of the review.
# Wording fixes relative to the earlier text: "abstract and abstract" ->
# "title and abstract", stray ":," removed, and a whitespace-only
# continuation line (hundreds of spaces sent to the API) dropped.
prompt = """
We are trying to conduct a systematic review study titled \
"Airway physical examination tests for detection of difficult airway management in apparently normal adult patients". \
You will take the role of a reviewer to check the abstracts and titles if they should be selected \
for further assessment and full-text screening. \
Please give a score between 1 - 100 with no explanation for the following title and abstract on \
    how likely this article should be included, where 70 and above means we should screen the paper and \
        69 and below means we should not screen the paper.
The paper selection criteria are the followings: \
    
    Full‐text diagnostic test accuracy studies of any individual index test, or a combination of tests, against a reference standard.
    Index tests included the Mallampati test, modified Mallampati test, Wilson risk score, thyromental distance, \
    sternomental distance, mouth opening test, upper lip bite test, or any combination of these. \
    The target condition was difficult airway, with one of the following reference standards: difficult face mask ventilation, \
    difficult laryngoscopy, difficult tracheal intubation, and failed intubation.
    Participants were adults without obvious airway abnormalities, who were having laryngoscopy performed with \
    a standard laryngoscope and the trachea intubated with a standard tracheal tube.
"""


In [36]:
def getRelScore(row):
    '''
    Ask the chat model for a relevance score for one abstract.

    row -- a row of the dataset with 'Title/Abstract' (the text to score)
           and 'PosExample' (text of an example that should be included,
           appended to the system prompt)

    Returns the model's reply converted to int, or None when the API call
    fails or the reply cannot be parsed as an integer (best effort: one bad
    row must not abort the whole run).
    '''
    txt = row['Title/Abstract']
    example = row['PosExample']
    prompt_used = prompt + f"\n\nBelow is an example that should definitely be included: \n {example}"
    try:
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": prompt_used},
                {"role": "user", "content": txt},
            ],
        )
    except Exception:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still stop a long-running scoring loop.
        return None
    try:
        return int(completion.choices[0].message.content)
    except (TypeError, ValueError, IndexError, AttributeError):
        # empty/missing reply or a reply that is not a plain integer
        return None

# score every abstract with the LLM (None where no score was obtained)
Dataset['RelevanceScore'] = Dataset.apply(getRelScore, axis='columns')

# persist the scored sample, without the index column
Dataset.to_csv(
    './Dataset/RankedSample_CD008874_dyna1Ex_20241129.csv',
    index=False,
)
