# Prompting the models to classify the statements

## GPT 3.5 and GPT 4.1

In [20]:
%%capture output
%run ./04_prompt_creation.ipynb

In [21]:
import pandas as pd

chunking = "256_20"
only_text = False

path = f"../data/dfs/{'only_text_' if only_text else ''}{chunking}/ReferenceErrorDetection_data_with_chunk_info.pkl"
print(path)

# read the dataframe from a pickle file
df = pd.read_pickle(path)

../data/dfs/256_20/ReferenceErrorDetection_data_with_chunk_info.pkl


In [22]:
df.head()

Unnamed: 0,Source,Citing Article ID,Citing Article DOI,Citing Article Title,Citing Article Retracted,Citing Article Downloaded,Domain,Citation ID,Statement with Citation,Corrected Statement,...,Reference Article PDF Available,Reference Article Retracted,Reference Article Downloaded,Label,Explanation,Error Type,Added,Previously Partially Substantiated,Top_3_Chunk_IDs,Top_3_Chunk_Texts
0,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,cit001_1,Others have aimed to reduce irreversibility or...,Others have aimed to reduce irreversibility or...,...,Yes,No,Yes,Unsubstantiated,Irrelevant,Irrelevant,No,,"[c891f2f3-aded-4300-b450-5036f6d1e0cd, ea451fa...",[Automatic implementation of fuzzy reasoning s...
1,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,cit001_2,Some researchers have also studied various hea...,Some researchers have also studied various hea...,...,Yes,No,Yes,Unsubstantiated,Irrelevant,Irrelevant,No,,"[180100fc-403e-4c11-8156-5d11487a8684, 2d68519...",[Experimental and numerical investigations on ...
2,PubPeer,c002,10.1155/2022/4601350,Oxidative Potential and Nanoantioxidant Activi...,Yes,Yes,Chemistry,cit002_1,The relative content of total flavonoids in th...,The relative content of total flavonoids in th...,...,Yes,No,Yes,Unsubstantiated,Irrelevant,Irrelevant,No,,"[06a6c4ea-8834-47af-97e1-9ebebd404f84, 6674247...","[However, it remains within the tolerance rang..."
3,PubPeer,c003,10.1155/2022/2408685,The Choice of Anesthetic Drugs in Outpatient H...,Yes,Yes,Medicine,cit003_1,Research has shown that remimazolam tosylate e...,Research has shown that remimazolam tosylate e...,...,Yes,No,Yes,Unsubstantiated,Irrelevant,Irrelevant,No,,"[0d368584-3525-43c6-abf0-f7270d354179, 10ae43c...",[Determination of the median effective concent...
4,PubPeer,c004,10.1155/2022/4783847,A Fault-Tolerant Structure for Nano-Power Comm...,Yes,Yes,Engineering,cit004_1,if the efficiency of the routing algorithm is ...,If the efficiency of the routing algorithm is ...,...,Yes,No,Yes,Unsubstantiated,Irrelevant,Irrelevant,No,,"[f8c19ee5-1a9b-461e-abe3-07f8b444aed7, 8ccaea4...","[In the table, China's intermediary centrality..."


## Prompting the models (batch processing)

In [26]:
import os
import json

def create_batch_files(df, model, number_files=1, ignore_ids=[]):
    output_dir = f"../data/batch_files/{'only_text_' if only_text else ''}{chunking}/{model}"
    # Empty the folder if it exists
    if os.path.exists(output_dir):
        for filename in os.listdir(output_dir):
            file_path = os.path.join(output_dir, filename)
            if os.path.isfile(file_path):
                os.remove(file_path)
    os.makedirs(output_dir, exist_ok=True)

    output_files = []
    for i in range(number_files):
        output_file = os.path.join(output_dir, f"prompt_batch_{i}.jsonl")
        # If the file already exists, empty it
        open(output_file, "w").close()
        output_files.append(output_file)
    
    for index, row in df.iterrows():
        if row['Reference Article Downloaded'] == 'Yes' and index not in ignore_ids:
            prompt = create_prompt(row)
            json_sequence = {
                "custom_id": f"request-{index}", 
                "method": "POST", 
                "url": "/v1/chat/completions", 
                "body": {
                    "model": model, 
                    "messages": [
                        {
                            "role": "user",
                            "content": prompt
                        }
                    ],
                    "temperature": 0,
                }
            }

            output_file = output_files[index % number_files]
            with open(output_file, "a") as f:
                f.write(json.dumps(json_sequence) + "\n")
                
    # Remove empty output files from list
    output_files = [file for file in output_files if os.path.getsize(file) > 0]
    
    return output_files

In [27]:
import json

models = ["gpt-3.5-turbo-0125", "gpt-4.1-nano-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-2025-04-14"]
model = models[0]

os.makedirs(f"../data/batch_responses/{'only_text_' if only_text else ''}{chunking}", exist_ok=True)
responses_dict_path = f"../data/batch_responses/{'only_text_' if only_text else ''}{chunking}/{model}_responses_dict_batch.json"

responses_dict = {}
try:
    with open(responses_dict_path, 'r') as file:
        responses_dict = json.load(file)
    ids_to_ignore = [int(key) for key in responses_dict.keys()]
except FileNotFoundError:
    ids_to_ignore = []

print(ids_to_ignore)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 76, 77, 78, 79, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 

In [28]:
batch_file_paths = create_batch_files(df, model, 10, ids_to_ignore)
batch_file_paths

[]

In [10]:
# Read the content of open_ai_key.txt into a variable
with open('../open_ai_key.txt', 'r') as file:
    open_ai_key = file.read().strip()

In [11]:
def check_batch(batch_id, client):
    batch = client.batches.retrieve(batch_id)
    print(f"{batch_id} - Current status: {batch.status}")
    
    if batch.status == 'completed' or batch.status == 'failed':
        return batch
    return None

In [13]:
import time
from openai import OpenAI

batch_input_files = []
batch_creation_responses = []
batches = []

client = OpenAI(api_key=open_ai_key)

def prompt_model_in_batches():
    global batch_input_files
    global batch_creation_responses
    global batches

    for batch_file_path in batch_file_paths:
        # Creating input file
        if os.stat(batch_file_path).st_size == 0:
            print(f"Skipping empty file: {batch_file_path}")
            continue
        batch_input_file = client.files.create(
            file=open(batch_file_path, "rb"),
            purpose="batch"
        )
        print(batch_input_file)
        batch_input_files.append(batch_input_file)

        # Starting batch job
        batch_input_file_id = batch_input_file.id
        batch_creation_response = client.batches.create(
            input_file_id=batch_input_file_id,
            endpoint="/v1/chat/completions",
            completion_window="24h"
        )
        print("Started: " + batch_creation_response.id)

        time.sleep(5)
        # Check the status of the created batch until it is completed
        while True:
            batch_id = batch_creation_response.id
            batch = check_batch(batch_id, client)
            if batch:
                if batch.status == "failed":
                    return
                elif batch.status == "completed":
                    batches.append(batch)
                    break
            time.sleep(20)

In [14]:
prompt_model_in_batches()

FileObject(id='file-NKN6DgSMGDkr7goihZpyqw', bytes=79871, created_at=1751616818, filename='prompt_batch_0.jsonl', object='file', purpose='batch', status='processed', expires_at=None, status_details=None)
Started: batch_68678d339f1481908402cc14ee2fd515
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: in_progress
batch_68678d339f1481908402cc14ee2fd515 - Current status: i

### Check all open batches

In [78]:
import time
from openai import OpenAI
client = OpenAI(api_key=open_ai_key)

current_millis = int(time.time())
last_48_hours = current_millis - 48 * 60 * 60

open_batches = client.batches.list()
relevant_open_batches = [batch for batch in open_batches if batch.created_at >= last_48_hours]

In [79]:
list(relevant_open_batches)

[Batch(id='batch_686789f989bc8190b9cfc3a12dc8ccdc', completion_window='24h', created_at=1751615993, endpoint='/v1/chat/completions', input_file_id='file-4yaeKFJGdBkgePDQguW7hA', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1751616013, error_file_id=None, errors=None, expired_at=None, expires_at=1751702393, failed_at=None, finalizing_at=1751616006, in_progress_at=1751615994, metadata=None, output_file_id='file-P78Y1f515AFcMwRruQS2iJ', request_counts=BatchRequestCounts(completed=13, failed=0, total=13)),
 Batch(id='batch_6867883d6988819086546ecabdbf3cb2', completion_window='24h', created_at=1751615549, endpoint='/v1/chat/completions', input_file_id='file-VeJhVfnE2TzJYqQwUvou5T', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1751615758, error_file_id='file-RE14Ytz1f4Hrfc8oYjvJ56', errors=None, expired_at=None, expires_at=1751701949, failed_at=None, finalizing_at=1751615747, in_progress_at=1751615550, meta

In [80]:
client.files.content(relevant_open_batches[0].output_file_id).text

'{"id": "batch_req_68678a072b3881908b17d5aff3211ac4", "custom_id": "request-0", "response": {"status_code": 200, "request_id": "89ad89cfcb2a16e6f4cfc4ba768a5351", "body": {"id": "chatcmpl-BpVcqfN3e1zWesxuClqhceQMoxQyt", "object": "chat.completion", "created": 1751615996, "model": "gpt-3.5-turbo-0125", "choices": [{"index": 0, "message": {"role": "assistant", "content": "{\\n    \\"label\\": \\"Unsubstantiated\\",\\n    \\"explanation\\": \\"The reference article does not support the statement about reducing irreversibility or optimizing energy-consumed devices, as it focuses on fault analysis of three-phase induction motors.\\"\\n}", "refusal": null, "annotations": []}, "logprobs": null, "finish_reason": "stop"}], "usage": {"prompt_tokens": 1163, "completion_tokens": 51, "total_tokens": 1214, "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0}, "completion_tokens_details": {"reasoning_tokens": 0, "audio_tokens": 0, "accepted_prediction_tokens": 0, "rejected_prediction_toke

In [None]:
# client.batches.cancel("batch_67e3cf592eb081908cd64e5e1dc55fa0")

ConflictError: Error code: 409 - {'error': {'message': "Cannot cancel a batch with status 'completed'.", 'type': 'invalid_request_error', 'param': None, 'code': None}}

### Check the batch status

In [40]:
import time

def wait_for_batch_completion(batch_id, client, interval=10):
    while True:
        batch = check_batch(batch_id, client)
        if batch != None:
            return batch
        time.sleep(interval)

In [41]:
batch = wait_for_batch_completion("batch_68678417bf4c819085e0d77c012d5a7a", client)

batch_68678417bf4c819085e0d77c012d5a7a - Current status: completed


In [81]:
batch = check_batch("batch_686789f989bc8190b9cfc3a12dc8ccdc", client)

batch_686789f989bc8190b9cfc3a12dc8ccdc - Current status: completed


In [82]:
batches = [batch]

In [49]:
print(batch)

Batch(id='batch_68678417bf4c819085e0d77c012d5a7a', completion_window='24h', created_at=1751614487, endpoint='/v1/chat/completions', input_file_id='file-TubVrLSjHxewNncTaHNShi', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1751614569, error_file_id='file-39tX7oztfkdnqiNEpYDUMb', errors=None, expired_at=None, expires_at=1751700887, failed_at=None, finalizing_at=1751614556, in_progress_at=1751614490, metadata=None, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=25, total=25))


### Save model_responses

In [None]:
import json

# save responds of completed batches
for batch in batches:
    if batch.status != "completed":
        continue
    model_responses = client.files.content(batch.output_file_id).text

    # Parse the model_responses into a list of objects
    responses_list = [json.loads(line) for line in model_responses.splitlines()]
    # print(responses_list)

    try:
        for response in responses_list:
            responses_dict[int(response['custom_id'].split('-')[1])] = response
            responses_dict = dict(sorted(responses_dict.items(), key=lambda item: int(item[0])))
    except NameError:
        responses_dict = {int(response['custom_id'].split('-')[1]): response for response in responses_list}

In [16]:
responses_dict

{1: {'id': 'batch_req_68678e4e2e148190962504c4730bf998',
  'custom_id': 'request-1',
  'response': {'status_code': 200,
   'request_id': '3cb72115438fcad5f681f7d95a58efc6',
   'body': {'id': 'chatcmpl-BpVuFKFwBKPDBAWDIeQYYNUjajMp8',
    'object': 'chat.completion',
    'created': 1751617075,
    'model': 'gpt-3.5-turbo-0125',
    'choices': [{'index': 0,
      'message': {'role': 'assistant',
       'content': '{\n    "label": "Unsubstantiated",\n    "explanation": "The reference article does not support the statement about studying heat exchange devices to enhance efficiency. The reference article focuses on artificial neural network optimization for nanofluids, not heat exchange devices."\n}',
       'refusal': None,
       'annotations': []},
      'logprobs': None,
      'finish_reason': 'stop'}],
    'usage': {'prompt_tokens': 1111,
     'completion_tokens': 55,
     'total_tokens': 1166,
     'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0},
     'completion_token

In [17]:
import json

# Save responses_dict to a JSON file
with open(responses_dict_path, 'w') as file:
    json.dump(responses_dict, file, indent=4)

#### Save responds to Dataframe

In [37]:
# Create a new column in the dataframe to store the responses
if 'Model Classification' not in df.columns:
    df['Model Classification'] = None

# Iterate through the dataframe
for index, row in df.iterrows():
    if row['Reference Article Downloaded'] == 'Yes':
        model_response = responses_dict[f"{index}"]['response']['body']['choices'][0]['message']['content']
        
        # Save the response to the new column
        df.at[index, 'Model Classification'] = model_response

In [None]:
df.to_pickle(f"../data/dfs/{'only_text_' if only_text else ''}{chunking}/ReferenceErrorDetection_data_with_prompt_results_batched.pkl")
df.to_excel(f"../data/dfs/{'only_text_' if only_text else ''}{chunking}/ReferenceErrorDetection_data_with_prompt_results_batched.xlsx", index=False)

## Prompting the models (no batching)

In [9]:
# Read the content of open_ai_key.txt into a variable
with open('../open_ai_key.txt', 'r') as file:
    open_ai_key = file.read().strip()

In [10]:
from openai import OpenAI
client = OpenAI(api_key=open_ai_key)

def send_prompt(prompt, model):
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ],
        temperature=0,
        timeout=30,

    )
    return completion.choices[0].message.content

In [11]:
models = ["gpt-3.5-turbo-0125", "gpt-4-0125-preview", "gpt-4o-2024-05-13", "gpt-4-turbo-preview"]
model = models[2]

send_prompt(example_prompt, model)

'```json\n{\n  "label": "Unsubstantiate",\n  "explanation": "The reference article \'DeepCleave: a deep learning predictor for caspase and matrix metalloprotease substrates and cleavage sites\' focuses on deep learning models for predicting protease-specific substrates and cleavage sites. It does not address issues related to arranging classes or summarizing grades in a high-level management context, which is the subject of the statement in the citing article."\n}\n```'

In [37]:
# path = f"../data/dfs/{embedding}{'_no_prev_chunking' if no_prev_chunking else ''}/{grobid_model}/ReferenceErrorDetection_data_with_prompt_results.pkl"
# df = pd.read_pickle(path)

In [12]:
ids_to_prompt = []
ids_not_to_prompt = []

In [None]:
def prompting_model(df, model, save_intermediate_results=False):
    print(f"Prompting model: {model}")

    # Create a new column in the dataframe to store the responses
    if 'Model Classification' not in df.columns:
        df['Model Classification'] = None

    # Iterate through the dataframe
    for index, row in df.iterrows():
        if row['Reference Article Downloaded'] == 'Yes':
            if len(ids_to_prompt) != 0 and row['Reference Article ID'] not in ids_to_prompt:
                continue

            if len(ids_not_to_prompt) != 0 and row['Reference Article ID'] in ids_not_to_prompt:
                continue

            print(f"Processing: " + row['Reference Article ID'])

            # Create the prompt
            prompt = create_prompt(row)
            
            # Send the prompt and get the response
            response = send_prompt(prompt, model)
            
            # Save the response to the new column
            df.at[index, 'Model Classification'] = response

            if save_intermediate_results and index % 10 == 0:
                df.to_pickle(f"../data/dfs/{embedding}{'_no_prev_chunking' if no_prev_chunking else ''}/{grobid_model}/ReferenceErrorDetection_data_with_prompt_results_{model}_intermed.pkl")
    return df

In [15]:
df2 = prompting_model(df, model, True)

Prompting model: gpt-4o-2024-05-13
Processing: r001
Processing: r002
Processing: r003
Processing: r004
Processing: r005
Processing: r006
Processing: r007
Processing: r008
Processing: r009
Processing: r010
Processing: r011
Processing: r012
Processing: r013
Processing: r013
Processing: r014
Processing: r015
Processing: r005
Processing: r017
Processing: r018
Processing: r019
Processing: r020
Processing: r021
Processing: r022
Processing: r023
Processing: r024
Processing: r013
Processing: r025
Processing: r026
Processing: r027
Processing: r028
Processing: r029
Processing: r030
Processing: r031
Processing: r032
Processing: r033
Processing: r034
Processing: r035
Processing: r036
Processing: r037
Processing: r038
Processing: r039
Processing: r040
Processing: r041
Processing: r042
Processing: r043
Processing: r044
Processing: r045
Processing: r046
Processing: r047
Processing: r048
Processing: r049
Processing: r050
Processing: r051
Processing: r052
Processing: r053
Processing: r051
Processing: r

In [16]:
df2.to_pickle(f"../data/dfs/{embedding}{'_no_prev_chunking' if no_prev_chunking else ''}/{grobid_model}/ReferenceErrorDetection_data_with_prompt_results_{model}.pkl")