# Prompting the models to classify the statements

In [13]:
import pandas as pd

# Read the dataframe from a pickle file (path is relative to the working directory).
# NOTE(review): unpickling can execute arbitrary code — only load pickle files from a trusted source.
df = pd.read_pickle('../data/ReferenceErrorDetection_data_with_chunk_info.pkl')

In [14]:
# Preview the first rows to check which columns were loaded
df.head()

Unnamed: 0,Source,Citing Article ID,Citing Article DOI,Citing Article Title,Citing Article Retracted,Citing Article Downloaded,Domain,Statement with Citation,Reference Article ID,Reference Article DOI,Reference Article Title,Reference Article Abstract,Reference Article PDF Available,Reference Article Retracted,Reference Article Downloaded,Label,Explanation,Top_3_Chunk_IDs,Top_3_Chunk_Texts
0,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,Others have aimed to reduce irreversibility or...,r001,10.1155/2021/2087027,A Fault Analysis Method for Three-Phase Induct...,The fault prediction and abductive fault diagn...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[5cb7b532-0bf6-42e8-b245-5ce118330981, 9c03161...",[the winding and lead wirep 15Connection box j...
1,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,Some researchers have also studied various hea...,r002,10.1016/j.physa.2018.12.031,Develop 24 dissimilar ANNs by suitable archite...,The artificial neural network optimization met...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[cfec4c95-2610-4b0d-b81e-3c46710e02e3, 17e8175...",[Therm. Anal. Calorim\n\t\t\n\t\t\t131\n\t\t\t...
2,PubPeer,c002,10.1155/2022/4601350,Oxidative Potential and Nanoantioxidant Activi...,Yes,Yes,Chemistry,The relative content of total flavonoids in th...,r003,10.1088/1742-6596/1937/1/012038,Lipid Data Acquisition for devices Treatment o...,"Recently, the widespread deployment of smart p...",Yes,No,Yes,Unsubstantiate,Irrelevant,"[58d642c1-5f8e-4ac3-bc25-c74eb9c6f5bf, b00c81c...",[mmol/l and 6.96 mmol/l TG calculation concent...
3,PubPeer,c003,10.1155/2022/2408685,The Choice of Anesthetic Drugs in Outpatient H...,Yes,Yes,Medicine,Research has shown that remimazolam tosylate e...,r004,10.1186/s12871-018-0543-3,"Effect of propofol on breast cancer cell, the ...",Breast cancer is the second leading cause of c...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[021c7cfc-9329-4667-8653-753f07eb787d, 4aac749...",[J Anaesthesiol\n\t\t\n\t\t\t22\n\t\t\t8\n\t\t...
4,PubPeer,c004,10.1155/2022/4783847,A Fault-Tolerant Structure for Nano-Power Comm...,Yes,Yes,Engineering,if the efficiency of the routing algorithm is ...,r005,10.36410/jcpr.2022.23.3.312,Analysis and research hotspots of ceramic mate...,"From the perspective of scientometrics, comb t...",Yes,No,Yes,Unsubstantiate,Irrelevant,"[716a8a3a-eed7-4c86-89a3-d63beecb4ff3, 7776e6e...","[size indicator is high, reflecting the import..."


## Create the prompts

In [28]:
def format_excerpts(excerpt_list):
    """Render a sequence of excerpts as one numbered block of text.

    Every excerpt is preceded by a ``----- Excerpt N: -----`` header line
    (N counts from 1) and followed by a blank line.

    Args:
        excerpt_list: Iterable of excerpts. Each item is interpolated into an
            f-string, so any object with a string form is accepted.

    Returns:
        The concatenated text, or an empty string when there are no excerpts.
    """
    return "".join(
        f"----- Excerpt {number}: -----\n{text}\n\n"
        for number, text in enumerate(excerpt_list, start=1)
    )

In [29]:
# Sanity check: show the formatted excerpts of the first row
print(format_excerpts(df.iloc[0]['Top_3_Chunk_Texts']))

----- Excerpt 1: -----
the winding and lead wirep 15Connection box joint loosenedp 16Poor contact of the power control loop switchp 17Decrease in rotational speedp 18Excessive current in a phasep 19Excessive excitation currentp 20A phase voltage lossp 21Foreign matter enters the rotary shaft clearancep 22-e motor oil intakep 23Oxidation and decomposition of bearing lubricating oilp 24Bearing expansion by heatp 25Bearing generates additional loadp 26Rotor axial movesp 27-e iron core of the stator and rotor has an air gapp 28Rotor winding open circuitp 29Contact resistance value increasesp 30Motor overheatingp 31Phase-absent operationp 32Abnormal rotation or the rotor is stuckp 33Insulation agingp 34Reduction of lubricant oilp 35Friction occurs between the crankshaft ring and shaft holep 36Excessive vibration of the motor in operationp 37Excessive bearing noisep 38Motor sweepingp 39-ree-phase current of the stator increasesp 40Increased pressure dropp 41-ree-phase current asymmetryp 42Ex

In [38]:
def create_prompt(df_row):
    """Build the classification prompt for one statement/reference pair.

    The prompt asks the model to pick one of three labels ("Fully
    substantiated", "Partially substantiated", "Unsubstantiated") and to
    answer in JSON with a "label" and an "explanation" element.

    Args:
        df_row: Mapping-like row (e.g. a dataframe row) providing the keys
            'Citing Article Title', 'Statement with Citation',
            'Reference Article Title', 'Reference Article Abstract' and
            'Top_3_Chunk_Texts' (the excerpts passed to ``format_excerpts``).

    Returns:
        The complete prompt text as a single string.
    """
    title = df_row['Citing Article Title']
    statement = df_row['Statement with Citation']
    reference_title = df_row['Reference Article Title']
    reference_abstract = df_row['Reference Article Abstract']
    reference_excerpts = format_excerpts(df_row['Top_3_Chunk_Texts'])

    # The label names in the explanations must match the list of allowed labels
    # exactly; otherwise the model sees two spellings of the same label.
    prompt = f"""   
    You are an experienced scientific writer and editor. 
    You will be given a statement from an article that cites a reference article and information from the reference article. 
    You will determine and explain if the reference article supports the statement.  
    
    Specifically, choose a label from "Fully substantiated", "Partially substantiated", and "Unsubstantiated". 
    Further explanations of the labels are as follows: 
    "Fully substantiated": The reference article fully substantiates the relevant part of the statement from the present article. 
    "Partially substantiated": According to the reference article, there is a minor error in the statement but the error does not invalidate the purpose of the statement. 
    "Unsubstantiated": The reference part does not substantiate any part of the statement. This could be because the statement is contradictory to, unrelated to, or simply missing from the reference article.  
    
    Format your answer in JSON with two elements: "label" and "explanation". 
    Your explanation should be short and concise. 
    
    # The citing article
    Title: {title} 
    Statement: {statement}
    
    # The reference article 
    Title: {reference_title} 
    Abstract: {reference_abstract} 
    Excerpts: \n\n{reference_excerpts}
    """

    return prompt

In [39]:
# Build the prompt for the first row and print it to inspect the final wording
example_prompt = create_prompt(df.iloc[0])
print(example_prompt)

   
    You are an experienced scientific writer and editor. 
    You will be given a statement from an article that cites a reference article and information from the reference article. 
    You will determine and explain if the reference article supports the statement.  
    
    Specifically, choose a label from "Fully substantiated", "Partially substantiated", and "Unsubstantiated". 
    Further explanations of the labels are as follows: 
    "Fully substantiated": The reference article fully substantiates the relevant part of the statement from the present article. 
    "Partially substantiated": According to the reference article, there is a minor error in the statement but the error does not invalidate the purpose of the statement. 
    "Unsubstantiate": The reference part does not substantiate any part of the statement. This could be because the statement is contradictory to, unrelated to, or simply missing from the reference article.  
    
    Format your answer in JSON with 

## Prompting the models

In [41]:
# Read the OpenAI API key from the text file at ../open_ai_key.txt and strip
# surrounding whitespace (for example a trailing newline) before storing it.
with open('../open_ai_key.txt', 'r') as file:
    open_ai_key = file.read().strip()

In [45]:
from openai import OpenAI
# Create the OpenAI client, authenticated with the key read from the key file
client = OpenAI(api_key=open_ai_key)

def send_prompt(prompt, model="gpt-3.5-turbo-0125"):
    """Send one user message to the chat-completions endpoint and return the reply.

    Args:
        prompt: Text sent as the content of a single ``user`` message.
        model: Name of the chat model to query.

    Returns:
        The message content of the first choice in the completion.
    """
    user_message = {"role": "user", "content": prompt}
    response = client.chat.completions.create(model=model, messages=[user_message])
    first_choice = response.choices[0]
    return first_choice.message.content

In [46]:
# Try a single request with the example prompt before running the whole dataset
send_prompt(example_prompt)

'{\n    "label": "Unsubstantiated",\n    "explanation": "The reference article does not mention anything related to reducing irreversibility or optimizing energy-consumed devices as stated in the citing article."\n}'

In [48]:
# Start with an empty 'Model Classification' column; rows that are skipped keep None
df['Model Classification'] = None

# Only rows whose reference article was downloaded are sent to the model
downloaded_rows = df[df['Reference Article Downloaded'] == 'Yes']

for row_index, record in downloaded_rows.iterrows():
    print("Processing: " + record['Reference Article ID'])

    # Build the prompt, query the model, and store the raw reply for this row
    df.at[row_index, 'Model Classification'] = send_prompt(create_prompt(record))

# Display the updated dataframe
df.head()

Processing: r001
Processing: r002
Processing: r003
Processing: r004
Processing: r005
Processing: r006
Processing: r007
Processing: r008
Processing: r009
Processing: r010
Processing: r011
Processing: r012
Processing: r013
Processing: r013
Processing: r014
Processing: r015
Processing: r005
Processing: r017
Processing: r018
Processing: r019
Processing: r020
Processing: r021
Processing: r022
Processing: r023
Processing: r024
Processing: r013
Processing: r025
Processing: r026
Processing: r027
Processing: r028
Processing: r029
Processing: r030
Processing: r031
Processing: r032
Processing: r033
Processing: r034
Processing: r035
Processing: r036
Processing: r037
Processing: r038
Processing: r039
Processing: r040
Processing: r041
Processing: r042
Processing: r043
Processing: r044
Processing: r045
Processing: r046
Processing: r047
Processing: r048
Processing: r049
Processing: r050
Processing: r051
Processing: r052
Processing: r053
Processing: r051
Processing: r055
Processing: r056
Processing: r0

Unnamed: 0,Source,Citing Article ID,Citing Article DOI,Citing Article Title,Citing Article Retracted,Citing Article Downloaded,Domain,Statement with Citation,Reference Article ID,Reference Article DOI,Reference Article Title,Reference Article Abstract,Reference Article PDF Available,Reference Article Retracted,Reference Article Downloaded,Label,Explanation,Top_3_Chunk_IDs,Top_3_Chunk_Texts,Model Classification
0,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,Others have aimed to reduce irreversibility or...,r001,10.1155/2021/2087027,A Fault Analysis Method for Three-Phase Induct...,The fault prediction and abductive fault diagn...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[5cb7b532-0bf6-42e8-b245-5ce118330981, 9c03161...",[the winding and lead wirep 15Connection box j...,"{\n ""label"": ""Unsubstantiated"",\n ""expla..."
1,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,Some researchers have also studied various hea...,r002,10.1016/j.physa.2018.12.031,Develop 24 dissimilar ANNs by suitable archite...,The artificial neural network optimization met...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[cfec4c95-2610-4b0d-b81e-3c46710e02e3, 17e8175...",[Therm. Anal. Calorim\n\t\t\n\t\t\t131\n\t\t\t...,"{\n ""label"": ""Unsubstantiated"",\n ""expla..."
2,PubPeer,c002,10.1155/2022/4601350,Oxidative Potential and Nanoantioxidant Activi...,Yes,Yes,Chemistry,The relative content of total flavonoids in th...,r003,10.1088/1742-6596/1937/1/012038,Lipid Data Acquisition for devices Treatment o...,"Recently, the widespread deployment of smart p...",Yes,No,Yes,Unsubstantiate,Irrelevant,"[58d642c1-5f8e-4ac3-bc25-c74eb9c6f5bf, b00c81c...",[mmol/l and 6.96 mmol/l TG calculation concent...,"{\n ""label"": ""Unsubstantiated"",\n ""expla..."
3,PubPeer,c003,10.1155/2022/2408685,The Choice of Anesthetic Drugs in Outpatient H...,Yes,Yes,Medicine,Research has shown that remimazolam tosylate e...,r004,10.1186/s12871-018-0543-3,"Effect of propofol on breast cancer cell, the ...",Breast cancer is the second leading cause of c...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[021c7cfc-9329-4667-8653-753f07eb787d, 4aac749...",[J Anaesthesiol\n\t\t\n\t\t\t22\n\t\t\t8\n\t\t...,"{\n\t""label"": ""Partially substantiated"",\n\t""e..."
4,PubPeer,c004,10.1155/2022/4783847,A Fault-Tolerant Structure for Nano-Power Comm...,Yes,Yes,Engineering,if the efficiency of the routing algorithm is ...,r005,10.36410/jcpr.2022.23.3.312,Analysis and research hotspots of ceramic mate...,"From the perspective of scientometrics, comb t...",Yes,No,Yes,Unsubstantiate,Irrelevant,"[716a8a3a-eed7-4c86-89a3-d63beecb4ff3, 7776e6e...","[size indicator is high, reflecting the import...","{\n ""label"": ""Unsubstantiated"",\n ""expla..."


In [49]:
# Display the first rows again, now including the 'Model Classification' column
df.head()

Unnamed: 0,Source,Citing Article ID,Citing Article DOI,Citing Article Title,Citing Article Retracted,Citing Article Downloaded,Domain,Statement with Citation,Reference Article ID,Reference Article DOI,Reference Article Title,Reference Article Abstract,Reference Article PDF Available,Reference Article Retracted,Reference Article Downloaded,Label,Explanation,Top_3_Chunk_IDs,Top_3_Chunk_Texts,Model Classification
0,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,Others have aimed to reduce irreversibility or...,r001,10.1155/2021/2087027,A Fault Analysis Method for Three-Phase Induct...,The fault prediction and abductive fault diagn...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[5cb7b532-0bf6-42e8-b245-5ce118330981, 9c03161...",[the winding and lead wirep 15Connection box j...,"{\n ""label"": ""Unsubstantiated"",\n ""expla..."
1,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,Some researchers have also studied various hea...,r002,10.1016/j.physa.2018.12.031,Develop 24 dissimilar ANNs by suitable archite...,The artificial neural network optimization met...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[cfec4c95-2610-4b0d-b81e-3c46710e02e3, 17e8175...",[Therm. Anal. Calorim\n\t\t\n\t\t\t131\n\t\t\t...,"{\n ""label"": ""Unsubstantiated"",\n ""expla..."
2,PubPeer,c002,10.1155/2022/4601350,Oxidative Potential and Nanoantioxidant Activi...,Yes,Yes,Chemistry,The relative content of total flavonoids in th...,r003,10.1088/1742-6596/1937/1/012038,Lipid Data Acquisition for devices Treatment o...,"Recently, the widespread deployment of smart p...",Yes,No,Yes,Unsubstantiate,Irrelevant,"[58d642c1-5f8e-4ac3-bc25-c74eb9c6f5bf, b00c81c...",[mmol/l and 6.96 mmol/l TG calculation concent...,"{\n ""label"": ""Unsubstantiated"",\n ""expla..."
3,PubPeer,c003,10.1155/2022/2408685,The Choice of Anesthetic Drugs in Outpatient H...,Yes,Yes,Medicine,Research has shown that remimazolam tosylate e...,r004,10.1186/s12871-018-0543-3,"Effect of propofol on breast cancer cell, the ...",Breast cancer is the second leading cause of c...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[021c7cfc-9329-4667-8653-753f07eb787d, 4aac749...",[J Anaesthesiol\n\t\t\n\t\t\t22\n\t\t\t8\n\t\t...,"{\n\t""label"": ""Partially substantiated"",\n\t""e..."
4,PubPeer,c004,10.1155/2022/4783847,A Fault-Tolerant Structure for Nano-Power Comm...,Yes,Yes,Engineering,if the efficiency of the routing algorithm is ...,r005,10.36410/jcpr.2022.23.3.312,Analysis and research hotspots of ceramic mate...,"From the perspective of scientometrics, comb t...",Yes,No,Yes,Unsubstantiate,Irrelevant,"[716a8a3a-eed7-4c86-89a3-d63beecb4ff3, 7776e6e...","[size indicator is high, reflecting the import...","{\n ""label"": ""Unsubstantiated"",\n ""expla..."


In [50]:
# Persist the dataframe, including the raw model responses, to a pickle file
df.to_pickle('../data/ReferenceErrorDetection_data_with_prompt_results.pkl')

## Compare model classification with labels

In [93]:
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown code fence.

    Handles replies wrapped as ```json ... ``` as well as a bare ``` ... ```
    fence, with or without whitespace around the fence.

    Args:
        text: Raw model reply, or a non-string placeholder such as None.

    Returns:
        The stripped inner content when ``text`` is a fenced string; otherwise
        ``text`` itself, unchanged.
    """
    if not isinstance(text, str):
        return text

    fence = "```"
    candidate = text.strip()
    is_fenced = (
        len(candidate) >= 2 * len(fence)
        and candidate.startswith(fence)
        and candidate.endswith(fence)
    )
    if not is_fenced:
        return text

    inner = candidate[len(fence):-len(fence)]
    # Drop the optional language tag that directly follows the opening fence
    if inner.startswith("json"):
        inner = inner[len("json"):]
    return inner.strip()


# Fix the entries where additional json prefixes and suffixes were added.
# The column is materialised as a list first so that writing back into the
# dataframe cannot interfere with the iteration.
for index, raw_response in list(df['Model Classification'].items()):
    cleaned_response = _strip_code_fence(raw_response)
    if isinstance(raw_response, str) and cleaned_response != raw_response:
        df.at[index, 'Model Classification'] = cleaned_response

### All labels (three-way comparison)

In [94]:
import json

# Counters for the three-way comparison between model label and dataset label
correct_classification = 0
incorrect_classification = 0
json_error = 0

for index, row in df.iterrows():
    # Only rows with a downloaded reference article are evaluated
    if row['Reference Article Downloaded'] != 'Yes':
        continue

    try:
        model_classification = json.loads(row['Model Classification'])
        predicted_label = str(model_classification['label']).strip().rstrip('.').lower()
    except (json.JSONDecodeError, TypeError, KeyError):
        # JSONDecodeError: the reply is not valid JSON.
        # TypeError: the reply is missing (None) or the JSON is not an object.
        # KeyError: the JSON object has no "label" element.
        print(row['Model Classification'])
        json_error += 1
        continue

    # The labels compared against below end in "substantiate", while the prompt
    # asks for "...substantiated". Remove the trailing "d" only when it is
    # actually present instead of blindly dropping the last character, so a
    # reply of "Unsubstantiate" is not truncated to "unsubstantiat".
    if predicted_label.endswith('substantiated'):
        predicted_label = predicted_label[:-1]

    # str() keeps a missing (non-string) dataset label from raising here
    if predicted_label == str(row['Label']).lower():
        correct_classification += 1
    else:
        incorrect_classification += 1

In [95]:
# Denominators: every evaluated row, and only the rows whose reply could be parsed
evaluated_total = correct_classification + incorrect_classification + json_error
parsed_total = correct_classification + incorrect_classification

print(f"Correct classifications: {correct_classification}")
print(f"Incorrect classifications: {incorrect_classification}")
print(f"JSON errors: {json_error}")

print(f"Accuracy: {correct_classification / evaluated_total}")
print(f"Accuracy without JSON errors: {correct_classification / parsed_total}")

Correct classifications: 135
Incorrect classifications: 112
JSON errors: 0
Accuracy: 0.5465587044534413
Accuracy without JSON errors: 0.5465587044534413


### Only two labels (fully/partially substantiated vs. unsubstantiated)

In [96]:
import json

# Counters for the two-way comparison: "fully" and "partially" substantiated are
# treated as one class, "unsubstantiate" as the other.
correct_classification = 0
incorrect_classification = 0
json_error = 0

substantiated_labels = ['fully substantiate', 'partially substantiate']
unsubstantiated_label = 'unsubstantiate'
known_labels = substantiated_labels + [unsubstantiated_label]

for index, row in df.iterrows():
    # Only rows with a downloaded reference article are evaluated
    if row['Reference Article Downloaded'] != 'Yes':
        continue

    try:
        model_classification = json.loads(row['Model Classification'])
        predicted_label = str(model_classification['label']).strip().rstrip('.').lower()
    except (json.JSONDecodeError, TypeError, KeyError):
        # JSONDecodeError: the reply is not valid JSON.
        # TypeError: the reply is missing (None) or the JSON is not an object.
        # KeyError: the JSON object has no "label" element.
        json_error += 1
        continue

    # Remove the trailing "d" only when it is actually present instead of
    # blindly dropping the last character, so a reply of "Unsubstantiate" is
    # not truncated to "unsubstantiat" and reported as an unknown label.
    if predicted_label.endswith('substantiated'):
        predicted_label = predicted_label[:-1]

    # str() keeps a missing (non-string) dataset label from raising here
    actual_label = str(row['Label']).lower()

    both_substantiated = predicted_label in substantiated_labels and actual_label in substantiated_labels
    both_unsubstantiated = predicted_label == unsubstantiated_label and actual_label == unsubstantiated_label

    if both_substantiated or both_unsubstantiated:
        correct_classification += 1
    else:
        # Report replies whose label is none of the three expected ones
        if predicted_label not in known_labels:
            print(f"Row {index} Model Classification: {model_classification['label']} Actual Label: {row['Label']}")
        incorrect_classification += 1

In [97]:
# Denominators: every evaluated row, and only the rows whose reply could be parsed
evaluated_total = correct_classification + incorrect_classification + json_error
parsed_total = correct_classification + incorrect_classification

print(f"Correct classifications: {correct_classification}")
print(f"Incorrect classifications: {incorrect_classification}")
print(f"JSON errors: {json_error}")

print(f"Accuracy: {correct_classification / evaluated_total}")
print(f"Accuracy without JSON errors: {correct_classification / parsed_total}")

Correct classifications: 196
Incorrect classifications: 51
JSON errors: 0
Accuracy: 0.7935222672064778
Accuracy without JSON errors: 0.7935222672064778
