# Model prompting with installed Ollama models

In [1]:
import pandas as pd

# Run configuration: these values select which sub-directory of ../data/dfs/ is read below.
# `embedding` and `grobid_model` are used verbatim as path components;
# `no_prev_chunking` appends the "_no_prev_chunking" suffix to the embedding directory name.
embedding = "te3l" # / "te3s"
grobid_model = "full_model"
no_prev_chunking = True

path = f"../data/dfs/{embedding}{'_no_prev_chunking' if no_prev_chunking else ''}/{grobid_model}/ReferenceErrorDetection_data_with_chunk_info.pkl"
print(path)

# read the dataframe from a pickle file
# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
df = pd.read_pickle(path)

../data/dfs/te3l_no_prev_chunking/full_model/ReferenceErrorDetection_data_with_chunk_info.pkl


In [2]:
# Preview the first rows of the loaded dataframe (rendered as the cell output).
df.head()

Unnamed: 0,Source,Citing Article ID,Citing Article DOI,Citing Article Title,Citing Article Retracted,Citing Article Downloaded,Domain,Statement with Citation,Reference Article ID,Reference Article DOI,Reference Article Title,Reference Article Abstract,Reference Article PDF Available,Reference Article Retracted,Reference Article Downloaded,Label,Explanation,Top_3_Chunk_IDs,Top_3_Chunk_Texts
0,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,Others have aimed to reduce irreversibility or...,r001,10.1155/2021/2087027,A Fault Analysis Method for Three-Phase Induct...,The fault prediction and abductive fault diagn...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[d5149dde-3f61-477d-acd9-0c8f97b154b5, f834613...",[Automatic implementation of fuzzy reasoning s...
1,PubPeer,c001,10.1016/j.est.2021.103553,Heating a residential building using the heat ...,Yes,Yes,Engineering,Some researchers have also studied various hea...,r002,10.1016/j.physa.2018.12.031,Develop 24 dissimilar ANNs by suitable archite...,The artificial neural network optimization met...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[dc4cd1b8-1ae4-4c15-be29-a4eb6e4270ba, 6210cc4...",[Heat transfer improvement of water/single-wal...
2,PubPeer,c002,10.1155/2022/4601350,Oxidative Potential and Nanoantioxidant Activi...,Yes,Yes,Chemistry,The relative content of total flavonoids in th...,r003,10.1088/1742-6596/1937/1/012038,Lipid Data Acquisition for devices Treatment o...,"Recently, the widespread deployment of smart p...",Yes,No,Yes,Unsubstantiate,Irrelevant,"[2eeaa48b-a7ec-4674-87ac-511b439139c8, 0d45e20...",[The correspondence curve for our photochemica...
3,PubPeer,c003,10.1155/2022/2408685,The Choice of Anesthetic Drugs in Outpatient H...,Yes,Yes,Medicine,Research has shown that remimazolam tosylate e...,r004,10.1186/s12871-018-0543-3,"Effect of propofol on breast cancer cell, the ...",Breast cancer is the second leading cause of c...,Yes,No,Yes,Unsubstantiate,Irrelevant,"[c6131293-c506-4347-8441-06e480444093, 821ac75...",[Determination of the median effective concent...
4,PubPeer,c004,10.1155/2022/4783847,A Fault-Tolerant Structure for Nano-Power Comm...,Yes,Yes,Engineering,if the efficiency of the routing algorithm is ...,r005,10.36410/jcpr.2022.23.3.312,Analysis and research hotspots of ceramic mate...,"From the perspective of scientometrics, comb t...",Yes,No,Yes,Unsubstantiate,Irrelevant,"[65b14485-4faa-441e-9aac-9967bad1f603, 8b03de6...","[In the table, China's intermediary centrality..."


In [3]:
%%capture output
# Execute the prompt-creation notebook; anything it prints is captured in `output` instead of being shown here.
# NOTE(review): `create_prompt`, which is called further down, is not defined in this notebook —
# presumably it is defined by this run; confirm.
%run ./06_0_prompt_creation.ipynb

In [19]:
import ollama

def send_prompt(prompt, model):
    """Send ``prompt`` to ``model`` as a single user message and return the reply text.

    The request goes through ``ollama.chat``; only the ``content`` field of the
    returned message is handed back to the caller.
    """
    user_message = {'role': 'user', 'content': prompt}
    reply = ollama.chat(model=model, messages=[user_message])
    message = reply['message']
    return message['content']

In [23]:
import time

def prompting_model(df, model, save_intermediate_results=False):
    """Query ``model`` once per downloaded reference and store the raw replies in ``df``.

    For every row whose 'Reference Article Downloaded' value is 'Yes', a prompt is
    built with ``create_prompt(row)``, sent with ``send_prompt(prompt, model)``, and
    the reply is written to the 'Model Classification' column. All other rows are
    left untouched.

    Args:
        df: DataFrame with at least the columns 'Reference Article Downloaded' and
            'Reference Article ID' (plus whatever ``create_prompt`` reads).
            It is modified in place.
        model: Model name, passed through unchanged to ``send_prompt``.
        save_intermediate_results: If True, ``df`` is pickled after the 1st, 11th,
            21st, ... processed row to the "..._intermed.pkl" file whose directory
            is derived from the notebook-level ``embedding``, ``no_prev_chunking``
            and ``grobid_model`` settings.

    Returns:
        The same ``df`` object that was passed in (not a copy).
    """
    print(f"Prompting model: {model}")

    # Create a new column in the dataframe to store the responses
    if 'Model Classification' not in df.columns:
        df['Model Classification'] = None

    # The checkpoint path does not change during the run, so build it once.
    # Model names may contain "/" (e.g. "sroecker/sauerkrautlm-7b-hero:latest"),
    # which would otherwise be read as a directory separator and make the write
    # fail; names without "/" yield exactly the same file name as before.
    checkpoint_path = None
    if save_intermediate_results:
        file_safe_model = str(model).replace('/', '_')
        checkpoint_path = f"../data/dfs/{embedding}{'_no_prev_chunking' if no_prev_chunking else ''}/{grobid_model}/ReferenceErrorDetection_data_with_prompt_results_{file_safe_model}_intermed.pkl"

    # Number of rows already prompted. Checkpointing is driven by this counter
    # rather than by the index label, so it also works for non-integer,
    # filtered or shuffled indexes.
    processed = 0

    # Iterate through the dataframe
    for index, row in df.iterrows():
        if row['Reference Article Downloaded'] != 'Yes':
            continue

        start_time = time.time()
        print(f"Processing: {row['Reference Article ID']}")

        # Create the prompt
        prompt = create_prompt(row)

        # Send the prompt and get the response
        response = send_prompt(prompt, model)

        # Save the response to the new column
        df.at[index, 'Model Classification'] = response

        # Checkpoint right after the first processed row (so a bad path fails
        # fast) and then after every further 10 processed rows.
        if checkpoint_path is not None and processed % 10 == 0:
            df.to_pickle(checkpoint_path)
        processed += 1

        end_time = time.time()
        print(f"Took {round(end_time - start_time, 2)} seconds")
        print("==================================")
    return df

In [24]:
# Candidate model names; the entry at position 0 is the one used for this run.
models = [
    "llama3.1:70b",
    "deepseek-r1:32b",
    "codestral:latest",
    "deepseek-coder-v2:16b",
    "sroecker/sauerkrautlm-7b-hero:latest",
    "starcoder2:3b",
]
model = models[0]

In [25]:
# Prompt the selected model for every row whose reference article was downloaded.
# Note: prompting_model writes into `df` in place and returns it, so `df2` and `df`
# refer to the same object afterwards.
df2 = prompting_model(df, model, save_intermediate_results=True)

Prompting model: llama3.1:70b
Processing: r001
Took 8.96 seconds
Processing: r002
Took 8.64 seconds
Processing: r003
Took 9.76 seconds
Processing: r004
Took 17.96 seconds
Processing: r005
Took 9.16 seconds
Processing: r006
Took 9.0 seconds
Processing: r007
Took 9.45 seconds
Processing: r008
Took 11.58 seconds
Processing: r009
Took 11.04 seconds
Processing: r010
Took 9.55 seconds
Processing: r011
Took 9.04 seconds
Processing: r012
Took 8.92 seconds
Processing: r013
Took 9.99 seconds
Processing: r013
Took 9.21 seconds
Processing: r014
Took 8.22 seconds
Processing: r015
Took 15.31 seconds
Processing: r005
Took 9.87 seconds
Processing: r017
Took 12.14 seconds
Processing: r018
Took 9.27 seconds
Processing: r019
Took 9.18 seconds
Processing: r020
Took 9.45 seconds
Processing: r021
Took 10.57 seconds
Processing: r022
Took 9.72 seconds
Processing: r023
Took 14.36 seconds
Processing: r024
Took 17.94 seconds
Processing: r013
Took 9.67 seconds
Processing: r025
Took 17.76 seconds
Processing: r026


In [26]:
# Persist the dataframe with the model responses alongside the input data.
# Model names can contain "/" (e.g. "sroecker/sauerkrautlm-7b-hero:latest"), which would be
# treated as a directory separator and make the write fail, so it is replaced in the file name.
# Names without "/" produce exactly the same file name as before.
results_file_model = str(model).replace("/", "_")
df2.to_pickle(f"../data/dfs/{embedding}{'_no_prev_chunking' if no_prev_chunking else ''}/{grobid_model}/ReferenceErrorDetection_data_with_prompt_results_{results_file_model}.pkl")