# Imports and Constants

In [4]:
import pandas as pd
from tqdm import tqdm
import numpy as np
from datetime import datetime
import os
import numpy as np
import regex as re
from collections import Counter
from utils import prompt, update_labels, master_path, get_answer_vector, stripJSON
import json

# Run-wide settings shared by every cell below.
# CONNECTION_ON is forwarded to `prompt` and also gates which input is loaded
# and whether results are written to disk.
CONNECTION_ON = True
MODEL = 'ollama'    # 'gpt-4o' 'ollama' 'gpt-3.5-turbo' 
# Name of the dataframe column that receives the JSON-encoded answers.
label_col = 'json_response'
# Minute-resolution timestamp; embedded in the output file names.
right_now = datetime.now().replace(second=0, microsecond=0)

# Define Prompt and Questions

In [5]:
# Candidate true/false statements about how the CITING paper uses the CITED
# foundation model. Only the first 20 entries are kept (see the slice at the
# end of the literal); the remaining entries are currently unused.
question_list = [
    "The CITING sentences mention results from the CITED paper to support background claims.",
    "The CITING sentences mention a technique of the CITED paper in order to provide relevant background context.",
    "The CITING sentences mention performance results of the CITED paper to contextualize the CITED model's capabilities.",
    "The CITING authors describe other researchers using the CITED foundation models.",
    "The CITING authors use the CITED foundation models to note a similarity or difference to an existing method.",
    "It's ambiguous as to whether or not the CITING authors actually use the CITED foundation model based on the wording.",
    "The CITING authors deploy the CITED foundation model's encoder or decoder as part of their methodology.",
    "The CITING authors use the CITED foundation model to create embeddings as part of their methodology.",
    "The CITING authors use the CITED foundation model for feature extraction as part of their methodology.",
    "The CITING authors use the CITED foundation model as a classifier or detector as part of their methodology.",
    "The CITING authors deploy the CITED foundation model to generate data in the form of text/image/audio/video for later training of their model.",
    "The CITING authors perform their own evaluation on the CITED foundation model.",
    "The CITING authors perform an ablation study on the CITED foundation model.",
    "The CITING authors clearly deploy the CITED foundation model at some point throughout their methodology.",
    "It's unclear whether or not the CITING authors perform any training on the CITED foundation model.",
    "The CITING authors deploy fine-tuning or adjusting the parameter weights of the CITED foundation model.",
    "The CITING authors pre-train or train-from-scratch the CITED model as part of their methodology.",
    "The CITING authors mention fine-tuning the CITED foundation model as a possibility, but do not mention fine-tuning themselves.",
    "The CITING authors train a model using the CITED paper's dataset.",
    "The CITING authors adopt the CITED foundation model's architecture as part of their model design.",
    "The CITING authors use the CITED foundation model to perform transfer learning.",
    "The CITING authors use the CITED foundation model as a benchmark for comparison.",
    "The CITING authors report improvements achieved by using the CITED foundation model over other models.",
    "The CITING authors integrate the CITED foundation model with other models or algorithms.",
    "The CITING authors use the CITED foundation model to validate a hypothesis or experimental setup.",
    "The CITING authors conduct a qualitative analysis involving the CITED foundation model.",
    "The CITING authors conduct a quantitative analysis involving the CITED foundation model.",
    "The CITING authors highlight limitations or challenges of using the CITED foundation model.",
    "The CITING authors discuss future work or potential extensions involving the CITED foundation model.",
    "The CITING authors leverage the CITED foundation model for a specific application domain (e.g., healthcare, finance, NLP).",
    "The CITING authors mention modifications or adaptations made to the CITED foundation model.",
    "The CITING authors deploy the CITED foundation model in a real-world scenario or experiment.",
    "The CITING authors use the CITED foundation model to perform anomaly detection.",
    "The CITING authors use the CITED foundation model to perform sentiment analysis.",
    "The CITING authors use the CITED foundation model for unsupervised learning tasks.",
    "The CITING authors use the CITED foundation model for supervised learning tasks.",
    "The CITING authors use the CITED foundation model for reinforcement learning tasks.",
    "The CITING authors use the CITED foundation model to process multi-modal data.",
    "The CITING authors use the CITED foundation model to enhance interpretability or explainability of their results.",
    "The CITING authors employ the CITED foundation model to optimize hyperparameters in their experiments.",
    "The CITING authors fine-tune the CITED foundation model on a domain-specific dataset.",
    "The CITING authors mention using a smaller learning rate specifically for fine-tuning the CITED foundation model.",
    "The CITING authors use the CITED foundation model's pre-trained weights as initialization for their own model.",
    "The CITING authors employ transfer learning techniques involving the CITED foundation model.",
    "The CITING authors use the CITED foundation model to pre-train a model before fine-tuning on a specific task.",
    "The CITING authors report using a specific dataset for fine-tuning the CITED foundation model.",
    "The CITING authors mention the number of epochs or iterations used for fine-tuning the CITED foundation model.",
    "The CITING authors discuss the computational resources needed for fine-tuning the CITED foundation model.",
    "The CITING authors mention specific hyperparameters adjusted during the fine-tuning of the CITED foundation model.",
    "The CITING authors compare results between pre-trained and fine-tuned versions of the CITED foundation model.",
    "The CITING authors mention the use of regularization techniques during the fine-tuning of the CITED foundation model.",
    "The CITING authors evaluate the CITED foundation model's performance before and after fine-tuning.",
    "The CITING authors describe modifying the architecture of the CITED foundation model prior to fine-tuning.",
    "The CITING authors report the impact of fine-tuning on the CITED foundation model's generalization ability.",
    "The CITING authors use the CITED foundation model in a semi-supervised learning framework involving fine-tuning.",
    "The CITING authors discuss challenges faced during the fine-tuning of the CITED foundation model.",
    "The CITING authors use data augmentation techniques in conjunction with fine-tuning the CITED foundation model.",
    "The CITING authors mention training the CITED foundation model on a multi-task learning setup.",
    "The CITING authors use the CITED foundation model to initialize another model which is then fine-tuned.",
    "The CITING authors highlight improvements in task performance due to fine-tuning the CITED foundation model."
][:20]
# 1-based question number -> statement.
# NOTE(review): the list is concatenated with itself, so every statement
# appears twice (numbers 1-20 and again as 21-40). Presumably intentional,
# e.g. to compare repeated answers -- confirm.
questions = {i+1: question for i, question in enumerate(question_list + question_list)}


#section_lengths = [5,8,4,2]
#assert(sum(section_lengths) == len(questions))

# Numbered, newline-separated rendering of the statements for the multi-question prompt.
questions_as_string = '\n'.join([f"{key}. {value}" for key, value in questions.items()])

# Two-stage template. The first .format() call (below) fills {question_statement}
# and {json_format}; the doubled braces around modelKey/input survive that pass
# as single-brace placeholders and are filled per row by a second .format() call.
GENERIC_PROMPT = ("""The following sentences are from an academic paper (the CITING paper) which references a pretrained machine learning model through citation (the CITED paper). The models are called foundation models, and they might be a language model, a vision model, or any other kind of large neural network. The CITED paper is highlighted using HTML tags as such: <cite> cited reference </cite>. All other foundation models can be ignored, as we only care about the model cited with these tags. If it's helpful, the model identifier of the CITED model is {{modelKey}}. 

We'd like to discern how the CITING paper makes use of the foundation model, as described within the sentences. {question_statement}

\n
We want to be judicious and avoid guessing. The authors must explicitly mention the behavior in question specifically in relation to the CITED model with model identifier {{modelKey}}. Use only this JSON format in your response: {json_format}, based on the following:\n\n\"{{input}}\"""" )

# Prompt that asks all statements at once. The json_format value keeps doubled
# braces because it is inserted verbatim by the first .format() and only
# unescaped by the later per-row .format(input=..., modelKey=...).
# Fixed wording: the example tag is now properly closed (</cite>) and
# "of null" reads "or null".
MULTIPROMPT = GENERIC_PROMPT.format(question_statement = "Below are a set of statements which will be evaluated as either true or false.\n" + questions_as_string +
                                    """\nPlease respond with the following information per question: \n(a) Provide a direct quote from the sentences which includes the <cite> cited model </cite> and helps answer the question. \n(b) Explain clearly how the sentence answers the question. \n(c) The final answer to the question (True or False, or null if unsure)""",
                                    json_format = '''{{"quote_1": "Quotation from the sentences with the cited model", 
                                                        "explanation_1": "Information relevant to the first question...", 
                                                        "answer_1": "True | False", 
                                                        "quote_2": "Quotation from the sentences with the cited model", 
                                                        "explanation_2": "Information relevant to the second question..." , 
                                                        "answer_2": "True | False", ... }}''')
# Prompt that asks a single statement; {question} is left as a placeholder for
# the per-row .format(input=..., modelKey=..., question=...) call.
SINGLEPROMPT = GENERIC_PROMPT.format(question_statement = "The following statement is to be evaluated as either true or false.\n{question}\n",
                                     json_format = '{{"answer": "True | False"}}')

# Sanity check: render the single-question prompt with dummy values.
print(SINGLEPROMPT.format(input = "SENTENCE HERE", modelKey = "key", question = "Question!"))
#print(MULTIPROMPT.format(input = "SENTENCE HERE", modelKey = "key"))

The following sentences are from an academic paper (the CITING paper) which references a pretrained machine learning model through citation (the CITED paper). The models are called foundation models, and they might be a language model, a vision model, or any other kind of large neural network. The CITED paper is highlighed using HTML tags as such: <cite> cited reference </cite>. All other foundation models can be ignored, as we only care about the model cited with these tags. If it's helpful, the model identifier of the CITED model is key. 

We'd like to discern how the CITING paper makes use of the foundation model, as described within the sentences. The following statement is to be evaluated as either true or false.
Question!




We want to be judicious and avoid guessing. The authors must explicitly mention the behavior in question specifically in relation to the CITED model with model identifier key. Use only this JSON format in your response: {"answer": "True | False"}, based on t

In [6]:
def prompt_all(multisentence, modelKey):
    """Ask the model every statement in one request.

    Fills MULTIPROMPT with the given sentences and model identifier and
    forwards it to `prompt`, returning whatever `prompt` returns.
    """
    filled_prompt = MULTIPROMPT.format(input=multisentence, modelKey=modelKey)
    return prompt(
        filled_prompt,
        model=MODEL,
        connection_on=CONNECTION_ON,
        temperature=1,
    )
    
def prompt_singles(multisentence, modelKey):
    """Ask the model each statement in `questions` separately.

    One `prompt` call is issued per statement, using SINGLEPROMPT filled with
    the sentences, the model identifier and the statement text. Each reply is
    passed through `stripJSON` and its "answer" field is collected.

    Returns:
        A JSON string mapping "answer_<n>" (n = question number) to the
        model's answer. Replies that cannot be used -- `stripJSON` returned
        None, a non-dict value, or a dict without an "answer" key -- are
        reported with "Got NULL result" and recorded as False instead of
        aborting the whole run.
    """
    results = {}
    for idx, question in questions.items():
        string_answer = prompt(
            SINGLEPROMPT.format(input = multisentence, modelKey = modelKey, question = question),
            model = MODEL,
            connection_on = CONNECTION_ON,
            temperature = 1
        )
        # presumably stripJSON yields a parsed dict, or None when no JSON is found -- confirm in utils
        as_json = stripJSON(string_answer)

        if isinstance(as_json, dict) and 'answer' in as_json:
            results[f"answer_{idx}"] = as_json['answer']
        else:
            # A missing/malformed reply previously raised KeyError/TypeError
            # unless it was exactly None; treat all such cases alike.
            print("Got NULL result")
            results[f"answer_{idx}"] = False

    return json.dumps(results)

# Load existing or default dataframe, query LLM

In [7]:
# Choose the input table: without a connection, go through update_labels on
# an earlier results file; otherwise read the master CSV.
if not CONNECTION_ON:
    other_path = '~/Desktop/2. FutureTech/uniform_sample/results/uniform_base_sample_gpt-4o_prompt_2024-07-17 11:55:00.csv'
    df = update_labels(other_path, save = True)
else:
    df = pd.read_csv(master_path + '.csv')

# Remove the leading "<digits>_" prefix from each model key.
df['modelKeyStriped'] = df['modelKey'].map(lambda key: re.sub(r'^\d+_', '', key))
print(len(df))

519


In [8]:
# Tally how many rows carry each distinct value of the 'alex2' column.
Counter(df['alex2'])

Counter({'background': 265,
         'uses': 111,
         'similarities': 77,
         'extends': 38,
         'differences': 13,
         'motivation': 10,
         '*': 4,
         'future work': 1})

In [9]:
# Output locations derived from master_path: the prompt text goes under
# uniform_sample/prompts/, the labelled table under uniform_sample/results/.
file_path = os.path.expanduser((master_path + f"_{MODEL}_prompt_{right_now}.txt").replace('uniform_sample/raw/', 'uniform_sample/prompts/'))
new_path = os.path.expanduser((master_path + f"_{MODEL}_prompt_{right_now}.csv").replace('uniform_sample/raw/', 'uniform_sample/results/'))

# Archive the prompt template alongside the results.
# NOTE(review): this records MULTIPROMPT, while the loop below queries through
# prompt_singles (i.e. SINGLEPROMPT) -- confirm which template should be saved.
with open(file_path, 'w', encoding='utf-8') as f:
    f.write(MULTIPROMPT)

if label_col not in df.columns:
    print("Resetting results")
    df[label_col] = None

# Keep the column as object dtype so JSON strings can be stored even when it
# was loaded from CSV as an all-NaN float column.
df[label_col] = df[label_col].astype(object)

for idx in tqdm(df.index):
    # Skip rows that already have a label. pd.notna is used instead of
    # `is not None` because missing values loaded from CSV are NaN, which
    # would otherwise be mistaken for an existing label.
    if pd.notna(df.at[idx, label_col]):
        continue

    # Pass this row's sentences (not the whole column) and write the result
    # straight into the frame rather than through chained indexing.
    df.at[idx, label_col] = prompt_singles(df.at[idx, 'multisentence'], df.at[idx, 'modelKeyStriped'])


if CONNECTION_ON:
    df.to_csv(new_path, index = False)
    # Flip the flag so later cells do not save (or query) again.
    CONNECTION_ON = False

Resetting results


 49%|████▉     | 256/519 [2:10:35<2:00:58, 27.60s/it]  

Got NULL result


 60%|█████▉    | 309/519 [2:32:57<1:28:22, 25.25s/it]

Got NULL result


100%|██████████| 519/519 [4:05:10<00:00, 28.34s/it]  


In [10]:
# Display the dataframe, including the newly filled label column.
df

Unnamed: 0,Unnamed: 0.1,sentence,mcllm,modelKey,paperId,modelId_x,mc,modelId_y,mc_reduced,urop_sentence,...,classification_order,labels,modelTitle,modelId,modelYear,paperYear,pot_extends,prob_extends,modelKeyStriped,json_response
0,0,"in recent years, many large-scale pre-trained ...",background,417_gpt-3_175b_(davinci),d8d578d4ece329f17b025946587b1751721b9144,6b85b63579a916f705a8e10a49bd8d849d91b1fc,background,6b85b63579a916f705a8e10a49bd8d849d91b1fc,context,"in recent years, many large-scale pre-trained ...",...,,,,,,,,,gpt-3_175b_(davinci),"{""answer_1"": ""False"", ""answer_2"": ""False"", ""an..."
1,1,one of the known problems with contrastive tra...,background,1013_wave2vec_2.0_large,7f0c7c324675179f0e32c160d99c7066c7ab30ae,49a049dc85e2380dde80501a984878341dd8efdf,background,49a049dc85e2380dde80501a984878341dd8efdf,context,one of the known problems with contrastive tra...,...,,,,,,,,,wave2vec_2.0_large,"{""answer_1"": ""False"", ""answer_2"": ""False"", ""an..."
2,2,further work in compound scaling yielded model...,background,377_efficientnet-l2,970cb7b5b25da0f1f8b000add10960680fe8cd2e,4f2eda8077dc7a69bb2b4e0a1a086cf054adb3f9,background,4f2eda8077dc7a69bb2b4e0a1a086cf054adb3f9,context,further work in compound scaling yielded model...,...,,,,,,,,,efficientnet-l2,"{""answer_1"": ""False"", ""answer_2"": ""False"", ""an..."
3,3,"due to the high computing cost, conducting a t...",background,417_gpt-3_175b_(davinci),b6ec1e8f18185b4b3d46201359a440404575460c,6b85b63579a916f705a8e10a49bd8d849d91b1fc,background,6b85b63579a916f705a8e10a49bd8d849d91b1fc,context,"due to the high computing cost, conducting a t...",...,,,,,,,,,gpt-3_175b_(davinci),"{""answer_1"": ""False"", ""answer_2"": ""False"", ""an..."
4,4,## 1 introduction\n\n\nthe multilingual bert ...,background,1064_bert-large,1234fcc1577a32b829d2886fdf68375b9d4525e9,df2b0e26d0599ce3e70df8a9da02e51594e0e992,background,df2b0e26d0599ce3e70df8a9da02e51594e0e992,context,## 1 introduction\n\nthe multilingual bert mo...,...,,,,,,,,,bert-large,"{""answer_1"": ""False"", ""answer_2"": ""True"", ""ans..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
514,514,,,1114_mask_r-cnn,3c6321c030b656f6735c0b0239a18b7f8d30f438,,,,,,...,,,,,,,,,mask_r-cnn,"{""answer_1"": ""False"", ""answer_2"": ""False"", ""an..."
515,515,,,1114_mask_r-cnn,8960d8b422ed5c6cb895e07bd0b0a6377ca6be57,,,,,,...,,,,,,,,,mask_r-cnn,"{""answer_1"": ""False"", ""answer_2"": ""False"", ""an..."
516,516,,,1158_yolox-x,13f1b60bbb44e0e33e3fab8e9c39077e2d918287,,,,,,...,,,,,,,,,yolox-x,"{""answer_1"": ""False"", ""answer_2"": ""False"", ""an..."
517,517,,,1160_batchnorm,320f5f838b754df3b4d56562b2beee0b7e67a515,,,,,,...,,,,,,,,,batchnorm,"{""answer_1"": ""False"", ""answer_2"": ""False"", ""an..."


In [11]:
# Fallback save. The labelling cell sets CONNECTION_ON to False after it has
# written the CSV, so this only runs when that save did not happen.
if CONNECTION_ON:
    print(f"Saving to {new_path}")
    df.to_csv(new_path, index=False)