In [None]:
import itertools

import openai
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Connection settings for the vLLM server used for hypothesis generation.
import sys
import time


# Port of the OpenAI-compatible endpoint; interpolated into openai_api_base.
port = 80

# First command-line argument, if one was supplied. Guarded because sys.argv
# may contain only the program name, in which case indexing [1] would raise
# IndexError before anything else in this cell runs.
# NOTE(review): server_ip is not referenced by the base URL below, which
# hard-codes its host - confirm whether the URL was meant to use this value.
server_ip = sys.argv[1] if len(sys.argv) > 1 else None

# Set OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "cmsc-35360"
openai_api_base = f"http://195.88.24.64:{port}/v1"


# Instruction text that create_prompt places before the plan.
prompt = (
    "Read the following scientific experimental plan carefully,"
    " and translate the high-level experimental plans into detailed, step-by-step protocols."
    " Consider the requirements for replicability and specificity in experimental science,"
    " and how these steps could be communicated to someone (or something, like a robot)"
    " executing the experiment."
    " Here is the plan."
)

# Closing reminder that create_prompt places after the plan.
prompt2 = (
    "Remember, return detailed, step-by-step protocols"
    " for specific science experiments."
)


def create_prompt(chunk):
    """Build the user message for one experimental plan.

    The module-level ``prompt`` text, the plan, and the module-level
    ``prompt2`` reminder are joined with blank lines so the instruction
    sentences do not run directly into the plan text.

    Args:
        chunk: The high-level experimental plan, as a string.

    Returns:
        The assembled user prompt string.

    Raises:
        TypeError: If ``chunk`` is not a string (for example a missing
            value that was read as a float).
    """
    if not isinstance(chunk, str):
        # Plain concatenation would also raise TypeError here; raise it
        # explicitly so the offending value is visible in the message.
        raise TypeError(
            f"chunk must be a str, got {type(chunk).__name__}: {chunk!r}"
        )
    gpt_user_prompt = "\n\n".join((prompt, chunk, prompt2))
    return gpt_user_prompt


# Chat client for the OpenAI-compatible server, built from the module-level
# openai_api_base and openai_api_key settings.
client = openai.OpenAI(base_url=openai_api_base, api_key=openai_api_key)


# Persona text that query_llm sends as the first message of every request.
# NOTE(review): "reponses" is misspelled; the string is sent verbatim, so it
# is kept unchanged here.
gpt_assistant_prompt = (
    "You are a super smart AI that knows about science."
    " You follow directions and you are always truthful and concise in your reponses."
)


def query_llm(gpt_user_prompt, max_retries=3, retry_delay=2):
    """Send one user prompt to the chat model and return the reply text.

    The request is made through the module-level ``client`` with
    ``gpt_assistant_prompt`` as the system message. Failed attempts are
    retried; after a successful call the function pauses for ``retry_delay``
    seconds before returning.

    Args:
        gpt_user_prompt: Text sent as the user message.
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to sleep after a successful call and between
            failed attempts.

    Returns:
        The content of the first choice in the response, or ``None`` when
        every attempt failed.
    """
    message = [
        # The persona/instruction text belongs in the "system" role; sending
        # it as "assistant" presents it as something the model said earlier.
        {"role": "system", "content": gpt_assistant_prompt},
        {"role": "user", "content": gpt_user_prompt},
    ]
    temperature = 0.0
    frequency_penalty = 0.0

    for attempt in range(1, max_retries + 1):
        try:
            response = client.chat.completions.create(
                model="meta-llama/Meta-Llama-3-70B-Instruct",
                messages=message,
                temperature=temperature,
                frequency_penalty=frequency_penalty,
            )
            reply = response.choices[0].message.content
        except Exception as exc:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit still stop a long-running notebook loop.
            print(
                "Hypo trying again port " + str(port)
                + f" (attempt {attempt}/{max_retries} failed: {exc!r})"
            )
            if attempt < max_retries:
                time.sleep(retry_delay)
        else:
            time.sleep(retry_delay)
            return reply

    # The earlier code referenced `exit` without calling it, so the function
    # never terminated the program; it returned None. Keep that result, but
    # make it explicit and report it accurately.
    print("Maximum retries reached. Returning None.")
    return None


In [None]:
# Load the high-level plans and add a "specific_plan" column, initially None
# in every row, to hold the generated protocols.
df = pd.read_csv("best_scoring_high_level.csv").assign(specific_plan=None)
df


In [None]:
# Generate a detailed protocol for every plan and store it on the same row.
# Iterating over (index label, value) pairs writes each response to the row
# its plan came from, without relying on the labels being 0..n-1.
for row_label, plan_text in df["plan"].items():
    df.at[row_label, "specific_plan"] = query_llm(create_prompt(plan_text))


In [None]:
# Write the table, including the generated protocols, without the index column.
df.to_csv(path_or_buf="specific_protocol.csv", index=False)
