In [None]:
import itertools

import openai
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Connection settings and prompt templates for the experiment-plan generation step.
import sys
import time


# Port of the OpenAI-compatible API server.
port = 80

# First command-line argument, if one was supplied. Guarded so that running
# without arguments no longer raises IndexError.
# NOTE(review): this value is not used anywhere in this cell -- the base URL
# below contains a hard-coded address. Confirm which of the two is intended.
server_ip = sys.argv[1] if len(sys.argv) > 1 else None

# Set OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "cmsc-35360"
openai_api_base = f"http://195.88.24.64:{port}/v1"


# Text placed in front of the hypothesis in every user prompt.
prompt = "Read the following scientific hypothesis carefully: "

# Text placed after the hypothesis in every user prompt.
# NOTE(review): "prexisiting" is misspelled; the literal is left untouched so
# that the text sent to the model stays identical to earlier runs.
prompt2 = (
    " Given this hypothesis,"
    " develop an experimental plan to test the generated hypotheses using prexisiting scientific methods and equipment."
    " These Instructions should be a high-level experimental approach that"
    " could theoretically be executed in a laboratory."
)


def create_prompt(chunk):
    """Return the user prompt for one hypothesis.

    The result is the module-level ``prompt`` text, followed by ``chunk``,
    followed by the module-level ``prompt2`` text, with nothing inserted
    between the three parts.
    """
    pieces = (prompt, chunk, prompt2)
    return "".join(pieces)


# Instruction text that query_llm sends as the first message of every request.
gpt_assistant_prompt = (
    "You are a super smart AI that knows about science."
    " You follow directions and you are always truthful and concise in your reponses."
)

# Client bound to the endpoint and key held in openai_api_base / openai_api_key.
client = openai.OpenAI(base_url=openai_api_base, api_key=openai_api_key)


def query_llm(gpt_user_prompt, max_retries=3, pause_seconds=2):
    """Send one prompt to the chat-completions endpoint and return the reply text.

    Args:
        gpt_user_prompt: Text sent as the "user" message.
        max_retries: Maximum number of attempts before giving up.
        pause_seconds: Seconds to sleep after every attempt, successful or not.

    Returns:
        The content of the first choice in the response, or ``None`` when
        every attempt raised an exception.
    """
    # NOTE(review): the instruction text is sent with role "assistant";
    # instruction prompts are usually sent as "system". Left unchanged so
    # requests stay identical to earlier runs -- confirm this is intentional.
    messages = [
        {"role": "assistant", "content": gpt_assistant_prompt},
        {"role": "user", "content": gpt_user_prompt},
    ]

    for attempt in range(1, max_retries + 1):
        try:
            response = client.chat.completions.create(
                model="meta-llama/Meta-Llama-3-70B-Instruct",
                messages=messages,
                temperature=0.0,
                frequency_penalty=0.0,
            )
            reply = response.choices[0].message.content
        except Exception as err:
            # Catch Exception rather than everything so that Ctrl-C /
            # kernel interrupts are not swallowed, and show what went wrong.
            print("Hypo trying again port " + str(port))
            print(f"Attempt {attempt} of {max_retries} failed: {err!r}")
            time.sleep(pause_seconds)
        else:
            # Pause after a successful call as well, to pace requests.
            time.sleep(pause_seconds)
            return reply

    # The previous code evaluated a bare `exit` here, which does nothing; the
    # function has always fallen through and returned None. That behaviour is
    # kept (callers store the None) but is now explicit and reported accurately.
    print("Maximum retries reached. Giving up on this prompt.")
    return None


In [None]:
# Load the hypotheses produced by the previous step.
df = pd.read_csv("hypothesis_out.csv")

# The generation loop later in this notebook reads these two columns, so fail
# here with a clear message instead of a KeyError part-way through the run.
missing_columns = {"HYPOTHESIS", "papers"} - set(df.columns)
assert not missing_columns, (
    f"hypothesis_out.csv is missing required columns: {sorted(missing_columns)}"
)

# Show a preview rather than the whole frame.
df.head()


In [None]:
# Schema of the results table: the source papers and hypothesis, the three
# optional prompt fragments that were used, and the plan returned by the model.
column_names = list(
    ("papers", "HYPOTHESIS", "paperinfo", "disprove_info", "budget_info", "plan")
)

# Start from a frame that has the columns but no rows.
dfnew = pd.DataFrame(columns=column_names)
dfnew


In [None]:
def read_files(file_paths):
    """Concatenate the text of several files.

    Each file that can be read contributes its contents followed by a
    newline. A file that raises while being opened or read is reported with
    ``print`` and skipped, so the result only contains the readable files.

    Args:
        file_paths: Iterable of paths to open in text mode.

    Returns:
        The joined text; an empty string when nothing could be read.
    """
    collected = []
    for file_path in file_paths:
        try:
            with open(file_path, "r") as handle:
                collected.append(handle.read() + "\n")
        except Exception as e:
            print(f"Error reading {file_path}: {e}")
    return "".join(collected)


In [None]:
import ast


# Optional prompt fragments. Each group has an "off" variant (empty string)
# that is dropped when the prompt is assembled.
papers_info = "Here are the summaries of the papers this hypothesis was generated from for context:"
no_papers_info = ""

disprove_info = "To check if this hypothesis is true, come up with an experiment designed to DISPROVE THIS hypothesis to verify if it is true or not."
no_disprove_info = ""

expensive_info = "This experiment has an unlimited budget in terms of time, resources, and financial support."
cheap_info = "This experiment has a limited budget in terms of time, resources, and financial support and should be implementable within a reasonable time frame and for a reasonable price."
no_budget_info = ""

# List of options for each choice
papers_options = [papers_info, no_papers_info]
disprove_options = [disprove_info, no_disprove_info]
budget_options = [cheap_info, expensive_info, no_budget_info]

# Every (disprove, budget, papers) combination. Materialised as a list so the
# same combinations can be reused for every hypothesis; a bare
# itertools.product iterator is exhausted after one pass.
all_combinations = list(
    itertools.product(disprove_options, budget_options, papers_options)
)

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; this avoids a pd.concat per row and makes re-running the cell produce
# the same table instead of appending duplicates.
records = []

for h, p in zip(df["HYPOTHESIS"], df["papers"]):
    gpt_user_prompt = create_prompt(h)
    print(h)
    # Unpack in the same order the product was built. The earlier code
    # unpacked as (papers, disprove, budget), which stored each fragment
    # under the wrong column.
    for disproveinfo, budgetinfo, paperinfo in all_combinations:
        # NOTE(review): nothing separates the base prompt from the first
        # fragment, or "context:" from the file text. Kept as-is so the
        # prompts sent to the model are unchanged.
        combined_string = gpt_user_prompt + " ".join(
            option for option in (disproveinfo, budgetinfo, paperinfo) if option
        )

        if paperinfo:
            # `p` holds the string form of a list of file paths; parse it and
            # append the files' text after the "…for context:" fragment.
            pp = ast.literal_eval(p)
            combined_string += read_files(pp)

        response = query_llm(combined_string)

        records.append(
            {
                "papers": p,
                "HYPOTHESIS": h,
                "paperinfo": paperinfo,
                "disprove_info": disproveinfo,
                "budget_info": budgetinfo,
                "plan": response,
            }
        )

dfnew = pd.DataFrame(records, columns=column_names)


In [None]:
# Write the collected plans to disk without the index column.
output_csv = "high_level_out.csv"
dfnew.to_csv(output_csv, index=False)
