In [1]:
# Arguments
# Run configuration: every value a reader may want to change lives here.
f='./depmap/Model.csv.gz'  # path of the CSV table handed to load_data()
host='127.0.0.1'  # server address interpolated into the API base URL
port='9999'  # server port; kept as a string because it is only used inside the URL
model='meta-llama/Meta-Llama-3-70B-Instruct'  # model name sent with each chat completion request

In [2]:
from openai import OpenAI

# Point the OpenAI client at vLLM's API server, whose address is built
# from the `host` and `port` arguments.
openai_api_key = 'cmsc-35360'
openai_api_base = "http://{}:{}/v1".format(host, port)

client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)

In [3]:
import pandas as pd

def load_data(f):
    '''
    Read the CSV at `f` into a DataFrame, print its shape, and return it.
    '''
    frame = pd.read_csv(f)
    print(f'done importing dataframe (rows, columns) = {frame.shape}.')
    return frame

In [4]:
import random

def select_random_element_not_equal_to(array, correct):
    '''
    Return one randomly chosen entry of `array` that differs from `correct`.

    Raises ValueError when `array` is empty, or when every entry equals
    `correct` so that nothing is left to pick from.
    '''
    if len(array) == 0:
        raise ValueError("The array is empty")

    # Keep only the entries that are not the correct answer.
    candidates = []
    for item in array:
        if item != correct:
            candidates.append(item)

    if len(candidates) == 0:
        raise ValueError("No valid elements to choose from")

    return random.choice(candidates)

In [5]:
def construct_question(df, index, choices=None):
    '''
    Construct a multiple choice question from df using row at index.

    The question asks which primary disease the row's cell line models and
    offers four options (a-d): the row's own 'OncotreePrimaryDisease' value
    plus three distinct wrong answers, in random order.

    Parameters
    ----------
    df : DataFrame with 'CellLineName' and 'OncotreePrimaryDisease' columns.
    index : positional row index into df.
    choices : optional iterable of candidate answers to draw the wrong
        answers from. Defaults to the distinct values of
        df['OncotreePrimaryDisease'], so the function no longer depends on a
        global defined in another cell.

    Returns
    -------
    (question, correct_choice, correct_answer) where correct_choice is the
    letter of the option holding correct_answer.

    Raises
    ------
    IndexError : index is outside df.
    ValueError : fewer than three distinct wrong answers are available.
    '''
    if index < 0 or index >= len(df):
        raise IndexError("Index out of bounds")

    row = df.iloc[index]
    cell_line_name = row['CellLineName']
    correct_answer = row['OncotreePrimaryDisease']

    if choices is None:
        choices = df['OncotreePrimaryDisease'].unique()

    # De-duplicate while keeping order, so sampling cannot return repeats.
    distractor_pool = list(dict.fromkeys(c for c in choices if c != correct_answer))
    if len(distractor_pool) < 3:
        # Resampling until three distinct values appear would never terminate.
        raise ValueError("Need at least three distinct wrong answers to build a question")

    wrong_answers = random.sample(distractor_pool, 3)

    # Carry an explicit "is correct" flag through the shuffle so the correct
    # letter is known without comparing values afterwards (robust to NaN).
    options = [(correct_answer, True)] + [(wrong, False) for wrong in wrong_answers]
    random.shuffle(options)

    letters = "abcd"
    d = {letter: answer for letter, (answer, _) in zip(letters, options)}
    correct_choice = next(
        letter for letter, (_, is_correct) in zip(letters, options) if is_correct
    )

    # The second line of the question is four spaces, exactly as before.
    question = (
        f"The tumor cell line named {cell_line_name} is a biological model for which primary disease?\n"
        "    \n"
        f"a) {d['a']},\n"
        f"b) {d['b']},\n"
        f"c) {d['c']},\n"
        f"d) {d['d']}."
    )

    return question, correct_choice, correct_answer

In [6]:
import json
from tqdm import tqdm

# Load the table and collect the distinct primary-disease labels.
df = load_data(f)
array = df['OncotreePrimaryDisease'].unique()

# Running tallies and the per-question records filled in by the evaluation loop.
num_correct, total = 0, 0
responses = list()


# System prompt for the plain question-answering run. It asks the model to reply
# with a JSON object holding "CHOICE" (the option letter only) and "ANSWER".
# NOTE(review): "antibiodies" looks like a typo for "antibodies". It is left
# as-is here because this text is sent to the model verbatim.
reg_prompt = '''You are a cancer research scientist studying the potential effects of various small molecules, peptides, and 
antibiodies on tumor cell growth. You will be presented with a series of multiple choice questions. Please select the correct
choice. Return the answer in json format {"CHOICE": choice, "ANSWER": answer} where choice is only the alphabetic character 
associated with the full answer.'''

# Alternative prompt that has the model role-play three experts reasoning step
# by step, then report the result as FINAL_ANSWER={...} with an extra
# "DISCUSSION" field next to "CHOICE" and "ANSWER".
# It is not referenced by any of the cells visible in this notebook.
tot_prompt = '''Imagine three experts are answering this question.
They will brainstorm the answer step by step, reasoning carefully and taking all facts into consideration.
All experts will write down one step of their thinking, then share it with the group.
They will each critique their response and all the responses of others.
They will check their answer based on science, laws of physics and logic.
Then all experts will go on to the next step and write down this step of their thinking.
They will keep going through steps until they reach their conclusions taking into account the thoughts of the other experts.
If at anytime they realize that there is a flaw in their logic they will backtrack to where the flaw occurred.
If any expert realizes they are wrong at any point they acknowledge this and start another train of thought.
Each expert will assign a likelihood of their current assertion being correct.
Continue until the experts agree on the single most likely choice. Return the response in json format FINAL_ANSWER={"DISCUSSION": discussion, "CHOICE": choice, "ANSWER": answer} where choice is only the alphabetic character 
associated with the full answer.'''


import re
import json

def extract_json(text):
    '''
    Return the JSON object that follows a FINAL_ANSWER= marker in `text`.

    The object is decoded with a real JSON parser starting at its opening
    brace, so nested objects, braces inside string values, and objects that
    span several lines are all handled. Markers are tried in order and the
    first one followed by valid JSON wins.

    Returns the JSON text (a str, exactly as it appears in `text`), or None
    after printing a message when no marker is present or none of the
    markers is followed by valid JSON.

    Example:
        text = 'Some text FINAL_ANSWER={"key": "value", "number": 123} and more text'
        extract_json(text)  # -> '{"key": "value", "number": 123}'
    '''
    decoder = json.JSONDecoder()
    marker_found = False

    for match in re.finditer(r'FINAL_ANSWER=\s*(?=\{)', text):
        marker_found = True
        start = match.end()  # index of the opening brace
        try:
            # raw_decode stops at the end of the first complete JSON value.
            _, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            continue
        return text[start:end]

    if marker_found:
        print("Error decoding JSON")
    else:
        print("No JSON data found")
    return None



 

# Ask the model one multiple-choice question per row of df and score the reply.
# Replies that cannot be scored (not valid JSON, or JSON that is not an object
# with a "CHOICE" key) are printed and left out of both counters, instead of
# aborting the whole run part-way through.
for i in tqdm(range(df.shape[0])):
    # get question, correct choice and answer
    question, correct_choice, correct_answer = construct_question(df, i)

    # construct message
    messages = [
        {"role": "system", "content": reg_prompt},
        {"role": "user", "content": question},
    ]

    chat_response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.0,
        max_tokens=2560,
    )
    raw_content = chat_response.choices[0].message.content

    try:
        response = json.loads(raw_content)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError covers a reply whose content is None.
        print(f"Error decoding JSON: {e}")
        print(f"{raw_content}")
        continue

    if not isinstance(response, dict) or 'CHOICE' not in response:
        print("Error decoding JSON: reply is not a JSON object with a CHOICE key")
        print(f"{raw_content}")
        continue

    response['CORRECT CHOICE'] = correct_choice
    response['CORRECT ANSWER'] = correct_answer

    if response['CHOICE'] == correct_choice:
        response['SCORE'] = 1
        num_correct = num_correct + 1
    else:
        response['SCORE'] = 0

    total = total + 1
    responses.append(response)

print(f'{num_correct} correct responses out of {total}')

done importing dataframe (rows, columns) = (1921, 36).


100%|███████████████████████████████████████| 1921/1921 [15:59<00:00,  2.00it/s]

1395 correct responses out of 1921





In [7]:
# Show the last question asked, its answer key, and the model's raw reply.
# `question` is already a Python str with real newlines. Round-tripping it
# through encode('utf-8').decode('unicode_escape') would read the UTF-8 bytes
# as Latin-1 and interpret backslashes, corrupting non-ASCII cell line names,
# so it is used unchanged.
decoded_question = question
print(decoded_question)
print(f'\nCORRECT ANSWER: {correct_choice}) {correct_answer}')
print('\nChat response:    ', chat_response.choices[0].message.content, '\n')


The tumor cell line named ABM-T9430 is a biological model for which primary disease?
    
a) Non-Cancerous,
b) Merkel Cell Carcinoma,
c) Cutaneous Squamous Cell Carcinoma,
d) Renal Cell Carcinoma.

CORRECT ANSWER: a) Non-Cancerous

Chat response:     {"CHOICE": "b", "ANSWER": "Merkel Cell Carcinoma"} 



In [8]:
# Most recently recorded response
responses[-1]

{'CHOICE': 'b',
 'ANSWER': 'Merkel Cell Carcinoma',
 'CORRECT CHOICE': 'a',
 'CORRECT ANSWER': 'Non-Cancerous',
 'SCORE': 0}

In [9]:
# Turn the per-question result dicts into a table and save it tab-separated,
# without the index column.
response_df = pd.DataFrame(responses)
response_df.to_csv('model_name_eval.tsv', sep="\t", index=None)

In [10]:
# Scores of the questions whose correct answer is 'Non-Cancerous'
non_cancerous_scores = response_df.loc[response_df['CORRECT ANSWER'] == 'Non-Cancerous', 'SCORE']
print(non_cancerous_scores.describe())
print('sum: ', non_cancerous_scores.sum())


count    131.000000
mean       0.114504
std        0.319645
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max        1.000000
Name: SCORE, dtype: float64
sum:  15


In [None]:
# Disabled snippet: the whole cell is one triple-quoted string, so none of the
# statements inside it run.
# NOTE(review): if re-enabled, it appends an absolute, user-specific directory
# to sys.path and imports project-local modules (ARGO, CustomLLM) from there.
'''
import sys
sys.path.append('/rbscratch/brettin/DepMap-Experiments/tool_collection/agents')
from ARGO import ArgoWrapper, ArgoEmbeddingWrapper
from CustomLLM import ARGO_LLM, ARGO_EMBEDDING
argo_wrapper_instance = ArgoWrapper()
llm = ARGO_LLM(argo=argo_wrapper_instance,model_type='gpt4', temperature = 0.5)
argo_response = llm.invoke(input=messages)
print(argo_response)
'''