In [1]:
import pandas as pd
from openai import OpenAI
import random

In [2]:
# Arguments
infile='./depmap/Model.csv.gz'
host='127.0.0.1'
port='9999'
model='meta-llama/Meta-Llama-3-70B-Instruct'
openai_api_key = 'cmsc-35360'
openai_api_base = f"http://{host}:{port}/v1"

In [3]:
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

In [4]:
def load_data(f):
    df=pd.read_csv(f)
    return df

In [5]:
def select_random_element_not_equal_to(array, correct):
    '''
    Given an array of unique answers, select an answer not equal to the correct answer.
    '''
    if len(array) == 0:
        raise ValueError("The array is empty")

    # Filter out elements equal to `correct`
    filtered_array = [element for element in array if element != correct]
    
    if not filtered_array:
        raise ValueError("No valid elements to choose from")

    # Select a random element from the filtered list
    return random.choice(filtered_array)

In [6]:
def remove_correct_cell_line_answers(disease, df):
    '''
    Returns a list of unique cell line names not associated with disease.
    '''
    
    filtered = df[df['OncotreePrimaryDisease'] != disease]
    array = filtered['CellLineName'].unique()
    
    # debug
    # print(f'{df.shape}\t{filtered.shape}')
    # print(f'{len(array)}')
    
    return array

In [7]:
def construct_question(df, index):
    '''
    Construct a multiple choice question from df using row at index.
    Row at index will have the correct answer.
    The incorrect answers are randomly selected from array.
    '''
    if index < 0 or index >= len(df):
        raise IndexError("Index out of bounds")
    
    disease = df.iloc[index]['OncotreePrimaryDisease']
    correct_answer = df.iloc[index]['CellLineName']
    wrong_answers = remove_correct_cell_line_answers(disease, df)
    
    wrong_answer_1 = select_random_element_not_equal_to(wrong_answers, correct_answer)
    wrong_answer_2 = select_random_element_not_equal_to(wrong_answers, correct_answer)
    wrong_answer_3 = select_random_element_not_equal_to(wrong_answers, correct_answer)

    while wrong_answer_1 == wrong_answer_2 or wrong_answer_1 == wrong_answer_3 or wrong_answer_2 == wrong_answer_3:
        wrong_answer_1 = select_random_element_not_equal_to(wrong_answers, correct_answer)
        wrong_answer_2 = select_random_element_not_equal_to(wrong_answers, correct_answer)
        wrong_answer_3 = select_random_element_not_equal_to(wrong_answers, correct_answer)
        
    answers = [correct_answer,
               wrong_answer_1,
               wrong_answer_2,
               wrong_answer_3]
    random.shuffle(answers)

    d = {"a": answers[0],
         "b": answers[1],
         "c": answers[2],
         "d": answers[3],
        }

    for choice in d.keys():
        if d[choice] == correct_answer:
            correct_choice = choice
    
    question = f'''The primary disease {disease} can be best studied using which cell line?
a) {d['a']}
b) {d['b']}
c) {d['c']}
d) {d['d']}'''
    
    return question, correct_choice, correct_answer, disease
    

In [8]:
def debug(index, disease, df, answer):
    '''
    The answer is the name of the cell line associated that can be used as a model for the disease.
    We get all cell lines associated with that disease, and check if answer is in that list.
    '''
    filtered_df = df[df['OncotreePrimaryDisease'] == disease]
    cell_line_names = filtered_df['CellLineName'].values
    filtered_array = [element for element in cell_line_names if element == answer]
    
    #print(cell_line_names)
    #print(filtered_array)

    return filtered_array

In [10]:
import json
from tqdm import tqdm
from datetime import datetime

df = load_data(infile)


sys_reg_prompt = '''You are a cancer research scientist studying the potential effects of various small molecules, peptides, and 
antibiodies on tumor cell growth. You will be presented with a series of multiple choice questions. Please select the correct
choice. Return the answer in json format {"CHOICE": choice, "ANSWER": answer} where choice is only the alphabetic character 
associated with the full answer.'''

# This is not working yet because the experts talk to each other thus making the
# parsing of the final answer different from the sys_reg_prompt.
sys_tot_prompt = '''Imagine three experts are answering this question.
They will brainstorm the answer step by step, reasoning carefully and taking all facts into consideration.
All experts will write down one step of their thinking, then share it with the group.
They will each critique their response and all the responses of others.
They will check their answer based on science, laws of physics and logic.
Then all experts will go on to the next step and write down this step of their thinking.
They will keep going through steps until they reach their conclusions taking into account the thoughts of the other experts.
If at anytime they realize that there is a flaw in their logic they will backtrack to where the flaw occurred.
If any expert realizes they are wrong at any point they acknowledge this and start another train of thought.
Each expert will assign a likelihood of their current assertion being correct.
Continue until the experts agree on the single most likely choice. Return the response in json format FINAL_ANSWER={"DISCUSSION": discussion, "CHOICE": choice, "ANSWER": answer} where choice is only the alphabetic character 
associated with the full answer.'''
 
for i in range(0, 9): # 9):
    num_correct = 0
    total = 0
    responses = []


    for i in tqdm( range(df.shape[0]) ):
        # get question, correct choice and answer
        question, correct_choice, correct_answer, disease = construct_question(df, i)

        # For debugging.
        #print(f'{question}')
        #print(f'The correct choice is {correct_choice}. {correct_answer}')
        #print(f'The df row has {df.iloc[i]["OncotreePrimaryDisease"]} {df.iloc[i]["CellLineName"]}')
    
        # construct message
        messages=[
            {"role": "system", 
             "content": sys_reg_prompt
            },
            {"role": "user",
             "content": question
            }
        ]
    
        chat_response = client.chat.completions.create(
            model=model,
            # logprobs=1,
            # top_logprobs=1,
            messages=messages,
            temperature=0.0,
            max_tokens=2560,
        )
        
        
        try:
            response = json.loads(chat_response.choices[0].message.content)
            response['CORRECT CHOICE'] = correct_choice
            response['CORRECT ANSWER'] = correct_answer
            response['PRIMARY_DISEASE'] = disease
    
            if response['CHOICE'] == correct_choice:
                response['SCORE'] = 1
                num_correct = num_correct + 1
            else:
                response['SCORE'] = 0
                filtered_array = debug(i, disease, df, response["ANSWER"])
                if len(filtered_array) > 0:
                    print(f'{question}\n')
                    print(f'choice: {response["CHOICE"]} answer: {response["ANSWER"]}')
                    print(f'correct choice: {response["CORRECT CHOICE"]} correct answer: {response["CORRECT ANSWER"]}')
                    print(f'index: {i}')
                    print(f'wrong answer found in filtered array {filtered_array}')

                    class MyException(Exception):
                        pass
                    raise MyException("My hovercraft is full of eels")

            total = total + 1
            responses.append(response)
            #print(f'{response}') 

        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
            print(f"{question}")
            print(f"{chat_response.choices[0].message.content}")

            response["CHOICE"] =  "e"
            response["ANSWER"] =  chat_response.choices[0].message.content
            response['CORRECT CHOICE'] = correct_choice
            response['CORRECT ANSWER'] = correct_answer
            response['PRIMARY_DISEASE'] = disease
            response['SCORE'] = 0
            
            responses.append(response)
            pass
            
        except MyException as e:
            print(f"Error finding unique answer: {e}")
            pass
    
    print(f'{num_correct} correct responses out of {total}')
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")

    response_df = pd.DataFrame(responses)
    response_df.to_csv(f'disease_eval_{timestamp}.tsv', index=None, sep="\t")

    with open('disease_eval_summary.txt', 'a') as f:
        print(f'{timestamp}\t{num_correct} correct responses out of {total}', file=f)

 98%|███████████████████████████████████████████████████████████████████████████████▌ | 1887/1921 [14:30<00:14,  2.31it/s]

The primary disease Invasive Breast Carcinoma can be best studied using which cell line?
a) no.10
b) NH93T
c) ME-1
d) NCI-H1819

choice: a answer: MDA-MB-231
correct choice: b correct answer: NH93T
index: 1886
wrong answer found in filtered array ['MDA-MB-231']
Error finding unique answer: My hovercraft is full of eels


100%|█████████████████████████████████████████████████████████████████████████████████| 1921/1921 [14:47<00:00,  2.17it/s]


1458 correct responses out of 1920


 47%|██████████████████████████████████████▉                                           | 911/1921 [07:20<08:57,  1.88it/s]

The primary disease Mature B-Cell Neoplasms can be best studied using which cell line?
a) BT-549
b) HT
c) BC-3C
d) MONO-MAC-6

choice: c answer: BC-3
correct choice: b correct answer: HT
index: 910
wrong answer found in filtered array ['BC-3']
Error finding unique answer: My hovercraft is full of eels


 64%|███████████████████████████████████████████████████▍                             | 1220/1921 [09:48<04:48,  2.43it/s]

The primary disease Renal Cell Carcinoma can be best studied using which cell line?
a) JAR
b) SW 156
c) TEN
d) NCI-H322M

choice: c answer: 786-O
correct choice: b correct answer: SW 156
index: 1219
wrong answer found in filtered array ['786-O']
Error finding unique answer: My hovercraft is full of eels


100%|█████████████████████████████████████████████████████████████████████████████████| 1921/1921 [15:05<00:00,  2.12it/s]


1448 correct responses out of 1919


 98%|███████████████████████████████████████████████████████████████████████████████▏ | 1877/1921 [14:38<00:19,  2.24it/s]

The primary disease Colorectal Adenocarcinoma can be best studied using which cell line?
a) CCLF_CORE_0002_T
b) UWB1.289
c) Ca Ski
d) HTK-

choice: d answer: HT-29
correct choice: a correct answer: CCLF_CORE_0002_T
index: 1876
wrong answer found in filtered array ['HT-29']
Error finding unique answer: My hovercraft is full of eels


100%|█████████████████████████████████████████████████████████████████████████████████| 1921/1921 [14:59<00:00,  2.13it/s]


1462 correct responses out of 1920


100%|█████████████████████████████████████████████████████████████████████████████████| 1921/1921 [13:52<00:00,  2.31it/s]


1437 correct responses out of 1921


100%|█████████████████████████████████████████████████████████████████████████████████| 1921/1921 [13:25<00:00,  2.38it/s]


1458 correct responses out of 1921


100%|█████████████████████████████████████████████████████████████████████████████████| 1921/1921 [14:19<00:00,  2.23it/s]


1453 correct responses out of 1921


 52%|██████████████████████████████████████████▍                                       | 995/1921 [07:02<06:21,  2.43it/s]

The primary disease Colorectal Adenocarcinoma can be best studied using which cell line?
a) HTK-
b) CW-2
c) HDQ-P1
d) LC-1-sq

choice: a answer: HT-29
correct choice: b correct answer: CW-2
index: 994
wrong answer found in filtered array ['HT-29']
Error finding unique answer: My hovercraft is full of eels


 53%|██████████████████████████████████████████▉                                      | 1018/1921 [07:12<06:30,  2.31it/s]

The primary disease Embryonal Tumor can be best studied using which cell line?
a) HOP-62
b) CHSA0108
c) HOKUG
d) CHLA-57

choice: b answer: CHLA-266
correct choice: d correct answer: CHLA-57
index: 1017
wrong answer found in filtered array ['CHLA-266']
Error finding unique answer: My hovercraft is full of eels


100%|█████████████████████████████████████████████████████████████████████████████████| 1921/1921 [13:29<00:00,  2.37it/s]


1434 correct responses out of 1919


100%|█████████████████████████████████████████████████████████████████████████████████| 1921/1921 [13:25<00:00,  2.38it/s]


1474 correct responses out of 1921


 51%|██████████████████████████████████████████                                        | 986/1921 [06:53<06:14,  2.50it/s]

The primary disease Colorectal Adenocarcinoma can be best studied using which cell line?
a) SNU-175
b) ABM-T0781
c) Hs 863.T
d) HTK-

choice: d answer: HT-29
correct choice: a correct answer: SNU-175
index: 985
wrong answer found in filtered array ['HT-29']
Error finding unique answer: My hovercraft is full of eels


 55%|████████████████████████████████████████████▏                                    | 1049/1921 [07:19<05:53,  2.47it/s]

The primary disease Pancreatic Adenocarcinoma can be best studied using which cell line?
a) GR-M
b) RCC-JF
c) RH-41
d) A673STAG2KO16

choice: c answer: PANC-1
correct choice: a correct answer: GR-M
index: 1048
wrong answer found in filtered array ['PANC-1']
Error finding unique answer: My hovercraft is full of eels


 74%|████████████████████████████████████████████████████████████▏                    | 1426/1921 [09:59<09:58,  1.21s/it]

Error decoding JSON: Extra data: line 3 column 1 (char 51)
The primary disease Breast Neoplasm, NOS can be best studied using which cell line?
a) UM-UC-16
b) COLO 824
c) OVCA420
d) NCI-H196
{"CHOICE": "None", "ANSWER": "None of the above"}

The correct answer is not among the options provided. Breast Neoplasm, NOS (Not Otherwise Specified) is a type of breast cancer, and the cell lines provided are not specific to breast cancer. 

UM-UC-16 is a bladder cancer cell line, COLO 824 is a colon cancer cell line, OVCA420 is an ovarian cancer cell line, and NCI-H196 is a lung cancer cell line. A more suitable cell line for studying breast cancer would be MCF-7, MDA-MB-231, or SK-BR-3, among others.


100%|█████████████████████████████████████████████████████████████████████████████████| 1921/1921 [13:29<00:00,  2.37it/s]

1456 correct responses out of 1918



