## Testing of Control: Without use of VectorDB and Similarity Search

In this notebook, we pass the entire CSV file, as text, to the LLM together with the prompt to perform control testing.

In [None]:
import os
from src.utils.bedrock_caller import BedrockCaller

# Caller object; its call_claude3sonnet method is used later to send the request
bedrock_caller = BedrockCaller()

# The prompt text lives in the local 'prompts' directory
prompt_file_name = 'clinical-notes-01-prompt-specific.txt'
prompt_file_path = os.path.join('prompts', prompt_file_name)

# Read the whole prompt file into memory as a single string
with open(prompt_file_path) as prompt_file:
    prompt = prompt_file.read()

# Show the prompt so it can be checked before it is sent
print(prompt)

In [None]:
import pandas as pd

# Location of the clinical-note CSV inside data/csv_xlsx
data_file_name = 'chan-RenalGenie_Clinical_Note_csv.csv'
data_file_path = os.path.join('data', 'csv_xlsx', data_file_name)

# Read the CSV and discard rows in which every cell is empty
df = pd.read_csv(data_file_path).dropna(how='all')

# Keep only the columns whose header does not begin with 'Unnamed'
is_unnamed_column = df.columns.str.contains('^Unnamed')
df_cleaned = df.loc[:, ~is_unnamed_column]

# Serialise the cleaned frame back to CSV text, leaving out the index
data_string = df_cleaned.to_csv(index=False)

print(data_string)

In [None]:
# The sample response is stored in the local 'sample_responses' directory
sample_file_name = 'clinical-notes-01.txt'
sample_file_path = os.path.join('sample_responses', sample_file_name)

# Read the whole sample file as one string
with open(sample_file_path) as sample_file:
    sample = sample_file.read()

# System instruction: answer from the supplied data rather than from the sample
system_role = "You are a highly knowledgeable assistant specialized in creating clinical documents and answering questions. You will receive the user's question along with the data to sieve through. Please use the data, and not the sample in the user's question, to generate the proper answer. "

# Single user turn carrying the prompt, the CSV text and the sample, in that order
final_prompt = f"User's question: {prompt} \n\n Data:\n {data_string} \n\n Sample: {sample}"
user_message = {
    "role": "user",
    "content": [{"type": "text", "text": final_prompt}],
}

# Request body handed to bedrock_caller in the next cell
messages_API_body = {
    "anthropic_version": "bedrock-2023-05-31",
    # int(500/0.75) evaluates to 666; presumably a ~500-word budget at
    # ~0.75 words per token -- TODO confirm
    "max_tokens": int(500/0.75),
    "system": system_role,
    "messages": [user_message],
}

In [None]:
# Send the request body built above through the caller's call_claude3sonnet method
response_body = bedrock_caller.call_claude3sonnet(messages_API_body)

# The generated text is read from the first entry of the response's 'content' list
first_content_block = response_body['content'][0]
llm_output = first_content_block['text']

print(llm_output)

In [None]:
import re
import os

# Results are grouped per sample under results/<sample file name without extension>/
sample_file_name = 'clinical-notes-01.txt'
save_dir = os.path.join('results', os.path.splitext(sample_file_name)[0])

# Create the results directory on first use; without this, os.listdir below
# raises FileNotFoundError before any attempt has ever been saved.
os.makedirs(save_dir, exist_ok=True)

# Regular expression for files named exactly 'control-attempt-<n>.txt'
pattern = re.compile(r'control-attempt-(\d+)\.txt')

# Collect the attempt numbers already present. fullmatch (rather than match)
# ignores names that merely start with the pattern, e.g. 'control-attempt-3.txt.bak'.
attempt_numbers = [
    int(match.group(1))
    for match in map(pattern.fullmatch, os.listdir(save_dir))
    if match
]

# Highest attempt number saved so far (0 when there is none); the next attempt
# is one above it, so numbering starts at 1 in an empty directory.
max_num = max(attempt_numbers, default=0)
next_num = max_num + 1

# Create the new filename
new_filename = f'control-attempt-{next_num}.txt'
new_filepath = os.path.join(save_dir, new_filename)

# Save the LLM output. An explicit UTF-8 encoding keeps the write from failing
# on platforms whose default locale encoding cannot represent every character
# the model may produce.
with open(new_filepath, 'w', encoding='utf-8') as file:
    file.write(llm_output)