In [1]:
def extract_formulations(response):
    """Parse a text response into a list of ``{key: value}`` dictionaries.

    All double quotes are removed from ``response`` and the text is processed
    line by line:

    * A line containing ``"The formulation is"`` acts purely as a separator:
      the dictionary collected so far (if non-empty) is stored and a new one
      is started. Nothing on the separator line itself is parsed.
    * Any other line is stripped and split on ``':'``. Only lines that yield
      exactly two parts are kept, as ``stripped key -> stripped value``; a
      repeated key overwrites the earlier value.

    Args:
        response: The raw response text (a ``str``).

    Returns:
        A list of dictionaries, one per non-empty group of key/value lines,
        in the order they appear in the text.
    """
    collected = []
    pending = {}

    for raw_line in response.replace('"', '').split('\n'):
        if "The formulation is" in raw_line:
            # Separator line: flush whatever has been gathered and start over.
            if pending:
                collected.append(pending)
            pending = {}
            continue

        fields = raw_line.strip().split(':')
        if len(fields) != 2:
            # Zero colons or more than one colon: not a simple key/value line.
            continue

        name, val = (part.strip() for part in fields)
        pending[name] = val

    # The final group has no trailing separator, so flush it explicitly.
    if pending:
        collected.append(pending)
    return collected


def run_TVDL_strategy(system_message, training_data, tested_formulations, iterate_prompt, context_prompt, VM_role_prompt, conversation_dropdown, enable_TVDL_strategy):
    """Request candidate combinations from the model, score each, return the best.

    The function first asks the model (via ``call_openai_api``) for several
    candidate combinations, extracts every ``A = ..., B = ..., C = ..., D = ...``
    match from the reply, drops candidates already present in the training
    data, and then issues one additional "forward" API call per remaining
    candidate asking for its predicted performance. The candidate with the
    highest numeric prediction wins. If no candidate yields a parseable
    number, the request is retried (up to 10 attempts) with increasingly
    explicit reminder messages appended to the conversation.

    Args:
        system_message: Content of the system message for every API call.
        training_data: List of strings describing earlier results; joined
            with newlines into the forward prompt. Empty means no history.
        tested_formulations: List of strings naming already tested
            combinations; joined with newlines into the first prompt.
        iterate_prompt: User prompt text added after the tested combinations.
        context_prompt: Context text placed at the start of the forward prompt.
        VM_role_prompt: Role text placed at the end of the forward prompt.
        conversation_dropdown: Object with a ``.value`` attribute; when it
            equals ``'Complete'`` verbose tracing is printed.
        enable_TVDL_strategy: ``1`` asks for three candidates, any other
            value asks for five.

    Returns:
        An object with ``role`` (always ``'assistant'``) and ``content``
        attributes. ``content`` is the best-scoring candidate; if every
        attempt failed it is the first extracted candidate of the last
        attempt, or ``'Unable to make valid predictions.'`` when nothing was
        extracted.

    Note:
        The sampling temperature for the candidate request is read from the
        module-level name ``temp``; it is not a parameter of this function.
        ``call_openai_api``, ``extract_formulations_from_training_data`` and
        ``re`` must likewise be available at module level.
    """
    class AssistantResponse:
        """Minimal message-like container exposing ``role`` and ``content``."""

        def __init__(self, role, content):
            self.role = role
            self.content = content

        def __getitem__(self, key):
            # The fallback path used to return a plain dict; keep
            # ``resp['role']`` / ``resp['content']`` working for such callers.
            if key in ('role', 'content'):
                return getattr(self, key)
            raise KeyError(key)

    def log(*args):
        # Verbose tracing is only shown when the dropdown is set to 'Complete'.
        if conversation_dropdown.value == 'Complete':
            print(*args)

    # (formulation, predicted_strength) pairs collected from the forward calls
    predictions = []

    # Start with the system message
    messages = [
        {"role": "system", "content": system_message}
    ]

    # Add training data and iteration prompt
    if training_data:
        messages.append({"role": "assistant", "content": "Previously, we have tested these combinations:\n" + "\n".join(tested_formulations)})
        messages.append({"role": "user", "content": iterate_prompt})
        training_formulations = extract_formulations_from_training_data(training_data)

    # Ask for new candidate combinations
    if enable_TVDL_strategy == 1:
        messages.append({"role": "user", "content": "Output 3 three completly new and untested combinations with an extremly high expected performance , each of the three must adhere to the given structure and parameter limits!."})
    else:
        messages.append({"role": "user", "content": "Output 5 five different unique combinations with a extremly high expected performance, each of the five must adhere to the given structure and parameter limits!."})

    # Reminder i is appended on every attempt whose retry_count exceeds i, so
    # later attempts re-append the earlier reminders as well.
    retry_reminders = [
        "You must propose novel and unique combinations that have not been tested before!",
        "Your suggestions must be novel untested and lie exactly on this grid: //A: 360, 370, 380, 390,400, 410, 420, 430, 440, 450 //B: 0.45, 0.5, 0.55, 0.6 //C: 0.7/0.3, 0.6/0.4, 0.5/0.5 //D: 0, 1",
        "Try harder! Pay close attention to the exact format and order of ther parameters, for instance 0.6/0.4 and not 0.4/0.6",
    ]

    # Compiled once: captures "A = <int>, B = <float>, C = <float>/<float>, D = <word>"
    pattern = re.compile(r'A\s*=\s*(\d+),\s*B\s*=\s*(\d+\.\d+),\s*C\s*=\s*(\d+\.\d+/\d+\.\d+),\s*D\s*=\s*(\w+)', re.IGNORECASE)

    max_retries = 10  # Maximum number of retries for getting valid formulations
    retry_count = 0  # Retry counter
    formulations = []

    while not predictions and retry_count < max_retries:
        for threshold, reminder in enumerate(retry_reminders):
            if retry_count > threshold:
                messages.append({"role": "user", "content": reminder})
                log(reminder)

        # Make API call (temperature comes from the module-level ``temp``)
        assistant_response = call_openai_api(messages, temp)
        response_lines = assistant_response.choices[0].message.content.split('\n')

        # Keep the matched text of the first pattern hit on each line
        formulations = []
        for line in response_lines:
            match = pattern.search(line)
            if match:
                formulations.append(match.group(0))

        # Drop candidates that already appear in the training data
        if training_data:
            formulations = [f for f in formulations if f not in training_formulations]
            if not formulations:
                print('no new formulations extracted')

        log("Extracted Formulations:", formulations)

        # Prepare the forward task prompt
        if training_data:
            forward_prompt_base = f"{context_prompt} \n ////Previous Formulation and Lab Validation:" + "\n".join(training_data) + f"\n{VM_role_prompt}"
        else:
            forward_prompt_base = f"{context_prompt} \n {VM_role_prompt}"

        for formulation in formulations:
            # NOTE(review): inside this f-string ``{'your estimate'}`` is an
            # expression, so the prompt ends with the bare text
            # ``your estimate`` (no braces or quotes). Left unchanged because
            # altering the prompt alters model behaviour; confirm the intent.
            forward_prompt = f"{forward_prompt_base}\n Considering this context, what is the performance of {formulation}? Answer in this exact format: {'your estimate'}"

            log("Forward Prompt:", forward_prompt)

            # Make the API call to get the forward task prediction
            forward_response = call_openai_api([{"role": "system", "content": system_message}, {"role": "user", "content": forward_prompt}], 0)

            # The prediction is taken to be the last space-separated token of
            # the reply, with surrounding single quotes and spaces removed.
            try:
                predicted_strength_str = forward_response.choices[0].message.content.split(' ')[-1]
                predicted_strength = float(predicted_strength_str.strip("' "))
            except ValueError:
                print(f"Could not convert performance for {formulation} to float. Skipping this combination.")
                continue

            log(f"Performance for {formulation}: {predicted_strength}")

            predictions.append((formulation, predicted_strength))

        if predictions:
            break  # Exit the loop if valid predictions are found

        print(f"No valid results on attempt {retry_count + 1}. Retrying...")
        retry_count += 1

    # Handle case where predictions list is empty
    if not predictions:
        print("No valid predictions were made. Defaulting to the first combination.")
        fallback = formulations[0] if formulations else 'Unable to make valid predictions.'
        # Same return type as the success path, so callers reading
        # ``.role`` / ``.content`` do not fail on this branch.
        return AssistantResponse('assistant', fallback)

    # Select the formulation with the highest predicted strength
    final_response, _ = max(predictions, key=lambda x: x[1])
    return AssistantResponse('assistant', final_response)

# Run the full discovery experiment once per temperature setting.
#
# For every temperature, the experiment is repeated NrOfExper times. Each
# experiment runs up to `budget` iterations; in every iteration the model is
# asked for a combination until one is returned that can be parsed and matched
# against `formulation_df`. The matched result is appended to `training_data`.
# An experiment stops early once `desired_strength` is reached, and its
# training data is written to a CSV file under Results/LLM/.
#
# All configuration (temperatures, NrOfExper, budget, desired_strength, the
# widget objects, helper functions, formulation_df, ...) is expected to be
# defined at module level before this code runs.
for temp in temperatures:

    # Repeat the whole experiment n times
    for experiment in range(NrOfExper):
        print(f"\n---\nStarting experiment {experiment+1}...\n---")
        training_data = []
        tested_formulations = []
        if enable_TVDL_strategy == 0:
            print('SFDL')
        else:
            print('TVDL')
            # Load your data
            data = load_data('Data/DiscoveryData_Sample.csv')

            # Select and format your training data
            training_data, tested_formulations = select_and_format_data(data, desired_strength)

        current_strength = 0.0
        iterations = 0
        formulations = []
        # System message including both the role prompt and context
        system_message = DA_role_prompt.value + '\n' + context_prompt.value

        while iterations < budget:
            iterations += 1
            print(f"\n---\nStarting iteration {iterations} at temp {temp}...")

            # The conversation is rebuilt from scratch on every iteration
            messages = [
                {"role": "system", "content": system_message}
            ]

            # Add the training data to the messages
            if training_data:
                messages.append({"role": "assistant", "content": "Previously, we have tested these combinations:\n" + "\n".join(tested_formulations)})
                # Add the iteration prompt
                messages.append({"role": "user", "content": iterate_prompt.value})

            if conversation_dropdown.value == 'Complete':
                print("--- Conversation History ---")
                for msg in messages:
                    print(f"{msg['role']}: {msg['content']}")

            # NOTE(review): this loop has no retry cap; it keeps calling the
            # API until a suggestion both parses and has a matching result.
            valid_solution = False
            while not valid_solution:
                if enable_TVDL_strategy in (1, 2):
                    # Both strategy modes use the same entry point; the mode
                    # value itself is forwarded as the last argument.
                    assistant_response = run_TVDL_strategy(system_message, training_data, tested_formulations, iterate_prompt.value, context_prompt.value, VM_role_prompt.value, conversation_dropdown, enable_TVDL_strategy)
                else:
                    response = call_openai_api(messages, temp)
                    assistant_response = response.choices[0].message

                if assistant_response.role == "assistant":
                    print('Model Response: ', assistant_response.content)

                    suggested_solution = parse_solution(assistant_response.content)

                    if suggested_solution is not None:
                        (lab_result, TrainingDat) = find_matching_result(formulation_df, suggested_solution)

                        if lab_result:
                            current_strength = lab_result
                            training_data.append(f"{TrainingDat} resulted in a performance of {current_strength}")
                            tested_formulations.append(f"{TrainingDat}")

                            valid_solution = True

                            print('The suggested combination achieved a performance of ', current_strength)
                        else:
                            print(f"Iteration {iterations}: No matching lab result found for suggestion {suggested_solution}")
                    else:
                        print(f"Iteration {iterations}: Assistant's response did not contain a valid solution. Trying again.")

                    if not valid_solution:
                        retry_content = iterate_prompt.value + "\nRemember the exact parameter grid: A: {360, 370, 380, 390,400, 410, 420, 430, 440, 450}, B: {0.45, 0.5, 0.55, 0.6}, C: {0.7/0.3, 0.6/0.4, 0.5/0.5}, D: {0, 1}. It is extremly important that you stick to these values and reply in the following format: 'The formulation is A = {your estimate}, B = {your estimate}, C = {your estimate}, D = {your estimate}"
                        if messages[-1]["role"] == "user":
                            # Replace the previous user prompt with the stricter one
                            messages[-1]["content"] = retry_content
                        else:
                            # With no training data the last message is the
                            # system message; append instead of overwriting it
                            # so the role prompt and context are not lost.
                            messages.append({"role": "user", "content": retry_content})
                else:
                    print(f"Iteration {iterations}: Response not from 'assistant'. Trying again.")

            if current_strength >= desired_strength:
                print(f"\nDesired performance of {desired_strength} achieved after {iterations} iterations. The solution is {suggested_solution}.")
                break

        timestamp = str(int(time.time()))

        # create the file name
        if 'None' in prompt_dropdown.value:
            prompt_value = 'NoContext'
        elif 'zero' in prompt_dropdown.value:
            prompt_value = 'ZeroContext'
        else:
            prompt_value = 'Context'

        filename = f"Results/LLM/{model_dropdown.value}_{prompt_value}_prompt_experiment_{experiment+1}_temp_{temp}_target_{int(targ_quant.value)}_%_Dev_Budget_{budget}_recursive_{enable_TVDL_strategy}_{timestamp}{prompt_dropdown.value}.csv"

        # open the file in write mode
        with open(filename, 'w', newline='') as file:
            writer = csv.writer(file)

            # write the headers
            writer.writerow(["Formulation", "Compressive Strength"])

            # iterate over the training data; `record` avoids shadowing the
            # `data` object loaded from the CSV above
            for record in training_data:
                try:
                    # parse the entry to extract formulation and compressive strength
                    formulation, strength_str = record.split(" resulted in a performance of ")
                    strength = float(strength_str.split(" ")[0])  # convert string to float
                    writer.writerow([formulation, strength])
                except ValueError as e:
                    print(f"Failed to parse data: {record}")
                    print(f"Error: {e}")

        print(f"Data for experiment {experiment+1} and temp {temp} successfully saved to {filename}.")



NameError: name 'temperatures' is not defined