In [1]:
import pandas as pd
import numpy as np
import csv
import glob
import matplotlib.pyplot as plt
from matplotlib import gridspec
import math
from matplotlib import patches
from matplotlib.ticker import FuncFormatter
import math
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import re
import os
import re
import pandas as pd
import numpy as np
from IPython.display import display
import ipywidgets as widgets
from ipywidgets import Layout, Button
import collections

def extract_info(filename):
    pattern = r"(?P<model>.*?)_(?P<prompt>.*?)_prompt_experiment_.*_temp_(?P<temp>.*?)_target_(?P<target>.*?)_%_Dev_Budget_(?P<budget>\d+)_.*"
    match = re.match(pattern, filename)
    if match:
        return match.group('model'), match.group('prompt'), match.group('temp'), match.group('target'), int(match.group('budget'))

def unique_combinations(directory):
    unique_lists = []
    for filename in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, filename)):
            info = extract_info(filename)
            if info:
                unique_lists.append(info[:-1])
    
    # Sort list based on Temp (float) first, Model (str) second
    unique_lists = list(set(unique_lists)) # Remove duplicates
    unique_lists.sort(key=lambda x: (float(x[2]), x[0])) # Sort
    
    return unique_lists



def load_data(filename, budget):
    df = pd.read_csv(filename)
    strength = df['Compressive Strength'].values
    if len(strength) < budget:
        last_value = strength[-1] if len(strength) > 0 else 0
        strength = np.pad(strength, (0, budget - len(strength)), constant_values=last_value)
    return strength

def load_data_baseline(filename, budget):
    df = pd.read_csv(filename)
    strength = df['fc_28dGroundTruth'].values
    if len(strength) > 4:  # if more than initial samples, drop them
        strength = strength[4:]  # drop first 4 samples
    if len(strength) < budget:
        last_value = strength[-1] if len(strength) > 0 else 0
        strength = np.pad(strength, (0, budget - len(strength)), 'constant', constant_values=last_value)
    return strength

# Adjust load_selected_data function
def load_selected_data(btn):
    data = collections.defaultdict(list)

    for selected in combo_widget.value:
        split_selected = selected.split(", ")

        # For regular models
        if len(split_selected) == 4 and "Prompt" in selected:
            selected_model, selected_prompt, selected_temp, selected_target = split_selected
            selected_model = selected_model.split(": ")[1]
            selected_prompt = selected_prompt.split(": ")[1]
            selected_temp = selected_temp.split(": ")[1]
            selected_target = selected_target.split(": ")[1]

            for filename in os.listdir('Results/ID'):
                info = extract_info(filename)
                if info and info[:-1] == (selected_model, selected_prompt, selected_temp, selected_target):
                    strength = load_data(os.path.join('Results/ID', filename), info[-1])
                    data[info[:-1]].append(strength)

        # For baseline models
        elif len(split_selected) == 4 and "Initial Samples" in selected:
            selected_model, selected_init_samples, selected_target, selected_budget = split_selected
            selected_model = selected_model.split(": ")[1]
            selected_init_samples = selected_init_samples.split(": ")[1]
            selected_target = selected_target.split(": ")[1]
            selected_budget = selected_budget.split(": ")[1]
            for directory in ['Results/BO', 'Results/RF_ID', 'Results/RP_ID']:
                for filename in os.listdir(directory):
                    try:
                        info = extract_info_baseline(filename)
                        strength = load_data_baseline(os.path.join(directory, filename), info[-1])
                        data[info[:-1]].append(strength)
                    except:
                        pass

    return data

    print("Data Loaded Successfully!")
    
    return data
def extract_info_baseline(filename):
    pattern = r"experiment_(?P<experiment>\d+)_(?P<model>.*?)_(initialsample_(?P<initialsample>\d+)?_)?target_(?P<target>.*?)_%_Dev_Budget_(?P<budget>\d+)_.*"
    match = re.match(pattern, filename)
    if match:
        model = match.group('model')
        experiment = int(match.group('experiment'))
        initial_sample_size = int(match.group('initialsample')) if match.group('initialsample') else 0
        target = match.group('target')
        budget = int(match.group('budget'))
        return model, initial_sample_size, target, budget

def unique_combinations_baseline(directory):
    unique_lists = []
    for filename in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, filename)):
            info = extract_info_baseline(filename)
            if info:
                unique_lists.append(info)
    
    # Sort list based on initial_sample_size (int) first, Model (str) second
    unique_lists = list(set(unique_lists)) # Remove duplicates
    unique_lists.sort(key=lambda x: (x[2], x[0])) # Sort
    
    return unique_lists


# Get the unique combinations for each type of model
unique_sets = []
for directory in directories:
    if directory == 'Results/ID':
        unique_sets += unique_combinations(directory)
    else:
        unique_sets += unique_combinations_baseline(directory)

# Generate the unique list
unique_list = []
for unique_set in unique_sets:
    if "gpt" in unique_set[0]:  # For regular models
        model, prompt, temp, target = unique_set
        unique_list.append(f"Model: {model}, Prompt: {prompt}, Temp: {temp}, Target: {target}")
    else:  # For baseline models
        model, init_samples, target, budget = unique_set
        unique_list.append(f"Model: {model}, Initial Samples: {init_samples}, Target: {target}, Budget: {budget}")

combo_widget = widgets.SelectMultiple(
    options=unique_list,
    description='Combinations:',
    layout=Layout(width='90%')
)

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import math

def plot_results(data, desired_target):
    with plot_output:
        num_configs = len(data.keys())
        ncols = 2
        nrows = int(math.ceil(num_configs / ncols))

        fig, axs = plt.subplots(nrows, ncols, figsize=(15, 5*nrows), sharex=True, sharey=True)
        colors = plt.cm.viridis(np.linspace(0, 1, num_configs))  # color map for different configurations

        # Ensure axs is always a 2D array
        if num_configs <= 2:
            axs = np.reshape(axs, (nrows, ncols))

        y_min = np.inf
        y_max = -np.inf

        for idx, ((config, all_strengths), color) in enumerate(zip(data.items(), colors)):
            row = idx // ncols
            col = idx % ncols

            # Calculate cumulative max for each experiment
            all_strengths = [np.maximum.accumulate(strength) for strength in all_strengths]

            # Calculate the mean and the 10th and 90th percentiles
            mean_strengths = np.mean(all_strengths, axis=0)
            lower_bound = np.percentile(all_strengths, 10, axis=0)
            upper_bound = np.percentile(all_strengths, 90, axis=0)

            iterations = list(range(1, len(mean_strengths) + 1))

            # Plotting
            axs[row, col].plot(iterations, mean_strengths, color=color, label=f'{config}', linewidth=2)
            axs[row, col].fill_between(iterations, lower_bound, upper_bound, alpha=0.3, color=color)

            # Add horizontal line for the desired target strength
            axs[row, col].axhline(y=desired_target, color='r', linestyle='--')

            # Get global minimum and maximum y-values
            y_min = min(y_min, np.min([np.min(str) for str in all_strengths]))
            y_max = max(y_max, np.max([np.max(str) for str in all_strengths]))

            # Add labels, title and legend
            axs[row, col].set_xlabel('Development Cycle')
            axs[row, col].set_ylabel('Compressive Strength')
            axs[row, col].set_title(f'{config}')
            axs[row, col].grid(True)

        # Normalize y-axis for all subplots
        for ax in axs.flat:
            ax.set_ylim([y_min, y_max])

        plt.tight_layout()
        plt.show()
    
import threading
plot_output = widgets.Output()

# Create a lock
lock = threading.Lock()

def on_load_and_plot(btn):
    # Acquire the lock
    if not lock.acquire(blocking=False):
        print('Another session is running, please wait...')
        return

    # Clear previous plots from the Output widget
    plot_output.clear_output(wait=True)

    data = load_selected_data(btn)  # store the returned data in a variable

    # Draw the new plot inside the Output widget
    with plot_output:
        plot_results(data, desired_target=64.4)

    # Release the lock
    lock.release()
    
# Define the button here
load_button = widgets.Button(description='Load Data and Plot')

load_button.on_click(on_load_and_plot)
display(combo_widget, load_button, plot_output)


FileNotFoundError: [Errno 2] No such file or directory: 'Results/ID'