In [7]:
import pandas as pd
import numpy as np
import os
import re
from collections import defaultdict
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import Layout
import collections


In [10]:
def extract_info_baseline(filename):
    """Parse the run parameters encoded in a baseline result filename.

    The filename must start with
    ``experiment_<n>_<model>_[initialsample_<k>_]target_<target>_%_Dev_Budget_<budget>_``.

    Parameters
    ----------
    filename : str
        Bare filename (no directory part).

    Returns
    -------
    tuple or None
        ``(model, initial_sample_size, target, budget)`` where ``model`` and
        ``target`` are strings and the other two are ints.
        ``initial_sample_size`` is 0 when the optional ``initialsample_`` part
        is missing or carries no digits. ``None`` is returned when the
        filename does not follow the pattern.
    """
    pattern = r"experiment_(?P<experiment>\d+)_(?P<model>.*?)_(initialsample_(?P<initialsample>\d+)?_)?target_(?P<target>.*?)_%_Dev_Budget_(?P<budget>\d+)_.*"
    found = re.match(pattern, filename)
    if not found:
        return None

    fields = found.groupdict()
    raw_initial = fields['initialsample']
    return (
        fields['model'],
        int(raw_initial) if raw_initial else 0,
        fields['target'],
        int(fields['budget']),
    )

def unique_combinations_baseline(directory):
    """Return the distinct parameter combinations encoded in a directory's filenames.

    Parameters
    ----------
    directory : str
        Directory to scan (non-recursive). Only regular files are considered;
        files whose names ``extract_info_baseline`` cannot parse are skipped.

    Returns
    -------
    list of tuple
        Unique ``(model, initial_sample_size, target, budget)`` tuples, sorted
        by target, then model, then initial sample size, then budget.
        ``target`` is a string, so it sorts lexicographically
        (e.g. ``'100'`` before ``'99'``).
    """
    combos = set()
    for filename in os.listdir(directory):
        if not os.path.isfile(os.path.join(directory, filename)):
            continue
        info = extract_info_baseline(filename)
        if info:
            combos.add(info)

    # Primary order is target (x[2]) then model (x[0]), as before. The other
    # two fields are appended as tie-breakers: the tuples come out of a set,
    # whose iteration order for string-bearing tuples changes between
    # interpreter runs, so ties would otherwise be ordered arbitrarily.
    return sorted(combos, key=lambda x: (x[2], x[0], x[1], x[3]))

# Result directories (relative paths) that hold the baseline experiment files.
directories = ['Results/BO', 'Results/RF_ID', 'Results/RP_ID']

def load_data_baseline(filename, budget, n_initial=4):
    """Load one run's 'fc_28dGroundTruth' column as an array of at least ``budget`` values.

    Parameters
    ----------
    filename : str
        Path of the CSV file; it must contain a ``fc_28dGroundTruth`` column.
    budget : int
        Minimum length of the returned array.
    n_initial : int, optional
        Number of leading rows treated as initial samples and dropped.
        Defaults to 4, the value that used to be hard-coded. Rows are only
        dropped when the file has more than ``n_initial`` rows; a shorter
        file is kept whole.

    Returns
    -------
    numpy.ndarray
        The remaining values. If fewer than ``budget`` remain, the array is
        padded at the end with its last value (or with 0 when it is empty).
        Arrays longer than ``budget`` are returned untruncated.

    Raises
    ------
    ValueError
        If ``n_initial`` is negative.
    """
    if n_initial < 0:
        raise ValueError("n_initial must be non-negative")

    strength = pd.read_csv(filename)['fc_28dGroundTruth'].values

    # Drop the initial samples only when something is left afterwards.
    if len(strength) > n_initial:
        strength = strength[n_initial:]

    shortfall = budget - len(strength)
    if shortfall > 0:
        # Carry the last observed value forward so every run has `budget` points.
        fill = strength[-1] if len(strength) > 0 else 0
        strength = np.pad(strength, (0, shortfall), 'constant', constant_values=fill)
    return strength

def load_selected_data_baseline(btn):
    """Load the result arrays for every combination selected in ``combo_widget``.

    Written as the ``on_click`` handler of the "Load Data" button, so ``btn``
    (the clicked button) is accepted but not used.

    Each selected label consists of four ``"Name: value"`` fields joined by
    ``", "``. The fields are read by position rather than by displayed name:
    model, budget, target, initial sample size.

    Returns
    -------
    collections.defaultdict(list)
        Maps ``(model, initial_sample_size, target)`` to a list with one array
        per matching file, as returned by ``load_data_baseline``.
    """
    directories = ['Results/BO', 'Results/RF_ID', 'Results/RP_ID']
    data = collections.defaultdict(list)

    # Build the keys to look for. They must be tuples with the same element
    # types as extract_info_baseline's output (str, int, str): a list never
    # compares equal to a tuple, and the parsed target is a string.
    wanted = set()
    for selected in combo_widget.value:
        model, _budget, target, initial = (
            field.split(": ", 1)[1] for field in selected.split(", ")
        )
        wanted.add((model, int(initial), target))

    # NOTE(review): budget is not part of the match (the original comparison
    # was on info[:-1] too), so runs with different budgets share a key —
    # confirm this is intended.
    for directory in directories:
        for filename in os.listdir(directory):
            info = extract_info_baseline(filename)
            if info and info[:-1] in wanted:
                strength = load_data_baseline(os.path.join(directory, filename), info[-1])
                data[info[:-1]].append(strength)

    if data:
        print("Data Loaded Successfully!")
        # Summarise as {key: number of runs} instead of dumping every array.
        print({key: len(runs) for key, runs in data.items()})
    else:
        print("No files matched the selected combinations.")
    return data

# Gather the parameter combinations found in every result directory.
# `directories` is already defined earlier in this cell, so it is not redefined here.
unique_sets = []
for directory in directories:
    unique_sets += unique_combinations_baseline(directory)

# Each tuple is (model, initial_sample_size, target, budget). The label keeps
# the field positions model / budget / target / initial sample size, which is
# what the click handler parses, but names every field after the value it shows.
unique_list = [
    f"Model: {model}, Budget: {budget}, Target: {target}, Initial samples: {initial_sample_size}"
    for model, initial_sample_size, target, budget in unique_sets
]

combo_widget = widgets.SelectMultiple(
    options=unique_list,
    description='Combinations:',
    layout=Layout(width='90%')
)

# The button's click handler reads the current selection from combo_widget.
btn = widgets.Button(description="Load Data")
btn.on_click(load_selected_data_baseline)
display(combo_widget, btn)


('BO', 4, '99', 10)
('RF', 4, '99', 10)
('RP', 0, '99', 10)
