# Optimization with GPT-3.5/4 and HEBO
We will start by setting up the environment and necessary configurations.

In [None]:
# Environment Setup
import os

# os.environ['HTTP_PROXY'] = '127.0.0.1:7890'
# os.environ['HTTPS_PROXY'] = '127.0.0.1:7890'
import datetime
import functools
import json
import re
import sys
import math
from pathlib import Path
import json_repair

import numpy as np
import openai
from dotenv import load_dotenv, find_dotenv
from loguru import logger

# Load environment variables from the nearest .env file (if any).
_ = load_dotenv(find_dotenv())

# Make the notebook's working directory importable so that the local
# `prompt_utils` module below can be resolved.
ROOT_PATH = str(Path('.').resolve())
print('ROOT_PATH: ', ROOT_PATH)
sys.path.insert(0, ROOT_PATH)

import prompt_utils

_OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
_OPTIMIZER = "gpt-3.5-turbo"
openai_api_key = _OPENAI_API_KEY

if _OPTIMIZER in {"gpt-3.5-turbo", "gpt-4o"}:
    if not openai_api_key:
        # Surface the misconfiguration here instead of at the first API call,
        # where it would show up as an opaque authentication error.
        logger.warning(
            "OPENAI_API_KEY is not set; calls to the '{}' optimizer will fail.",
            _OPTIMIZER,
        )
    openai.api_key = openai_api_key

## Configuration and Logging
We will now configure the optimization parameters and set up logging for debugging and tracking.

In [None]:
# ---- Optimization budget ----
num_points = 50
max_num_steps = 3
num_reps = 2
max_num_pairs = 20
num_generated_points_in_each_step = 8

# ---- Optimizer LLM settings ----
optimizer_llm_name = _OPTIMIZER
optimizer_gpt_max_decode_steps = 1024
optimizer_gpt_temperature = 1.0

# ---- Timestamped result directory and file logging ----
datetime_str = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
save_folder = os.path.join(
    ROOT_PATH,
    "outputs",
    "optimization-results",
    f"llm_hebo-o-{optimizer_llm_name}-{datetime_str}/",
)
os.makedirs(save_folder)
# save_folder ends with "/", so plain concatenation yields a valid path.
logger.add(save_folder + "log.log", format="{time} {level} {message}", level="DEBUG")
print(f"Result directory:\n{save_folder}")

# ---- Settings recorded alongside the results ----
optimizer_llm_dict = dict(
    max_decode_steps=optimizer_gpt_max_decode_steps,
    temperature=optimizer_gpt_temperature,
    batch_size=1,
)

import asyncio

# Pre-bind model and decoding parameters; callers pass only the prompt
# (and may override any keyword, e.g. temperature).
call_optimizer_server_func = functools.partial(
    prompt_utils.call_openai_server_func,
    model=optimizer_llm_name,
    max_decode_steps=optimizer_gpt_max_decode_steps,
    temperature=optimizer_gpt_temperature,
)

## Testing the Optimizer Server
Before proceeding with the optimization process, let's test the optimizer server.

In [None]:
# Test the optimizer server
# Smoke test: send one trivial prompt through the pre-configured optimizer
# call and print whatever comes back, before starting the real run.
print("\n======== Testing the optimizer server ===========")
# The explicit temperature keyword overrides the value bound in the partial.
# NOTE(review): asyncio.run() raises RuntimeError if an event loop is already
# running in this thread — confirm this cell works in the kernel being used.
optimizer_test_output = asyncio.run(call_optimizer_server_func(
    "Does the sun rise from the north? Just answer yes or no.",
    temperature=1.0
))
print(f"Optimizer test output: {optimizer_test_output}")
print("Finished testing the optimizer server.")
print("\n=================================================")

## Benchmark Settings and Initialization
Next, we will load the benchmark settings and initialize the optimization process.

In [None]:
from hpobench.util.openml_data_manager import get_openmlcc18_taskids
from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark as Benchmark
import time

task_ids = get_openmlcc18_taskids()
task_no, task_id = 0, task_ids[0]
# Maps hyperparameter name -> [is_log_sampled, is_integer]; shared across tasks.
other_info = {}

# Build a benchmark for each of the first two tasks and record the default
# search bounds of its configuration space. The objects left in scope after
# the loop (benchmark, default_bounds) belong to the last task visited.
for task_no, task_id in enumerate(task_ids[:2]):
    print(f'#################### TASK {task_no + 1} of {len(task_ids)}: Task-Id: {task_id} ###################')
    benchmark = Benchmark(task_id=task_id)
    if not benchmark:
        continue

    start = time.time()
    cs = benchmark.get_configuration_space()
    results = []
    default_bounds = []
    print("Hyperparameter default bounds:")
    for hyperparameter in cs.values():
        name = hyperparameter.name
        lower, upper = hyperparameter.lower, hyperparameter.upper
        log = hyperparameter.log
        # Presence of a `check_int` attribute is used as the integer marker.
        check_int = "check_int" in dir(hyperparameter)
        if log:
            # Log-sampled parameters are stored as base-2 exponents.
            lower, upper = math.log(lower, 2), math.log(upper, 2)
        default_bounds.append((lower, upper))
        other_info[name] = [log, check_int]
        print(f"{name}: Lower = {lower}, Upper = {upper}, Check_int = {check_int}, Log_sample = {log}")

In [None]:
# Show the class name of the benchmark currently in scope (cell output only).
(type(benchmark).__name__)

## Utility Functions
Here, we define the utility functions that will be used throughout the optimization process.

In [None]:
from hebo.optimizers.hebo import HEBO
from hebo.design_space.design_space import DesignSpace
from tqdm import tqdm
from banks import Prompt

# Utility functions
def evaluate_loss(benchmark, space_list, fidelity):
    """Run a short HEBO search inside ``space_list`` and score the best point found.

    Args:
        benchmark: Object exposing ``objective_function(config, fidelity=...)``
            and ``objective_function_test(config)``; both are read through the
            ``'function_value'`` key (and ``['info']['train_loss']`` for the former).
        space_list: List of parameter dicts handed to ``DesignSpace().parse``.
            Parameters whose name contains ``'_log'`` are treated as base-2
            exponents and converted back with ``2.0 ** value``.
        fidelity: Fidelity settings forwarded unchanged to ``objective_function``.

    Returns:
        A dict with keys ``configuration``, ``fidelity``, ``test_loss``
        (the *negated* test ``function_value`` rounded to 3 decimals),
        ``valid_loss`` and ``train_loss``; or an empty dict if anything in the
        final evaluation stage raises.

    NOTE(review): ``valid_loss`` / ``train_loss`` hold the values assigned in the
    most recent successful loop iteration, not those of the returned best
    configuration — confirm this is intended.
    """
    def preprocess_func(df):
        """Turn the first row of a suggestion DataFrame into a plain config dict."""
        return_dict = {}
        for key in df.columns:
            value = df[key].iloc[0]
            # Unwrap NumPy scalars into native Python numbers.
            if isinstance(df[key].iloc[0], np.int64):
                value = int(df[key].iloc[0])
            elif isinstance(df[key].iloc[0], np.float64):
                value = float(df[key].iloc[0])
            # '<name>_log' columns carry exponents: strip the suffix, undo the log2.
            return_dict[key.split('_log')[0]] = 2.0 ** value if '_log' in key else value
        return return_dict

    def objective(df):
        """Evaluate the benchmark's validation objective for the first row of ``df``."""
        config = preprocess_func(df)
        result_dict = benchmark.objective_function(config, fidelity=fidelity)
        return result_dict

    logger.info(space_list)
    sp = DesignSpace().parse(space_list)
    opt = HEBO(sp, rand_sample=4)

    for i in tqdm(range(10)):
        try:
            # NOTE(review): five rows are requested, but preprocess_func only reads
            # row 0 and a single y value is observed — confirm n_suggestions=5 is intended.
            rec = opt.suggest(n_suggestions=5)
            result_dict = objective(rec)
            valid_loss = result_dict['function_value']
            train_loss = result_dict['info']['train_loss']
            # NOTE(review): the optimizer is fed the negated validation loss —
            # confirm this matches the direction HEBO optimises.
            y = np.array([-valid_loss], dtype=np.float64)
            opt.observe(rec, y)
        except Exception as e:
            # Best effort: a failed iteration is logged at DEBUG level and skipped.
            logger.debug(e)
            continue
        logger.info('After %d iterations, best obj is %.2f' % (i, -opt.best_y))
    try:
        best_config = preprocess_func(opt.best_x)
        print("Best params is: ", best_config)
        result_dict_test = benchmark.objective_function_test(best_config)
        test_loss = result_dict_test['function_value']

        # If no iteration succeeded, valid_loss/train_loss are unbound here; the
        # resulting error is caught below and an empty dict is returned.
        return {
            'configuration': best_config,
            'fidelity': fidelity,
            'test_loss': -np.round(test_loss, 3),
            'valid_loss': valid_loss,
            'train_loss': train_loss
        }
    except Exception as e:
        logger.debug(e)
        return {}

def gen_meta_prompt(bound_info, test_loss, old_value_pairs_set, max_num_pairs=100,
                    algo_name="XGBoost"):
    """Build the meta-prompt asking the LLM for new hyperparameter bounds.

    Args:
        bound_info: Mapping of hyperparameter name to a dict with the keys
            ``'bound_range'``, ``'is_log_sample'`` and ``'is_int'``.
        test_loss: Score obtained with ``bound_info``.
        old_value_pairs_set: Set of ``(info, test_loss)`` history entries.
            It is mutated: the current pair is added before rendering.
        max_num_pairs: Maximum number of history entries shown; the entries
            kept are the ones with the lowest ``test_loss``.
        algo_name: Name of the algorithm being tuned, substituted for the
            ``{algo.name}`` placeholder in the instructions. Previously the
            placeholder was sent to the LLM verbatim.

    Returns:
        The full prompt text, ending with an opened ```json fence so that the
        model's reply starts directly with the JSON body.
    """
    bound_names = list(bound_info)
    info = tuple(
        (
            ('bound_range', v['bound_range']),
            ('is_log_sample', v['is_log_sample']),
            ('is_int', v['is_int']),
        )
        for v in bound_info.values()
    )
    old_value_pairs_set.add((info, test_loss))

    # Descending by loss, then keep the tail: the best (lowest) entries survive
    # and the very best one is listed last.
    old_value_pairs = sorted(old_value_pairs_set, key=lambda pair: -pair[1])[-max_num_pairs:]

    suggestion_lines = []
    for i, (infos, pair_loss) in enumerate(old_value_pairs):
        ranges = ''.join(
            f"{name} : ( " + ', '.join(f'{key}: {value}' for key, value in param_info) + '), '
            for name, param_info in zip(bound_names, infos)
        )
        suggestion_lines.append(f"\nSuggestion {i}: {ranges} test loss: {pair_loss}")
    old_value_pairs_substr = ''.join(suggestion_lines)

    # str.replace is used instead of str.format so that no other brace in the
    # text can be misread as a format field.
    meta_prompt = """
    As an ML engineer, your task is to provide recommended lower and upper bounds for each hyperparameter in the {algo.name} algorithm. You already have reference data on some ranges and the corresponding test loss for these bounds, with the parameter bounds organized in descending order based on their test loss, where lower values indicate better performance. Analyze each hyperparameter to determine reasonable ranges that optimize model performance, ensuring these bounds are grounded in empirical evidence or established best practices. Your insights will be crucial for refining and optimizing the tuning process for {algo.name} models.
  Here are some previously suggested ranges and their performance:
    """.strip().replace("{algo.name}", algo_name)
    meta_prompt += "\n\n"
    meta_prompt += old_value_pairs_substr.strip()
    meta_prompt += "\n\n"
    meta_prompt += (
        "Please provide a new set of recommended lower and upper bounds for each hyperparameter, "
        "ensuring that these ranges are different from any previously suggested ranges. "
        "Additionally, ensure that the valid loss value associated with these new ranges is "
        "lower than any previously mentioned values. Do not write code. \n"
        "  Your output must follow this json format:"
    )
    prompt_template = '''
  {
    {% for hyper_param in hyper_params_l %}
    "{{ hyper_param }}": {
        "lower_bound": "your lower_bound here",
        "upper_bound": "your upper_bound here"
    },

    {% endfor %}
  }
    '''
    p = Prompt(prompt_template)
    meta_prompt += p.text({"hyper_params_l": bound_names})
    meta_prompt += (
        "\nwhere lower_bound and upper_bound are all numerical values. \n\n"
        "Answer:\n```json\n  "
    )
    return meta_prompt

def extract_string(input_string):
    """Return the JSON payload of an LLM reply, without its code fence.

    The meta-prompt already ends with an opened ```json fence, so a reply
    normally starts with the payload and ends with a closing fence; in that
    case everything before the first fence is returned unchanged.

    Some replies repeat the opening fence themselves. Then the text before
    the first fence is blank, and the content of that first fenced block is
    returned instead (minus an optional language tag such as ``json``),
    rather than an empty string.
    """
    head, fence, rest = input_string.partition('```')
    if head.strip() or not fence:
        return head

    # The reply opened with its own fence: take the body up to the closing one.
    body = rest.split('```')[0]
    first_line, newline, remainder = body.partition('\n')
    if newline and first_line.strip().isalpha():
        # Drop the language tag line (e.g. "json").
        body = remainder
    return body

def parse_output(extracted_output):
    """Parse the LLM's bounds suggestion into a list of ``(lower, upper)`` tuples.

    Args:
        extracted_output: Text expected to hold a mapping of the form
            ``{"<param>": {"lower_bound": ..., "upper_bound": ...}, ...}``.

    Returns:
        One ``(lower_bound, upper_bound)`` tuple per parameter, in the order
        they appear in the mapping, or ``None`` when the input is empty.

    Raises:
        TypeError: If the parsed payload is not a mapping.
        KeyError: If an entry lacks ``lower_bound`` / ``upper_bound``.
        Exception: Whatever evaluating a malformed bound string raises.
    """
    if not extracted_output:
        return None

    # SECURITY(review): eval() executes arbitrary expressions and the input is
    # model-generated text. Kept to preserve existing behaviour (it accepts
    # Python literals and arithmetic such as "2**-3"); consider a restricted
    # parser before exposing this to untrusted sources.
    try:
        bounds_dict = eval(extracted_output)
    except Exception:
        # Not a valid Python expression: try to repair it as JSON instead.
        good_json_string = json_repair.repair_json(extracted_output, skip_json_loads=True)
        bounds_dict = json.loads(good_json_string)

    if not isinstance(bounds_dict, dict):
        raise TypeError(
            f"expected a mapping of parameter bounds, got {type(bounds_dict).__name__}"
        )

    def _to_number(value):
        # Bounds may arrive as quoted numbers; see the SECURITY note above.
        return eval(value) if isinstance(value, str) else value

    return [
        (_to_number(spec['lower_bound']), _to_number(spec['upper_bound']))
        for spec in bounds_dict.values()
    ]
def process_output(bounds, other_info):
    """Pair each hyperparameter with its bounds and build the search-space specs.

    Args:
        bounds: Sequence of ``(lower, upper)`` pairs, aligned positionally
            with the entries of ``other_info``.
        other_info: Mapping of parameter name to ``[is_log_sample, is_int]``.

    Returns:
        ``(bound_info, space_list)`` where ``bound_info`` maps each name to
        its ``bound_range`` / ``is_log_sample`` / ``is_int`` record and
        ``space_list`` holds one ``{'name', 'type', 'lb', 'ub'}`` dict per
        parameter. Log-sampled parameters get a ``'_log'`` name suffix and
        type ``'int'``; integer parameters have their bounds truncated with
        ``int``; everything else is type ``'num'``.
    """
    space_list = []
    bound_info = {}
    for (param_name, param_info), bound in zip(other_info.items(), bounds):
        is_log_sample, is_int = param_info[0], param_info[1]
        lb, ub = bound[0], bound[1]

        if is_log_sample:
            space_name, space_type = param_name + '_log', 'int'
        elif is_int:
            lb, ub = int(lb), int(ub)
            space_name, space_type = param_name, 'int'
        else:
            space_name, space_type = param_name, 'num'

        space_list.append({'name': space_name, 'type': space_type, 'lb': lb, 'ub': ub})
        bound_info[param_name] = {
            'bound_range': (lb, ub),
            "is_log_sample": is_log_sample,
            "is_int": is_int,
        }
    return bound_info, space_list


## Running the Optimization Process
We will now run the optimization process using the configurations and utility functions defined earlier.

In [None]:
configs_dict = dict()
results_dict = dict()
num_convergence_steps = []

# Each repetition restarts from the default bounds, then alternates between
# (1) asking the LLM for new bounds given the scored history and
# (2) scoring every proposal with a short HEBO search.
for i_rep in range(num_reps):
    found_optimal = False
    print(f"\nRep {i_rep}:")

    # Generate the starting points
    init_bounds = default_bounds
    init_fidelity = {'n_estimators': 8, 'dataset_fraction': 0.4}

    configs_dict_single_rep = {
        "optimizer_llm_configs": optimizer_llm_dict,
        "init_bounds": init_bounds,
        "max_num_steps": max_num_steps,
        "max_num_pairs": max_num_pairs,
        "num_generated_points_in_each_step": num_generated_points_in_each_step,
    }
    configs_dict[i_rep] = configs_dict_single_rep
    configs_json_path = os.path.join(save_folder, "configs.json")
    print(f"Saving configs to\n{configs_json_path}")
    with open(configs_json_path, "w", encoding="utf-8") as f:
        json.dump(configs_dict, f, indent=4)

    old_value_pairs_set = set()
    old_value_pairs_with_i_step = []
    meta_prompts_dict = dict()
    raw_outputs_dict = dict()
    bound_info, init_space_list = process_output(init_bounds, other_info)
    init_result = evaluate_loss(benchmark, init_space_list, init_fidelity)
    if 'test_loss' not in init_result:
        # evaluate_loss returns {} on failure; without a baseline there is
        # nothing to show the LLM, so stop with an explicit message.
        raise RuntimeError("Evaluation of the default bounds failed; see the debug log.")
    init_test_loss = init_result['test_loss']
    bound_range = tuple(v['bound_range'] for v in bound_info.values())
    old_value_pairs_with_i_step.append((bound_range, init_test_loss, -1))

    print("\n================ Run Optimization ==============")

    results_json_path = os.path.join(save_folder, "results.json")
    print(f"Saving results to\n{results_json_path}")
    # bound_info / test_loss always describe the most recent *successful*
    # evaluation; gen_meta_prompt records that pair in the history set.
    test_loss = init_test_loss
    for i_step in range(max_num_steps):
        print(f"\nStep {i_step}:")
        meta_prompt = gen_meta_prompt(bound_info, test_loss, old_value_pairs_set, max_num_pairs=max_num_pairs)

        if not i_step % 5:
            print("\n=================================================")
        meta_prompts_dict[i_step] = meta_prompt

        # Generate points
        remaining_num_points_to_generate = num_generated_points_in_each_step
        raw_outputs = []
        while remaining_num_points_to_generate > 0:
            raw_outputs += asyncio.run(call_optimizer_server_func(meta_prompt))
            remaining_num_points_to_generate -= optimizer_llm_dict["batch_size"]
        raw_outputs = raw_outputs[:num_generated_points_in_each_step]
        raw_outputs_dict[i_step] = raw_outputs

        parsed_outputs = []
        for string in raw_outputs:
            try:
                parsed_output = parse_output(extract_string(string))
            except Exception as e:
                # loguru treats the first argument as a str.format template,
                # so the exception and raw text are passed as format arguments.
                logger.debug("Failed to parse optimizer output {!r}: {}", string, e)
                continue
            if parsed_output is not None:
                parsed_outputs.append(tuple(parsed_output))
        print(f"Proposed points: {parsed_outputs}")

        single_step_values = []
        for parsed_bounds in parsed_outputs:
            try:
                candidate_bound_info, space_list = process_output(parsed_bounds, other_info)
            except (TypeError, ValueError, IndexError, KeyError) as e:
                # A malformed suggestion must not abort the whole run.
                logger.debug("Skipping malformed bounds {!r}: {}", parsed_bounds, e)
                continue

            loss = evaluate_loss(benchmark, space_list, init_fidelity)
            if 'test_loss' not in loss:
                # Record nothing for failed evaluations. Previously the history
                # was updated regardless, pairing these bounds with a stale
                # (or undefined) loss from an earlier candidate.
                logger.debug("Evaluation failed for bounds {!r}; skipping.", parsed_bounds)
                continue

            bound_info = candidate_bound_info
            test_loss = loss['test_loss']
            bound_range = tuple(v['bound_range'] for v in bound_info.values())
            info = tuple(
                (
                    ('bound_range', v['bound_range']),
                    ('is_log_sample', v['is_log_sample']),
                    ('is_int', v['is_int']),
                )
                for v in bound_info.values()
            )
            single_step_values.append(test_loss)
            old_value_pairs_set.add((info, test_loss))
            old_value_pairs_with_i_step.append((bound_range, test_loss, i_step))
        logger.info(f"Single step values: {single_step_values}")

        # Persist after every step so partial runs are not lost.
        results_dict_single_rep = {
            "meta_prompts": meta_prompts_dict,
            "raw_outputs": raw_outputs_dict,
            "old_value_pairs_with_i_step": old_value_pairs_with_i_step,
        }
        results_dict[i_rep] = results_dict_single_rep
        with open(results_json_path, "w", encoding="utf-8") as f:
            json.dump(results_dict, f, indent=4)

# Questions: 
1. How should the HEBO configuration be set?
2. How should suggested values that fall outside the default range be treated?
3. How can the structure of old_value_pairs_substr, as rendered in the prompt, be refined?
4. Errors occur when computing the GPs, for example:
install from source
    - cholesky_cpu: 16 of 16 elements of the torch.Size([4, 4]) tensor are NaN.
    - Matrix not positive definite after repeatedly adding jitter up to 1.0e-04.

TODO: regret plot (BO steps vs. best function value)

TODO: another plot showing how the recommendations change over time

TODO: decide the order of suggestions in the prompt (random vs. sorted by loss value)

TODO: format the code



In [None]:
import numpy as np
import pandas as pd
from torch.quasirandom import SobolEngine
from hebo.design_space.design_space import DesignSpace
from hebo.acquisitions.acq import MACE
from hebo.acq_optimizers.evolution_optimizer import EvolutionOpt
from hebo.optimizers.abstract_optimizer import AbstractOptimizer
from typing import Optional

class RandomSearch(AbstractOptimizer):
    """Quasi-random baseline optimizer with the same interface as HEBO.

    Suggestions are drawn from a scrambled Sobol sequence scaled to the
    design space; observations are only stored so that ``best_x`` and
    ``best_y`` can report the minimum seen so far.
    """

    support_parallel_opt = True
    support_combinatorial = True
    support_contextual = True

    def __init__(self, space, rand_sample: Optional[int] = None, scramble_seed: Optional[int] = None):
        super().__init__(space)
        self.space = space
        self.X = pd.DataFrame(columns=self.space.para_names)
        self.y = np.zeros((0, 1))
        if rand_sample is None:
            self.rand_sample = 1 + self.space.num_paras
        else:
            self.rand_sample = max(2, rand_sample)
        self.scramble_seed = scramble_seed
        self.sobol = SobolEngine(self.space.num_paras, scramble=True, seed=scramble_seed)

    def quasi_sample(self, n, fix_input=None):
        """Draw ``n`` Sobol points and map them back to parameter values.

        Columns named in ``fix_input`` are overwritten with the given values.
        """
        num_numeric = self.space.num_numeric
        unit_points = self.sobol.draw(n)
        scaled = unit_points * (self.space.opt_ub - self.space.opt_lb) + self.space.opt_lb
        x = scaled[:, :num_numeric]
        xe = scaled[:, num_numeric:]
        # Snap parameters that are discrete in the transformed space.
        for col, para_name in enumerate(self.space.numeric_names):
            if self.space.paras[para_name].is_discrete_after_transform:
                x[:, col] = x[:, col].round()
        df_samp = self.space.inverse_transform(x, xe)
        if fix_input is not None:
            for column, fixed_value in fix_input.items():
                df_samp[column] = fixed_value
        return df_samp

    def suggest(self, n_suggestions=1, fix_input=None):
        """Return ``n_suggestions`` new quasi-random configurations."""
        return self.quasi_sample(n_suggestions, fix_input)

    def observe(self, X, y):
        """Append the rows of ``X`` whose objective value in ``y`` is finite."""
        finite_rows = np.where(np.isfinite(y.reshape(-1)))[0].tolist()
        kept_X = X.iloc[finite_rows]
        kept_y = y[finite_rows].reshape(-1, 1)
        self.X = pd.concat([self.X, kept_X], axis=0, ignore_index=True)
        self.y = np.vstack([self.y, kept_y])

    @property
    def best_x(self) -> pd.DataFrame:
        """Single-row frame holding the configuration with the smallest ``y``."""
        if self.X.shape[0] == 0:
            raise RuntimeError('No data has been observed!')
        return self.X.iloc[[self.y.argmin()]]

    @property
    def best_y(self) -> float:
        """Smallest objective value observed so far."""
        if self.X.shape[0] == 0:
            raise RuntimeError('No data has been observed!')
        return self.y.min()



In [None]:
from hebo.design_space.design_space import DesignSpace

# Example usage
# Define a small demo design space: one continuous ("num") and two integer
# ("int") parameters, each with a lower bound `lb` and an upper bound `ub`.
space = DesignSpace().parse([
    {"name":"learning_rate", "type": "num", "lb": 0.001, "ub": 0.1},
    {"name":"batch_size","type": "int", "lb": 32, "ub": 128},
    {"name":"num_layers", "type": "int", "lb": 1, "ub": 5},
    # Add more hyperparameters as needed
])




In [None]:
# Demo of the RandomSearch suggest/observe loop with a dummy objective.
def evaluate(cfg):
    """Placeholder objective: ignores ``cfg`` and always scores 0.1."""
    return 0.1


opt = RandomSearch(space)

# Number of suggestions to generate
n_suggestions = 10

# Get suggestions
# suggestions = random_search.suggest(n_suggestions=n_suggestions)
# Evaluate suggestions and observe
# Assuming 'evaluate' is a function that takes a configuration and returns a performance score
for i in range(10):
    rec_x = opt.suggest(n_suggestions=1)
    # observe() expects a column vector of objective values.
    y = np.asarray([[evaluate(rec_x)]], dtype=np.float64)
    opt.observe(rec_x, y)
    print(rec_x)

    # # Print the best configuration found
    # print("Best configuration found:")
    # print(random_search.best_x)
    # print("Best score:", random_search.best_y)

In [8]:
import matplotlib.pyplot as plt
import warnings
import torch
import json_repair
from pathlib import Path
ROOT_PATH = Path.cwd().resolve()

from loguru import logger
from banks import Prompt
import math
import sys
import re
import json
import functools
import datetime
from tqdm import tqdm
from hebo.design_space.design_space import DesignSpace
from hebo.optimizers.hebo import HEBO
from hebo.optimizers.bo import BO
import ConfigSpace
from random_search import RandomSearch
from hpobench.benchmarks.ml.xgboost_benchmark_old import XGBoostBenchmark
import time
from hpobench.util.openml_data_manager import get_openmlcc18_taskids
import asyncio
import prompt_utils
from dotenv import load_dotenv, find_dotenv
import openai
import numpy as np
import torch
import pandas as pd
import random

RANDOM_SEED = 42

# Seed every RNG in use so repeated runs of this cell are comparable.
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)  # If using GPU
random.seed(RANDOM_SEED)


def parse_json(rsp):
    """Placeholder: not implemented yet, returns None."""
    pass


def parse_code(rsp):
    """Placeholder: not implemented yet, returns None."""
    pass


script_path = ROOT_PATH / 'kaggle/titanic/titanic.py'

optimizer_gpt_max_decode_steps = 1024
optimizer_gpt_temperature = 1.0

optimizer_llm_dict = dict()
optimizer_llm_dict["max_decode_steps"] = optimizer_gpt_max_decode_steps
optimizer_llm_dict["temperature"] = optimizer_gpt_temperature
# Keep the same keys as the earlier configuration cell: the optimization
# loop reads optimizer_llm_dict["batch_size"].
optimizer_llm_dict["batch_size"] = 1


call_optimizer_server_func = functools.partial(
    prompt_utils.call_openai_server_func,
    model='gpt-3.5-turbo',
    max_decode_steps=optimizer_gpt_max_decode_steps,
    temperature=optimizer_gpt_temperature,
)

# # ====================== try calling the servers ============================
# print("\n======== testing the optimizer server ===========")
# optimizer_test_output = call_optimizer_server_func(
#     "Does the sun rise from the north? Just answer yes or no.",
#     temperature=1.0
# )
# print(f"optimizer test output: {optimizer_test_output}")
# print("Finished testing the optimizer server.")
# print("\n=================================================")

# Read with an explicit encoding so the result does not depend on the
# platform's default locale.
with open(script_path, 'r', encoding='utf-8') as f:
    script = f.read()

# Split at the first marker only: a later repetition of the marker text
# must not break the two-way split.
_MODEL_MARKER = '### Model ###'
data_part, _marker_found, model_part = script.partition(_MODEL_MARKER)
if not _marker_found:
    raise ValueError(f"Marker {_MODEL_MARKER!r} not found in {script_path}")

 "\n\nX_train = StandardScaler().fit_transform(df_train.drop(columns=drop_cols))\ny_train = df_train['Survived'].values\nX_test = StandardScaler().fit_transform(df_test.drop(columns=drop_cols))\n\n# ### **3.1 Random Forest**\n\nsingle_best_model = RandomForestClassifier(criterion='gini',\n                                           n_estimators=1100,\n                                           max_depth=5,\n                                           min_samples_split=4,\n                                           min_samples_leaf=5,\n                                           max_features='auto',\n                                           oob_score=True,\n                                           random_state=SEED,\n                                           n_jobs=-1,\n                                           verbose=1)\n\n\n# `StratifiedKFold` is used for stratifying the target variable. The folds are made by preserving the percentage of samples for each class in target variable (

In [None]:
# Prompt asking the LLM to summarise the data-related part of a script.
# `{code}` is the placeholder for the source code to be summarised.
summary_prompt='''
## Data related code ##
{code}

As an experienced data scientist, please summarize the main aspects of the code provided. Your summary should include:

Key Steps: Describe the primary steps and processes carried out in the code.
Key Findings: Highlight the main results or outcomes derived from the code.
Your summary should be formal and clear.
'''

# Prompt asking the LLM to propose hyperparameters and search spaces.
# `{report}` and `{docstring}` are placeholders for the preprocessing summary
# and the model's docstring.
# NOTE(review): the template also contains literal JSON braces, which
# str.format() would try to interpret as fields — confirm how the
# placeholders are substituted (e.g. str.replace or a template engine).
search_space_prompt='''
## Data preprocessing report ## 
{report}
## ML model docstring ##
{docstring}

You are an experienced machine learning engineer. Using the data preprocessing report and the ML model's docstring, identify key hyperparameters that should be optimized through Bayesian Optimization (BO). For each hyperparameter you identify, specify it and recommend a corresponding search space.

Answer in the following format:
```json
{
  "hp_name": "name of the hyperparameter",
  "hp_type": "choose from [int, num (float), bool, pow (varies in log space), cat (categorical value)]",
  "search_space": ["lower_bound", "upper_bound"] if numerical values, else ["categorical_list"]
}
```
'''