In [1]:
import openml
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import LabelEncoder
import pandas as pd

from lightgbm import LGBMRegressor

In [2]:
dataset = openml.datasets.get_dataset(44140)
x, y, categorical_indicator, attribute_names = dataset.get_data(target=dataset.default_target_attribute)
encoder = LabelEncoder()
y = encoder.fit_transform(y)
y = pd.Series(y)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)


model = LGBMRegressor()

model.set_params(
            boosting_type = 'gbdt',
            learning_rate = 0.05,
            num_leaves = 10,
            n_estimators = 50,
            max_depth = 5,
            n_jobs = 16,
            verbose = -1)
            
model.fit(x_train,y_train)
pred_test = model.predict(x_test)

mse = mean_squared_error(y_test, pred_test, squared = False)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 912.1697033998964




## Create HPO Problem

In [3]:
import sys
sys.path.append('../../..')

from emoc.problem import Problem
import random
import time

class HPO_XGB(Problem):
    def __init__(self, dataset_id=44140, dec_num=5, obj_num_=2):
        super().__init__(dec_num, obj_num_)
        x, y, categorical_indicator, attribute_names = dataset.get_data(target=dataset.default_target_attribute)
        encoder = LabelEncoder()
        y = encoder.fit_transform(y)
        y = pd.Series(y)
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(x, y, test_size=0.2, random_state=1)
        self.model = LGBMRegressor()

        self.space = {
            'boosting_type': ['gbdt', 'dart'],
            'learning_rate': (1e-10, 0.5),
            'num_leaves': [10, 20, 30, 40, 50, 100],
            'n_estimators': [50, 75, 100, 150, 200, 250, 300],
            'max_depth': [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 25, 30, None],
        }
        self.lower_bound_ = []
        self.upper_bound_ = []
        for _, value in self.space.items():
            if isinstance(value, tuple):
                self.lower_bound_.append(value[0])
                self.upper_bound_.append(value[1])
            elif isinstance(value, list):
                self.lower_bound_.append(0)
                self.upper_bound_.append(len(value) - 1)
            else:
                raise Exception("Invalid type for search space!")
        
        self.encoding_ = self.GetType("MIXED")
        self.name = "HPO_LGBM"
        
        
    def CalObj(self, ind):
        self.model.set_params(
            boosting_type = self.space['boosting_type'][int(ind.dec_[0])],
            learning_rate = ind.dec_[1],
            num_leaves = self.space['num_leaves'][int(ind.dec_[2])],
            n_estimators = self.space['n_estimators'][int(ind.dec_[3])],
            max_depth = self.space['max_depth'][int(ind.dec_[4])],
            verbose = -1,
        )
        start = time.time()
        self.model.fit(self.x_train, self.y_train)
        runtime = time.time() - start
        predictions = model.predict(self.x_test)
        error = mean_absolute_error(self.y_test, predictions)
        ind.obj_[0] = error
        ind.obj_[1] = runtime
    
    def check(self, ind): # check if the individual is valid
        pass

In [4]:
hpo = HPO_XGB()
print(type(hpo.space['learning_rate']))
print(type(hpo.space['max_depth']))
print(type(hpo.space['boosting_type']))
print(type(hpo.space['n_estimators']))
print(type(hpo.space['num_leaves']))


print(hpo.lower_bound_, hpo.upper_bound_)

<class 'tuple'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
[0, 1e-10, 0, 0, 0] [1, 0.5, 5, 6, 15]


In [5]:
from emoc.core import Individual

ind = Individual(5,2)
ind.dec_ = [0, 0.1, 0, 0, 0]
hpo = HPO_XGB()

print(type(ind.dec_[0]))

hpo.CalObj(ind)
print(ind.obj_)


<class 'float'>
[610.366, 0.322645]


In [6]:
from emoc.operator import Sampling

class sampling_HPO_XGB(Sampling):
    def sample_ind(self, ind, problem):
        for i, (_, value) in enumerate(problem.space.items()):
            if isinstance(value, tuple):
                ind.dec_[i] = random.uniform(problem.lower_bound_[i], problem.upper_bound_[i])
            elif isinstance(value, list):
                ind.dec_[i] = random.randint(problem.lower_bound_[i], problem.upper_bound_[i])
            else:
                raise Exception("Invalid type for search space!")

# class sampling_HPO_XGB(Sampling):
#     def __init__(self) -> None:
#         pass
    
#     def __call__(self, population, pop_num, problem):
#         for i in range(pop_num):
#             self.sample_ind(population[i], problem)
        
#     def sample_ind(self, ind, problem):
#         for i in range(problem.dec_num_):
#             ind.dec_[i] = random.randint(problem.lower_bound_[i], problem.upper_bound_[i])

In [7]:
from emoc.operator import Mutation

class mutation_HPO_XGB(Mutation):
    def mutation_ind(self, ind, problem):
        for i, (_, value) in enumerate(problem.space.items()):
            if isinstance(value, tuple):
                ind.dec_[i] = random.uniform(problem.lower_bound_[i], problem.upper_bound_[i])
            elif isinstance(value, list):
                ind.dec_[i] = random.randint(problem.lower_bound_[i], problem.upper_bound_[i])
            else:
                raise Exception("Invalid type for search space!")

# class mutation_HPO_XGB(Mutation):
#     def __init__(self, mu_pro=None,):
#         self.pro = mu_pro
    
#     def __call__(self, population, pop_num, problem):
#         if self.pro == None:
#             self.pro = 1 / problem.dec_num_
#         for i in range(pop_num):
#             self.mutation_ind(population[i], problem)
        
#     def mutation_ind(self, ind, problem):
#         for i, (_, value) in enumerate(problem.space.items()):
#             if isinstance(value, tuple):
#                 ind.dec_[i] = random.uniform(problem.lower_bound_[i], problem.upper_bound_[i])
#             elif isinstance(value, list):
#                 ind.dec_[i] = random.randint(problem.lower_bound_[i], problem.upper_bound_[i])
#             else:
#                 raise Exception("Invalid type for search space!")

In [8]:
from emoc.operator import Crossover
class crossover_HPO_XGB(Crossover):
    def cross_ind(self, parent1, parent2, offspring1, offspring2, problem):
        if random.random() > self.pro:
            offspring1.dec_ = parent1.dec_
            offspring2.dec_ = parent2.dec_
        else:
            for i in range(problem.dec_num_):
                if random.random() < 0.5:
                    offspring1.dec_[i] = parent1.dec_[i]
                    offspring2.dec_[i] = parent2.dec_[i]
                else:
                    offspring1.dec_[i] = parent2.dec_[i]
                    offspring2.dec_[i] = parent1.dec_[i]

# class crossover_HPO_XGB(Crossover):
#     def __init__(self, cross_pro=0.9):
#         self.pro = cross_pro
    
#     def __call__(self, parent_pop, offspring_pop, pop_num, problem, selection_operator, **kwargs):
#         index1 = list(range(pop_num))
#         random.shuffle(index1)
#         index2 = list(range(pop_num))
#         random.shuffle(index2)
#         for i in range(pop_num // 2):
#             parent1 = selection_operator(parent_pop[index1[2 * i]], parent_pop[index1[2 * i + 1]])
#             parent2 = selection_operator(parent_pop[index2[2 * i]], parent_pop[index2[2 * i + 1]])
#             self.cross_ind(parent1, parent2, offspring_pop[2 * i], offspring_pop[2 * i + 1], problem)
            
#     def cross_ind(self, parent1, parent2, offspring1, offspring2, problem):
#         if random.random() > self.pro:
#             offspring1.dec_ = parent1.dec_
#             offspring2.dec_ = parent2.dec_
#         else:
#             for i in range(problem.dec_num_):
#                 if random.random() < 0.5:
#                     offspring1.dec_[i] = parent1.dec_[i]
#                     offspring2.dec_[i] = parent2.dec_[i]
#                 else:
#                     offspring1.dec_[i] = parent2.dec_[i]
#                     offspring2.dec_[i] = parent1.dec_[i]

In [9]:
from emoc.algorithm import NSGA2Framework
from emoc.core import EMOC_Manager

my_problem = HPO_XGB()
my_Algorithm = NSGA2Framework(
    sampling=sampling_HPO_XGB(),
    mutation=mutation_HPO_XGB(),
    crossover=crossover_HPO_XGB()
)
emoc = EMOC_Manager(population_num=30, max_evaluation=150)
emoc.optimize(algorithm=my_Algorithm, problem=my_problem, metrics=[], output_interval = 1)

Error: File emoc/pf_data/hpo_lgbm/hpo_lgbm_2D.pf not found or could not be opened!


In [10]:
print(len(emoc.global_[0].record_))
print(len(emoc.global_[0].record_[0].pop_))
print(emoc.global_[0].record_[0].pop_[2].dec_, emoc.global_[0].record_[0].pop_[2].obj_)

5
30
[0, 0.302278, 2, 2, 4] [610.366, 0.130671]
