In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell is executed, so edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [7]:

from loguru import logger
from mlflow import MlflowClient
from mlutils.datasets.dataset import Dataset
from mlutils.mlflow.utils import get_run_params
from mlutils.mlflow.utils import terminate_run, finish_run_and_print_exception
from pymoo.algorithms.soo.nonconvex.ga import GA
from pymoo.optimize import minimize
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import Perceptron

from scalarizing.utils import extract_classifiers_from_bagging

In [6]:
from scalarizing.scalarizing import FindingBestExpressionSingleDatasetProblem, FindingBestExpressionProblemMutation, \
    FindingBestExpressionProblemCrossover, FindingBestExpressionProblemSampling
from scalarizing.scoring_functions import default_scoring_function

In [29]:
def _derive_test_path(train_path):
    """Return the test-split path that pairs with ``train_path``.

    Only the last ``tra`` inside the file name (the part after the final
    ``/``) is swapped for ``tst``. Directory components and earlier
    occurrences in the file name (e.g. ``australian-5-1tra.dat``) are left
    untouched. If the file name contains no ``tra`` the input is returned
    unchanged.
    """
    directory, sep, filename = train_path.rpartition("/")
    stem, marker, suffix = filename.rpartition("tra")
    if not marker:
        return train_path
    return f"{directory}{sep}{stem}tst{suffix}"


def do_experiment(run_id):
    """Run one expression-search experiment for an MLflow run and log its result.

    The run's parameters are fetched with ``get_run_params``. From them the
    function loads the train/test files, fits a bagging ensemble of
    perceptrons, and runs a pymoo ``GA`` over
    ``FindingBestExpressionSingleDatasetProblem`` built on the training split.
    The first objective value of the result is logged as the ``accuracy``
    metric and the run is terminated.

    Parameters read from the run: ``train_path``, ``ensemble_size``,
    ``final_ensemble_size`` and, optionally, ``pop_size`` (default 100) and
    ``n_gen`` (default 300).

    Args:
        run_id: Identifier of the MLflow run to read parameters from and to
            log the metric to.

    Returns:
        None. Any exception raised after the parameters are fetched is handed
        to ``finish_run_and_print_exception`` instead of propagating.
    """
    client = MlflowClient()
    param = get_run_params(run_id, client)
    logger.info(param)

    try:
        # A plain str.replace('tra', 'tst') would also rewrite directory names
        # and dataset names that happen to contain "tra".
        test_path = _derive_test_path(param.train_path)
        if test_path == param.train_path:
            logger.warning(
                "Could not derive a test path from {}; using the train path for both splits",
                param.train_path)

        # Dataset name is the file name up to the first '-'.
        name = param.train_path.split("/")[-1].split('-')[0]
        dataset = Dataset.read_dataset(param.train_path, test_path, name) \
            .encode_x_to_labels() \
            .encode_y_to_numeric_labels()

        # bagging
        # NOTE(review): `base_estimator` was renamed to `estimator` in newer
        # scikit-learn releases; kept as-is to match the installed version --
        # confirm before upgrading.
        bagging = BaggingClassifier(
            base_estimator=Perceptron(random_state=42),
            n_estimators=int(param.ensemble_size),
            max_samples=0.3,
            random_state=42)
        bagging.fit(dataset.train.x, dataset.train.y)

        problem = FindingBestExpressionSingleDatasetProblem(
            dataset.train,
            extract_classifiers_from_bagging(bagging),
            ensemble_size=int(param.final_ensemble_size),
            scoring_function=default_scoring_function)

        pop_size = int(param.pop_size) if "pop_size" in param else 100
        n_gen = int(param.n_gen) if "n_gen" in param else 300

        algorithm = GA(
            pop_size=pop_size,
            verbose=True,
            seed=42,
            eliminate_duplicates=False,
            mutation=FindingBestExpressionProblemMutation(),
            crossover=FindingBestExpressionProblemCrossover(),
            sampling=FindingBestExpressionProblemSampling()
        )

        result = minimize(problem,
                          algorithm,
                          ("n_gen", n_gen),
                          verbose=True,
                          save_history=False,
                          seed=42)

        # NOTE(review): result.F[0] is the minimized objective value; whether
        # it is an accuracy or a negated/loss-style score depends on the
        # problem definition -- TODO confirm the sign before comparing runs.
        avg_accuracy = result.F[0]

        client.log_metric(run_id, "accuracy", avg_accuracy)
        terminate_run(run_id, client=client)
    except Exception as e:
        finish_run_and_print_exception(run_id, e, client=client)