In [5]:
# -*- encoding: utf-8 -*-
"""
==============
Classification
==============
The following example shows how to fit a simple classification model with
*auto-sklearn*.
after install autosklearn, please restart runtime for updating sklearn version. 
"""

import sklearn.datasets
import sklearn.metrics

import autosklearn.classification
import threading
import time
from smac.runhistory.runhistory import RunHistory
from IPython.display import clear_output
from autosklearn.metrics import balanced_accuracy, precision, recall, f1
############################################################################
# Data Loading
# ============

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = \
    sklearn.model_selection.train_test_split(X, y, random_state=1)

############################################################################
# Build and fit a regressor
# =========================

tmp_runhistory = RunHistory()

def get_smac_object_callback(budget_type):
    def get_smac_object(
        scenario_dict,
        seed,
        ta,
        ta_kwargs,
        metalearning_configurations,
        n_jobs,
        dask_client,
    ):
        from smac.facade.hyperband_facade import HB4AC
        from smac.facade.roar_facade import ROAR
        from smac.facade.smac_ac_facade import SMAC4AC
        from smac.intensification.successive_halving import SuccessiveHalving
        from smac.scenario.scenario import Scenario

        if n_jobs > 1 or (dask_client and len(dask_client.nthreads()) > 1):
            raise ValueError("Please make sure to guard the code invoking Auto-sklearn by "
                             "`if __name__ == '__main__'` and remove this exception.")

        scenario = Scenario(scenario_dict)
        if len(metalearning_configurations) > 0:
            default_config = scenario.cs.get_default_configuration()
            initial_configurations = [default_config] + metalearning_configurations
        else:
            initial_configurations = None

        ta_kwargs['budget_type'] = budget_type

        return HB4AC(
            scenario=scenario,
            rng=seed,
            tae_runner=ta,
            tae_runner_kwargs=ta_kwargs,
            initial_design=None,
            initial_configurations=initial_configurations,
            runhistory=tmp_runhistory,
            run_id=seed,
            intensifier=SuccessiveHalving,
            intensifier_kwargs={
                'initial_budget': 10.0,
                'max_budget': 100,
                'eta': 2,
                'min_chall': 1},
            n_jobs=n_jobs,
            dask_client=dask_client,
        )
    return get_smac_object

scoring_functions=[balanced_accuracy, precision, f1]

def get_metric_val(metric, cost):
    return metric._optimum - (metric._sign * cost)

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    metric=autosklearn.metrics.accuracy,
    scoring_functions=scoring_functions,
    tmp_folder='/tmp/autosklearn_classification_example_tmp15',
    output_folder='/tmp/autosklearn_classification_example_out15',
    get_smac_object_callback=get_smac_object_callback("iterations")
)

def run_automl():
  automl.fit(X_train, y_train, dataset_name='breast_cancer')

  ############################################################################
  # Print the final ensemble constructed by auto-sklearn
  # ====================================================

  # print(automl.show_models())

  ###########################################################################
  # Get the Score of the final ensemble
  # ===================================

  predictions = automl.predict(X_test)
  # print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))
t = threading.Thread(target=run_automl)
t.start()

history_read = 0
wait_cnt = 0
while True:
  if t.is_alive():
    infos = {}
    for key, val in tmp_runhistory.data.items():
        config_dict = tmp_runhistory.ids_config[key.config_id].get_dictionary()
        classifier = config_dict["classifier:__choice__"]
        if classifier not in infos:
            infos[classifier] = {}
        infos[classifier][key.config_id] = {
            "metrics": {},
            "configs": {}
        }
        for metric in scoring_functions:
            if val.additional_info is not None and metric.name in val.additional_info:
                infos[classifier][key.config_id]["metrics"][metric.name] = get_metric_val(metric, val.additional_info[metric.name])
        # infos[key.config_id]["configs"] = tmp_runhistory.ids_config[key.config_id].get_dictionary()
    print(infos)
    print("\n\n")
  else:
    # clear_output(wait=True)
    # print(automl.show_models())
    predictions = automl.predict(X_test)
    print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))
    break

  time.sleep(1)


{}



Exception in thread Thread-4:
Traceback (most recent call last):
  File "/home/mansu/anaconda3/envs/autoML_test/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/mansu/anaconda3/envs/autoML_test/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-5-d628be52a558>", line 99, in run_automl
    automl.fit(X_train, y_train, dataset_name='breast_cancer')
  File "/home/mansu/.local/lib/python3.7/site-packages/autosklearn/estimators.py", line 598, in fit
    dataset_name=dataset_name,
  File "/home/mansu/.local/lib/python3.7/site-packages/autosklearn/estimators.py", line 355, in fit
    output_folder=self.output_folder,
  File "/home/mansu/.local/lib/python3.7/site-packages/autosklearn/estimators.py", line 298, in build_automl
    delete_output_folder_after_terminate=self.delete_output_folder_after_terminate,
  File "/home/mansu/.local/lib/python3.7/site-packages/autosklearn/util/backend.

AttributeError: 'NoneType' object has no attribute 'predict'

In [5]:
print(tmp_runhistory.data)

_id=65, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=50.0), RunValue(cost=0.049645390070921946, time=0.35532283782958984, status=<StatusType.SUCCESS: 1>, starttime=1615261847.927833, endtime=1615261848.2948444, additional_info={'duration': 0.3423933982849121, 'num_run': 66, 'train_loss': 0.0491228070175439, 'configuration_origin': 'Random Search'})), (RunKey(config_id=68, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=100.0), RunValue(cost=0.014184397163120588, time=0.4190406799316406, status=<StatusType.SUCCESS: 1>, starttime=1615261848.9219358, endtime=1615261849.3526757, additional_info={'duration': 0.4051554203033447, 'num_run': 69, 'train_loss': 0.01403508771929829, 'configuration_origin': 'Random Search'})), (RunKey(config_id=69, instance_id='{"task_id": "breast_cancer"}', seed=0, budget=25.0), RunValue(cost=1.0, time=0.39351892471313477, status=<StatusType.CRASHED: 3>, starttime=1615261849.984579, endtime=1615261850.390624, additional_info={'traceback':

In [4]:
print(vars(automl.automl_.runhistory_))

 feature_preprocessor:select_percentile_classification:percentile, Value: 10.344709696649282
  feature_preprocessor:select_percentile_classification:score_func, Value: 'mutual_info'
, 84: Configuration:
  balancing:strategy, Value: 'none'
  classifier:__choice__, Value: 'mlp'
  classifier:mlp:activation, Value: 'tanh'
  classifier:mlp:alpha, Value: 0.022513996645680746
  classifier:mlp:batch_size, Constant: 'auto'
  classifier:mlp:beta_1, Constant: 0.9
  classifier:mlp:beta_2, Constant: 0.999
  classifier:mlp:early_stopping, Value: 'train'
  classifier:mlp:epsilon, Constant: 1e-08
  classifier:mlp:hidden_layer_depth, Value: 3
  classifier:mlp:learning_rate_init, Value: 0.004713954087896932
  classifier:mlp:n_iter_no_change, Constant: 32
  classifier:mlp:num_nodes_per_layer, Value: 41
  classifier:mlp:shuffle, Constant: 'True'
  classifier:mlp:solver, Constant: 'adam'
  classifier:mlp:tol, Constant: 0.0001
  data_preprocessing:categorical_transformer:categorical_encoding:__choice__, Val