In [1]:
## Auto AI pieces
# - Profiler: https://www.geeksforgeeks.org/data-profiling-in-pandas-using-python/
# - AutoAI: https://www.kdnuggets.com/2019/01/automated-machine-learning-python.html
# - lale: https://nbviewer.jupyter.org/github/IBM/lale/blob/master/examples/docs_guide_for_sklearn_users.ipynb
# - auto-sklearn: https://automl.github.io/auto-sklearn/master/examples/20_basic/example_classification.html#sphx-glr-examples-20-basic-example-classification-py

## Data Profiling

In [2]:
## Install as needed
#!pip install -U pandas-profiling

In [3]:
from pathlib import Path

import pandas as pd
import pandas_profiling

In [4]:
# Local data cache: the folder (with trailing slash) and the CSV file inside it
datadir = '../common-data/covid/'
datafile = f"{datadir}data.csv"

In [5]:
# Load the locally cached CSV, parsing the 'date' column as datetimes,
# then show the first five rows as the cell output.
data = pd.read_csv(filepath_or_buffer=datafile, parse_dates=['date'])
data.head(n=5)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [6]:
# Build the profiling report object for the loaded frame.
# Constructed explicitly through the pandas_profiling module imported above
# rather than through the DataFrame accessor.
profile = pandas_profiling.ProfileReport(data, title='Pandas Profiling Report')

In [7]:
# Save the report as an HTML file.
# The target lives in a relative "data/" folder; create it first so the export
# does not depend on that folder already existing in the working directory.
report_file = "data/coviddata_profiling.html"
Path(report_file).parent.mkdir(parents=True, exist_ok=True)
profile.to_file(output_file=report_file)

Summarize dataset:   0%|          | 0/19 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

## Lale - select method and tune parameters

In [8]:
## Install if needed
#! pip install lale

In [9]:
# Import the datasets
import lale.datasets

In [10]:
# Load the California housing train/test splits and build a small preview frame.
# NOTE(review): `data` is rebound here (it previously held the COVID frame), and
# only the first five rows of features and target are combined side by side.
(train_X, train_y), (test_X, test_y) = lale.datasets.california_housing_df()
data = pd.concat([train_X.head(), train_y.head()], axis="columns")

In [11]:
# Summary statistics of the preview frame built in the previous cell. That frame
# holds only the five head rows (hence count == 5 in the output), so these are
# not statistics of the full training set.
data.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,3.34502,33.0,5.081655,1.040112,1364.2,2.889498,34.122,-118.516,1.6952
std,0.85071,17.363755,0.903396,0.057128,575.154936,0.944104,1.697254,1.566662,1.232384
min,1.9425,4.0,4.002817,0.985119,874.0,1.738095,32.69,-120.48,0.934
25%,3.2596,33.0,4.473545,1.006421,915.0,2.3,32.71,-119.8,0.965
50%,3.5542,36.0,5.017657,1.033803,1314.0,2.723214,33.77,-118.16,1.03
75%,3.8125,43.0,5.645833,1.041005,1418.0,3.691814,34.66,-117.11,1.726
max,4.1563,49.0,6.268421,1.134211,2300.0,3.994366,36.78,-117.03,3.821


In [12]:
import numpy as np

import sklearn.metrics
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeRegressor as Tree
from sklearn.pipeline import Pipeline

from lale.lib.lale import Hyperopt

  self.re = re.compile(self.reString)


In [13]:
# Wrap the operators imported in the previous cell for use with lale; the log
# output of this cell reports PCA, Tree and Pipeline being wrapped.
# NOTE(review): presumably this must run after the imports and before the
# pipeline cell, which passes PCA and Tree uninstantiated — confirm in lale docs.
lale.wrap_imported_operators()

INFO:lale.operator_wrapper:Lale:Wrapped known operator:PCA
INFO:lale.operator_wrapper:Lale:Wrapped known operator:Tree
INFO:lale.operator_wrapper:Lale:Wrapped known operator:Pipeline


In [14]:
# Planned pipeline: a PCA transform step followed by a decision-tree regressor.
# Both steps are passed as (wrapped) operators without hyperparameters, leaving
# the concrete configuration to the search in the next cell.
pca_tree_planned = Pipeline(
    steps=[
        ('tfm', PCA),
        ('estim', Tree),
    ]
)

In [15]:
%%time
pca_tree_trained = pca_tree_planned.auto_configure(
    train_X, train_y, optimizer=Hyperopt, cv=3, max_evals=10, verbose=True)

INFO:lale.schema_simplifier:simplifyAny: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional h

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'enum': [None, 'mle']}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide by the singular values to ensure uncorrelated\noutputs with unit component-wise variances.', 'type': 'boolean'}, 'svd_solver': {'default': 'auto', 'description': 'Algorithm to use.', 'enum'

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'enum': [None, 'mle']}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide by the singular values to ensure uncorrelated\noutputs with unit component-wise variances.', 'type': 'boolean'}, 'svd_solver': {'default': 'auto', 'description': 'Algorithm to use.', 'enum'

INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full', 'auto']}, {'enum': ['arpack']}, {'enum': ['randomized']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'enum': [None, 'mle']}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'enum': [None, 'mle']}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide by the singular values to ensure uncorrelated\noutputs with unit component-wise variances.', 'type': 'boolean'}, 'svd_solver': {'default': 'auto', 'description': 'Algorithm to use.', 'enum'

INFO:lale.schema_simplifier:simplifyAll: [range]: {'not': {'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}} is not a satisfiable schema for the optimizer, since it negates everything, falsifying the entire combined schema {'allOf': [{'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}, {'not': {'enum': ['mle']}}, {'not': {'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}}]}
INFO:lale.schema_simplifier:simplifyAll: required key n_components is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments

INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['arpack']}, {'enum': ['randomized']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key n_components is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 

INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full']}, {'enum': ['arpack']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusi

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the 

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the 

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the 

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False}, 'whiten': {'default':

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False}, 'whiten': {'default':

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full']}, {'enum': ['arpack']}, {'enum': ['randomized']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full', 'auto']}, {'enum': ['arpack']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'propert

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full', 'auto']}, {'enum': ['full']}, {'enum': ['randomized']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 't

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.search.schema2search_space:Ignoring Duplicate SearchSpace entry ["return hp.uniform('lale.lib.sklearn.pca.PCA_n_components', 5e-324, 0.999999999999999

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 1, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_leaf as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 1, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_leaf as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 8, 'forOptimizer': False, 'description': 'Consider max_features features at each split.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum': ['mae'], 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum': ['mae'], 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simpli

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 8, 'forOptimizer': False, 'description': 'Consider max_features features at each split.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'ty

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum': ['mae'], 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 1, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_leaf as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum': ['mae'], 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 1, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_leaf as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum': ['mae'], 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 1, 'maximum': 11008, 'forOptimizer': Fa

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'enum': [None, 'mle']}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide by the singular values to ensure uncorrelated\noutputs with unit component-wise variances.', 'type': 'boolean'}, 'svd_solver': {'default': 'auto', 'description': 'Algorithm to use.', 'enum'

INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full']}, {'enum': ['arpack']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'enum': [None, 'mle']}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide by the singular values to ensure 

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'enum': [None, 'mle']}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide by the singular values to ensure uncorrelated\noutputs with unit component-wise variances.', 'type': 'boolean'}, 'svd_solver': {'default': 'auto', 'description': 'Algorithm to use.', 'enum'

INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full', 'auto']}, {'enum': ['full']}, {'enum': ['randomized']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'enum': [None, 'mle']}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide b

INFO:lale.schema_simplifier:simplifyAll: [range]: {'not': {'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}} is not a satisfiable schema for the optimizer, since it negates everything, falsifying the entire combined schema {'allOf': [{'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}, {'not': {'enum': ['mle']}}, {'not': {'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}}]}
INFO:lale.schema_simplifier:simplifyAll: required key n_components is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments

INFO:lale.schema_simplifier:simplifyAll: required key n_components is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide by the singular values to ensure uncorrelated\noutputs with unit component-wise variances.', 'type': 'boolean'}, 'svd_solver': {'default': 'auto', 'description': 'Al

INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full']}, {'enum': ['randomized']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exc

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the 

INFO:lale.schema_simplifier:simplifyAll: [range]: {'not': {'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}} is not a satisfiable schema for the optimizer, since it negates everything, falsifying the entire combined schema {'allOf': [{'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}, {'not': {'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}}]}
INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full', 'auto']}, {'enum': ['arpack']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfi

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified percentage.', 'type': 'number', 'minimum': 0.0, 'exclusiveMinimum': True, 'maximum': 1.0, 'exclusiveMaximum': True}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the 

INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full', 'auto']}, {'enum': ['full']}, {'enum': ['arpack']}, {'enum': ['randomized']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Select the number of components such that the amount of variance that needs to be explained is greater than the specified pe

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['arpack']}, {'enum': ['randomized']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'propertie

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full']}, {'enum': ['arpack']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: [{'default': 'auto', 'description': 'Algorithm to use.', 'enum': ['auto', 'full', 'arpack', 'randomized']}, {'enum': ['full', 'auto']}, {'enum': ['randomized']}] is not a satisfiable list of conjoined schemas because the enumeration ['auto', 'full', 'arpack', 'randomized'] has no elements that are satisfiable by the conjoined schemas
INFO:lale.schema_simplifier:simplifyAll: required key svd_solver is False, so the entire conjugation of schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'pro

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide by the singular values to ensure uncorrelated\noutputs with unit component-wise variances.', '

INFO:lale.schema_simplifier:mergeAll: conflicting description fields: This class does not support sparse input. See TruncatedSVD for an alternative with sparse data. and This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.
 found when merging schemas [{'description': 'This class does not support sparse input. See TruncatedSVD for an alternative with sparse data.', 'type': 'object'}, {'description': 'This first object lists all constructor arguments with their types, but omits constraints for conditional hyperparameters.\n', 'type': 'object', 'properties': {'n_components': {'description': 'Number of components to keep.', 'type': 'integer', 'minimum': 1, 'maximum': 8, 'forOptimizer': False}, 'whiten': {'default': False, 'description': 'When true, multiply the components vectors by the square root of\nn_samples and then divide by the singular values to ensure uncorrelated\noutputs with unit component-wise variances.', '

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 1, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_leaf as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 8, 'forOptimizer': False, 'description': 'Consider max_features features at each split.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 8, 'forOptimizer': False, 'description': 'Consider max_features features at each split.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'int

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum': ['mae'], 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 1, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_leaf as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum': ['mae'], 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplif

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 8, 'forOptimizer': False, 'description': 'Consider max_features features at each split.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 1, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_leaf as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'typ

INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum': ['mae'], 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 11008, 'forOptimizer': False, 'description': 'Consider min_samples_split as the minimum number.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'type': 'integer', 'minimum': 2, 'maximum': 8, 'forOptimizer': False, 'description': 'Consider max_features features at each split.'} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: skipping not for optimizer {'enum': ['mae'], 'forOptimizer': False} (after simplification)
INFO:lale.schema_simplifier:simplifyAll: s

  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.015989 seconds
INFO:hyperopt.tpe:TPE using 0 trials


 10%|█         | 1/10 [00:00<00:05,  1.73trial/s, best loss: -1.2157210141768479e-05]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.019437 seconds
INFO:hyperopt.tpe:TPE using 1/1 trials with best loss -0.000012


 20%|██        | 2/10 [00:00<00:03,  2.31trial/s, best loss: -1.2157210141768479e-05]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.012362 seconds
INFO:hyperopt.tpe:TPE using 2/2 trials with best loss -0.000012


 30%|███       | 3/10 [00:01<00:02,  2.39trial/s, best loss: -0.30982475250307284]   

INFO:hyperopt.tpe:build_posterior_wrapper took 0.021200 seconds
INFO:hyperopt.tpe:TPE using 3/3 trials with best loss -0.309825


 40%|████      | 4/10 [00:01<00:02,  2.37trial/s, best loss: -0.30982475250307284]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.020585 seconds
INFO:hyperopt.tpe:TPE using 4/4 trials with best loss -0.309825


 50%|█████     | 5/10 [00:02<00:02,  2.35trial/s, best loss: -0.30982475250307284]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.018151 seconds
INFO:hyperopt.tpe:TPE using 5/5 trials with best loss -0.309825


 60%|██████    | 6/10 [00:02<00:01,  2.24trial/s, best loss: -0.3689100400828495] 

INFO:hyperopt.tpe:build_posterior_wrapper took 0.019901 seconds
INFO:hyperopt.tpe:TPE using 6/6 trials with best loss -0.368910


 70%|███████   | 7/10 [00:03<00:01,  2.34trial/s, best loss: -0.41410769000479447]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.014560 seconds
INFO:hyperopt.tpe:TPE using 7/7 trials with best loss -0.414108


 80%|████████  | 8/10 [00:03<00:00,  2.43trial/s, best loss: -0.41410769000479447]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.018215 seconds
INFO:hyperopt.tpe:TPE using 8/8 trials with best loss -0.414108


 90%|█████████ | 9/10 [00:03<00:00,  2.34trial/s, best loss: -0.41410769000479447]

INFO:hyperopt.tpe:build_posterior_wrapper took 0.019009 seconds
INFO:hyperopt.tpe:TPE using 9/9 trials with best loss -0.414108


100%|██████████| 10/10 [00:04<00:00,  2.34trial/s, best loss: -0.41410769000479447]
CPU times: user 44.5 s, sys: 10.3 s, total: 54.7 s
Wall time: 5.72 s


In [16]:
# Score the tuned pipeline on the held-out split and report R2.
# NOTE(review): relies on `sklearn.metrics` having been imported by an
# earlier cell — confirm on a fresh kernel.
predicted = pca_tree_trained.predict(test_X)
r2_test = sklearn.metrics.r2_score(test_y, predicted)
print(f'R2 score {r2_test:.2f}')


R2 score 0.37


## Automated Algorithm Selection

In [17]:
## Install and other issues
# !pip install auto-sklearn
## Note: if the install fails because of swig, install swig first (with Homebrew: brew install swig)

In [18]:
#!pip install -U scikit-learn
#!pip install auto-sklearn

In [19]:
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

In [20]:
# Load the breast-cancer dataset as (features, labels) arrays and hold out
# a test split; random_state=1 keeps the split reproducible across runs.
#
# `sklearn.model_selection` is imported explicitly here because the import
# cell above only pulls in `sklearn.datasets` and `sklearn.metrics`;
# without this line the attribute lookup below only succeeds if some other
# import happens to load the submodule as a side effect.
import sklearn.model_selection

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

In [21]:
# Search settings for auto-sklearn, collected in one place so they are easy
# to tune. The two time limits are presumably in seconds — confirm against
# the auto-sklearn API docs. Both folders live under the local data/ dir.
automl_settings = dict(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    tmp_folder='data/autosklearn_classification_example_tmp',
    output_folder='data/autosklearn_classification_example_out',
)
automl = autosklearn.classification.AutoSklearnClassifier(**automl_settings)

# Run the search on the training split; leaving `fit` as the last
# expression displays the fitted estimator's repr as the cell output.
automl.fit(X_train, y_train, dataset_name='breast_cancer')

AutoSklearnClassifier(output_folder='data/autosklearn_classification_example_out',
                      per_run_time_limit=30, time_left_for_this_task=120,
                      tmp_folder='data/autosklearn_classification_example_tmp')

In [22]:
# Show what auto-sklearn reports for the models it selected.
models_summary = automl.show_models()
print(models_summary)

[(0.160000, SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'passive_aggressive', 'data_preprocessing:categorical_transformer:categorical_encoding:__choice__': 'no_encoding', 'data_preprocessing:categorical_transformer:category_coalescence:__choice__': 'minority_coalescer', 'data_preprocessing:numerical_transformer:imputation:strategy': 'most_frequent', 'data_preprocessing:numerical_transformer:rescaling:__choice__': 'none', 'feature_preprocessor:__choice__': 'no_preprocessing', 'classifier:passive_aggressive:C': 0.14268277711454813, 'classifier:passive_aggressive:average': 'False', 'classifier:passive_aggressive:fit_intercept': 'True', 'classifier:passive_aggressive:loss': 'hinge', 'classifier:passive_aggressive:tol': 0.0002600768160857831, 'data_preprocessing:categorical_transformer:category_coalescence:minority_coalescer:minimum_fraction': 0.0070580904199417415},
dataset_properties={
  'task': 1,
  'sparse': False,
  'multilabel': False,
  'multi

In [23]:
# Evaluate the fitted auto-sklearn model on the held-out test split.
predictions = automl.predict(X_test)
test_accuracy = sklearn.metrics.accuracy_score(y_test, predictions)
print("Accuracy score:", test_accuracy)

Accuracy score: 0.9440559440559441
