In [1]:
# Importamos las librerias necesarias
import autosklearn.classification as auto_c
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the dataset from a CSV file located in the working directory
glass_csv = 'cleaned_glass.csv'
df = pd.read_csv(glass_csv)

<br>
<h3 align='center'>Preparación de los datos 👈</h3>

In [3]:
# Collect the names of all numeric columns, then discard the two that are not
# used as features: 'index' (presumably a row identifier - confirm against the
# CSV) and 'Type' (the target column selected below).
numeric = df.select_dtypes(include=np.number).columns.to_list()
for non_feature in ('index', 'Type'):
    # list.remove raises ValueError if the name is not among the numeric columns
    numeric.remove(non_feature)

# Feature matrix (x) and target vector (y)
x, y = df[numeric], df['Type']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split repeatable across runs
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=5,
)

<br>
<h3 align='center'>Auto-ML 👈</h3>

In [4]:
# Configure the AutoML search: 180 seconds for the whole task and at most
# 30 seconds for each individual run.
automl_settings = {
    'time_left_for_this_task': 180,
    'per_run_time_limit': 30,
}
automl = auto_c.AutoSklearnClassifier(**automl_settings)

# Run the search on the training split; the dataset name is the label that
# appears in the statistics summary.
automl.fit(x_train, y_train, dataset_name='cleaned_glass.csv')

<br>
<h3 align='center'>Resultados 👈</h3>

In [5]:
# Print an overall summary of the search results
run_summary = automl.sprint_statistics()
print(run_summary)

auto-sklearn results:
  Dataset name: cleaned_glass.csv
  Metric: accuracy
  Best validation score: 0.771930
  Number of target algorithm runs: 71
  Number of successful target algorithm runs: 71
  Number of crashed target algorithm runs: 0
  Number of target algorithms that exceeded the time limit: 0
  Number of target algorithms that exceeded the memory limit: 0



In [6]:
# Show the table of models that were evaluated (rank, ensemble weight, type,
# cost and duration). The leaderboard is tabular, so it is left as the cell's
# last expression: Jupyter then renders it with its rich display instead of
# the plain text that wrapping it in print() forces.
automl.leaderboard()

          rank  ensemble_weight                 type      cost  duration
model_id                                                                
2            1             0.02        random_forest  0.228070  0.939242
69           2             0.06        random_forest  0.228070  0.962998
64           3             0.06        random_forest  0.228070  1.027897
61           4             0.06        random_forest  0.228070  0.944397
56           5             0.02        random_forest  0.228070  0.902526
54           6             0.02        random_forest  0.228070  0.936621
48           7             0.06        random_forest  0.228070  0.925887
70           8             0.02        random_forest  0.245614  1.042530
41           9             0.04        random_forest  0.245614  1.032319
9           10             0.02        random_forest  0.245614  0.958224
40          11             0.02        random_forest  0.263158  1.035198
33          12             0.02        random_fores

In [7]:
# Print the ensemble members together with their respective hyperparameters
models_by_id = automl.show_models()
print(models_by_id)

{2: {'model_id': 2, 'rank': 1, 'cost': 0.22807017543859653, 'ensemble_weight': 0.02, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7fd0951f9ed0>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7fd0499965f0>, 'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7fd049997f40>, 'sklearn_classifier': RandomForestClassifier(max_features=3, n_estimators=512, n_jobs=1,
                       random_state=1, warm_start=True)}, 48: {'model_id': 48, 'rank': 2, 'cost': 0.22807017543859653, 'ensemble_weight': 0.06, 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7fd0499965c0>, 'balancing': Balancing(random_state=1), 'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7

<br>
<h3 align="center">Modelo a utilizar con sus respectivos hiperparámetros 👈</h3>

In [8]:
# Display the hyperparameters of the configuration with the highest mean test
# score among all configurations recorded in cv_results_.
search_results = automl.cv_results_
best_run = np.argmax(search_results['mean_test_score'])
search_results['params'][best_run]

{'balancing:strategy': 'none',
 'classifier:__choice__': 'random_forest',
 'data_preprocessor:__choice__': 'feature_type',
 'feature_preprocessor:__choice__': 'no_preprocessing',
 'classifier:random_forest:bootstrap': 'True',
 'classifier:random_forest:criterion': 'gini',
 'classifier:random_forest:max_depth': 'None',
 'classifier:random_forest:max_features': 0.5,
 'classifier:random_forest:max_leaf_nodes': 'None',
 'classifier:random_forest:min_impurity_decrease': 0.0,
 'classifier:random_forest:min_samples_leaf': 1,
 'classifier:random_forest:min_samples_split': 2,
 'classifier:random_forest:min_weight_fraction_leaf': 0.0,
 'data_preprocessor:feature_type:categorical_transformer:categorical_encoding:__choice__': 'one_hot_encoding',
 'data_preprocessor:feature_type:categorical_transformer:category_coalescence:__choice__': 'minority_coalescer',
 'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'mean',
 'data_preprocessor:feature_type:numerical_transformer:res