
# Classification

The following example shows how to fit a simple classification model with
*auto-sklearn*.


In [1]:
from pprint import pprint
import sklearn.metrics
import autosklearn.classification
import timeit
import pandas as pd
from sklearn.model_selection import train_test_split
import autosklearn

## Data Loading



In [2]:

file_path = '../data/airbnb.csv'
df = pd.read_csv(file_path)
df = df.dropna(subset=['Rating'])
y = df['Rating']
X = df.drop('Rating', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## Build and fit a classifier



In [3]:
start_time = timeit.default_timer()

automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    per_run_time_limit=30,
    include = {
        'classifier': ["random_forest"],
        'feature_preprocessor': ["no_preprocessing"],
    },
    tmp_folder="tmp/autosklearn_classification_example_tmp4",
)

automl.fit(X_train, y_train, dataset_name="airbnb")
run_key = list(automl.automl_.runhistory_.data.keys())[0]
run_value = automl.automl_.runhistory_.data[run_key]
config=automl.automl_.runhistory_.ids_config[run_key.config_id]
print(config)


end_time = timeit.default_timer()
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")

  if is_sparse(X[column]):
Fitting to the training data:  10%|[32m█         [0m| 12/120 [00:12<01:48,  1.00s/it, The total time budget for this task is 0:02:00]



Fitting to the training data:  36%|[32m███▌      [0m| 43/120 [00:43<01:17,  1.00s/it, The total time budget for this task is 0:02:00]



Fitting to the training data:  62%|[32m██████▏   [0m| 74/120 [01:14<00:46,  1.00s/it, The total time budget for this task is 0:02:00]



Fitting to the training data:  62%|[32m██████▎   [0m| 75/120 [01:15<00:45,  1.00s/it, The total time budget for this task is 0:02:00]



Fitting to the training data:  63%|[32m██████▎   [0m| 76/120 [01:16<00:44,  1.00s/it, The total time budget for this task is 0:02:00]



Fitting to the training data:  89%|[32m████████▉ [0m| 107/120 [01:47<00:13,  1.00s/it, The total time budget for this task is 0:02:00]



Fitting to the training data:  91%|[32m█████████ [0m| 109/120 [01:49<00:11,  1.00s/it, The total time budget for this task is 0:02:00]



Fitting to the training data: 100%|[32m██████████[0m| 120/120 [01:50<00:00,  1.09it/s, The total time budget for this task is 0:02:00]


Configuration(values={
  'balancing:strategy': 'none',
  'classifier:__choice__': 'random_forest',
  'classifier:random_forest:bootstrap': 'True',
  'classifier:random_forest:criterion': 'gini',
  'classifier:random_forest:max_depth': 'None',
  'classifier:random_forest:max_features': 0.5,
  'classifier:random_forest:max_leaf_nodes': 'None',
  'classifier:random_forest:min_impurity_decrease': 0.0,
  'classifier:random_forest:min_samples_leaf': 1,
  'classifier:random_forest:min_samples_split': 2,
  'classifier:random_forest:min_weight_fraction_leaf': 0.0,
  'data_preprocessor:__choice__': 'feature_type',
  'data_preprocessor:feature_type:numerical_transformer:imputation:strategy': 'mean',
  'data_preprocessor:feature_type:numerical_transformer:rescaling:__choice__': 'standardize',
  'data_preprocessor:feature_type:text_transformer:text_encoding:__choice__': 'tfidf_encoding',
  'data_preprocessor:feature_type:text_transformer:text_encoding:tfidf_encoding:analyzer': 'char',
  'data_prepr

## View the models found by auto-sklearn



In [4]:
print(automl.leaderboard())

          rank  ensemble_weight  type  cost duration
model_id                                            
1            1              1.0  <NA>  <NA>     <NA>


## Print the final ensemble constructed by auto-sklearn



In [5]:
pprint(automl.show_models(), indent=4)

RuntimeError: No model found. Try increasing 'time_left_for_this_task'.

## Get the Score of the final ensemble



In [6]:
start_time = timeit.default_timer()

predictions = automl.predict(X_test)

end_time = timeit.default_timer()
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")
print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))

Execution time: 0.01797708599769976 seconds
Accuracy score: 0.32406808877397114
