In [2]:
# Install auto-sklearn for this runtime.
# NOTE(review): the version is unpinned. The constructor arguments used further down
# (`include_estimators`, `include_preprocessors`) appear to belong to older auto-sklearn
# releases — confirm which version this notebook was run with and pin it here.
!pip3 install auto-sklearn

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
import autosklearn.classification
import numpy as np
import pandas as pd
import sklearn.metrics
from autosklearn.metrics import accuracy, balanced_accuracy, precision, recall, f1, roc_auc

# Define Score Function


In [4]:
def error(solution, prediction):
    """Return the misclassification rate between true and predicted labels.

    Both inputs are converted to NumPy arrays before comparing so that the
    comparison is always element-wise. Without the conversion, two plain
    Python lists compare as a single boolean (so the result could only be
    0.0 or 1.0), and two pandas Series with different indexes raise instead
    of being compared position by position.

    Args:
        solution: Array-like of ground-truth labels.
        prediction: Array-like of predicted labels, aligned with ``solution``.

    Returns:
        Fraction of positions at which the two label sequences differ.
    """
    solution = np.asarray(solution)
    prediction = np.asarray(prediction)
    return np.mean(solution != prediction)

def get_metric_result(cv_results):
    """Summarise the successful runs recorded in an auto-sklearn ``cv_results_`` dict.

    Keeps only rows whose ``status`` is "Success" and returns, for each of
    them, the rank, the chosen classifier, the mean test score and every
    column whose key starts with ``metric_``.
    """
    frame = pd.DataFrame.from_dict(cv_results)
    succeeded = frame.loc[frame['status'] == "Success"]
    # Per-metric columns are discovered from the dict keys, in their original order
    metric_columns = [name for name in cv_results.keys() if name.startswith('metric_')]
    selected = ['rank_test_scores', 'param_classifier:__choice__', 'mean_test_score'] + metric_columns
    return succeeded[selected]

# Wrap `error` as an auto-sklearn scorer named 'custom_error'.
# `error` is a loss: it is 0 when every label matches and grows with each mismatch,
# hence optimum=0 and greater_is_better=False. It compares labels directly, so the
# scorer is declared as needing neither probabilities nor decision thresholds.
error_rate = autosklearn.metrics.make_scorer(
    name='custom_error',
    score_func=error,
    optimum=0,
    greater_is_better=False,
    needs_proba=False,
    needs_threshold=False
)

In [5]:
#define show result function
def evaluate_model(model, X=None, y=None):
    """Print evaluation metrics and search diagnostics for a fitted model.

    Args:
        model: Fitted classifier exposing ``predict``, ``predict_proba``,
            ``cv_results_``, ``leaderboard``, ``sprint_statistics``,
            ``show_models`` and ``get_models_with_weights``.
        X: Features to evaluate on. Defaults to the notebook-level ``X_test``.
        y: True labels for ``X``. Defaults to the notebook-level ``y_test``.

    Returns:
        None. Everything is written to stdout.
    """
    # Fall back to the notebook globals so existing `evaluate_model(model)` calls keep working
    if X is None:
        X = X_test
    if y is None:
        y = y_test

    y_pred = model.predict(X)
    # Predict probabilities once and reuse them for both log loss and ROC AUC
    y_proba = model.predict_proba(X)

    # precision/recall/F-scores are called with their default averaging, and column 1 of
    # the probabilities is used for ROC AUC — this assumes a binary target
    print("Accuracy score: ", sklearn.metrics.accuracy_score(y, y_pred))
    print('Balanced Accuracy Score: ', sklearn.metrics.balanced_accuracy_score(y, y_pred))
    print("Log Loss: ", sklearn.metrics.log_loss(y, y_proba))
    print("Precision Score: ", sklearn.metrics.precision_score(y, y_pred))
    print("Recall Score: : ", sklearn.metrics.recall_score(y, y_pred))
    print("F1 Score: ", sklearn.metrics.f1_score(y, y_pred))
    print("F1 (beta=2) Score: ", sklearn.metrics.fbeta_score(y, y_pred, beta=2))
    print("ROC Auc Score: ", sklearn.metrics.roc_auc_score(y, y_proba[:, 1]))
    print('Confusion Matrix: \n', sklearn.metrics.confusion_matrix(y, y_pred))

    print("#" * 80)
    print("Metric results")
    print(get_metric_result(model.cv_results_).to_string(index=False))

    # These methods only return their reports. Inside a function nothing is displayed
    # automatically, so each result has to be printed explicitly to show up.
    print(model.leaderboard())
    print(model.sprint_statistics())
    print(model.show_models())
    print(model.get_models_with_weights())

## Data Loading



In [6]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Location of the feature table on the mounted Drive; change it here to use another copy
DATA_PATH = '/content/drive/MyDrive/msc-project-data/features_with_outcome.csv'
# Fixed seed so the train/test split is identical on every run
SPLIT_SEED = 42

# Import all data. The encoding is spelled 'utf-8'; the previous 'utf=8' was a typo that
# only worked because Python normalises codec names.
data = pd.read_csv(DATA_PATH, encoding='utf-8')

# Target column, and the feature matrix without the target or the `org_uuid` column
# (presumably a row identifier rather than a feature — confirm)
y_data = data['outcome']
X_data = data.drop(columns=['org_uuid', 'outcome'])

# Split the data into a stratified 80/20 train/test partition
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.2, stratify=y_data, random_state=SPLIT_SEED
)

Mounted at /content/drive


# Print a list of available classifiers

In [6]:
import autosklearn.pipeline.components.classification

# Print the name of every classifier component that auto-sklearn can choose from
classifier_choice = autosklearn.pipeline.components.classification.ClassifierChoice
for classifier_name in classifier_choice.get_components():
    print(classifier_name)

adaboost
bernoulli_nb
decision_tree
extra_trees
gaussian_nb
gradient_boosting
k_nearest_neighbors
lda
liblinear_svc
libsvm_svc
mlp
multinomial_nb
passive_aggressive
qda
random_forest
sgd


## Print a list of available metrics

In [None]:
# Build a tab-indented, star-bulleted list of the classification metric names
metric_names = autosklearn.metrics.CLASSIFICATION_METRICS
bullet_list = "\t*" + "\n\t*".join(metric_names)

print("Available CLASSIFICATION metrics autosklearn.metrics.*:")
print(bullet_list)

Available CLASSIFICATION metrics autosklearn.metrics.*:
	*accuracy
	*balanced_accuracy
	*roc_auc
	*average_precision
	*log_loss
	*precision
	*precision_macro
	*precision_micro
	*precision_samples
	*precision_weighted
	*recall
	*recall_macro
	*recall_micro
	*recall_samples
	*recall_weighted
	*f1
	*f1_macro
	*f1_micro
	*f1_samples
	*f1_weighted


## Build and fit a classifier (mlp) with default target metric



In [7]:
# Set up the classifier without a target metric: the search is restricted to MLP
# classifiers with no feature preprocessing and evaluated with 5-fold cross-validation.
# With no `metric` passed, auto-sklearn optimises its default; in the recorded results
# mean_test_score equals metric_accuracy.
automl = autosklearn.classification.AutoSklearnClassifier(
    # Overall search budget (seconds, per the auto-sklearn docs)
    time_left_for_this_task=3600,
    # per_run_time_limit is left at its default (a value of 720 was tried and disabled)
    include_estimators =['mlp'],
    resampling_strategy ='cv',
    resampling_strategy_arguments={'folds': 5},
    # Keep a single model in the final ensemble
    ensemble_size=1,
    include_preprocessors = ['no_preprocessing'],
    # Additional metrics computed for each run; they surface as the metric_* columns of cv_results_
    scoring_functions=[accuracy, balanced_accuracy, precision, recall, f1, error_rate, roc_auc],
    # metric is intentionally not set for this baseline run
)

In [None]:
# Fit the classifier on the training split.
# NOTE(review): X_test/y_test are also handed to fit — presumably auto-sklearn only uses
# them to track test performance during the search, not for training; confirm in its docs.
automl.fit(X_train, y_train, X_test, y_test)

## Get the Score of the final ensemble



In [9]:
# Print test-set metrics and search diagnostics for the default-metric model
evaluate_model(automl)

Accuracy score:  0.7852419074138531
Balanced Accuracy Score:  0.5808194918978788
Log Loss:  0.4745093289016461
Precision Score:  0.6129032258064516
Recall Score: :  0.1996996996996997
F1 Score:  0.3012457531143828
F1 (beta=2) Score:  0.23082263103089204
ROC Auc Score:  0.7366267207397701
Confusion Matrix: 
 [[4246  168]
 [1066  266]]
################################################################################
Metric results
 rank_test_scores param_classifier:__choice__  mean_test_score  metric_accuracy  metric_balanced_accuracy  metric_precision  metric_recall  metric_f1  metric_custom_error  metric_roc_auc
               11                         mlp         0.775771         0.775771                  0.599044          0.533180       0.269468   0.357177             0.224229        0.729602
               15                         mlp         0.768113         0.768113                  0.500000          0.000000       0.000000   0.000000             0.231887        0.500312
       

# Build and fit classifier (mlp) with f1 as target metric

In [None]:
# Set up the classifier with F1 as the target metric. Every other setting matches the
# baseline run: MLP only, no feature preprocessing, 5-fold cross-validation.
automl1 = autosklearn.classification.AutoSklearnClassifier(
    # Overall search budget (seconds, per the auto-sklearn docs)
    time_left_for_this_task=3600,
    # per_run_time_limit is left at its default (a value of 720 was tried and disabled)
    include_estimators =['mlp'],
    resampling_strategy ='cv',
    resampling_strategy_arguments={'folds': 5},
    # Keep a single model in the final ensemble
    ensemble_size=1,
    include_preprocessors = ['no_preprocessing'],
    # Additional metrics computed for each run; they surface as the metric_* columns of cv_results_
    scoring_functions=[accuracy, balanced_accuracy, precision, recall, f1, error_rate, roc_auc],
    # Metric the search optimises; in the recorded results mean_test_score equals metric_f1
    metric=autosklearn.metrics.f1
)

In [None]:
# Fit the F1-targeted classifier on the training split.
# NOTE(review): X_test/y_test are also handed to fit — presumably auto-sklearn only uses
# them to track test performance during the search, not for training; confirm in its docs.
automl1.fit(X_train,y_train,X_test,y_test)

In [12]:
# Print test-set metrics and search diagnostics for the F1-targeted model
evaluate_model(automl1)

Accuracy score:  0.7399930386355725
Balanced Accuracy Score:  0.6200377994668888
Log Loss:  0.5589776003877271
Precision Score:  0.43349753694581283
Recall Score: :  0.3963963963963964
F1 Score:  0.41411764705882353
F1 (beta=2) Score:  0.4032997250229147
ROC Auc Score:  0.6947280595049058
Confusion Matrix: 
 [[3724  690]
 [ 804  528]]
################################################################################
Metric results
 rank_test_scores param_classifier:__choice__  mean_test_score  metric_accuracy  metric_balanced_accuracy  metric_precision  metric_recall  metric_f1  metric_custom_error  metric_roc_auc
                8                         mlp         0.357177         0.775771                  0.599044          0.533180       0.269468   0.357177             0.224229        0.729602
               18                         mlp         0.284950         0.769636                  0.583309          0.595118       0.235849   0.284950             0.230364        0.740527
      

# Build and fit classifier (mlp) with recall as target metric

In [13]:
# Set up the classifier with recall as the target metric. Every other setting matches the
# baseline run: MLP only, no feature preprocessing, 5-fold cross-validation.
automl2 = autosklearn.classification.AutoSklearnClassifier(
    # Overall search budget (seconds, per the auto-sklearn docs)
    time_left_for_this_task=3600,
    # per_run_time_limit is left at its default (a value of 720 was tried and disabled)
    include_estimators =['mlp'],
    resampling_strategy ='cv',
    resampling_strategy_arguments={'folds': 5},
    # Keep a single model in the final ensemble
    ensemble_size=1,
    include_preprocessors = ['no_preprocessing'],
    # Additional metrics computed for each run; they surface as the metric_* columns of cv_results_
    scoring_functions=[accuracy, balanced_accuracy, precision, recall, f1, error_rate, roc_auc],
    # Metric the search optimises; in the recorded results mean_test_score equals metric_recall
    metric=autosklearn.metrics.recall
)

In [None]:
# Fit the recall-targeted classifier on the training split.
# NOTE(review): X_test/y_test are also handed to fit — presumably auto-sklearn only uses
# them to track test performance during the search, not for training; confirm in its docs.
automl2.fit(X_train,y_train,X_test,y_test)

In [15]:
# Print test-set metrics and search diagnostics for the recall-targeted model
evaluate_model(automl2)

Accuracy score:  0.755134006265228
Balanced Accuracy Score:  0.6511228605134359
Log Loss:  0.5186550964606867
Precision Score:  0.4709976798143852
Recall Score: :  0.4572072072072072
F1 Score:  0.464
F1 (beta=2) Score:  0.4599003171726325
ROC Auc Score:  0.7208713300976555
Confusion Matrix: 
 [[3730  684]
 [ 723  609]]
################################################################################
Metric results
 rank_test_scores param_classifier:__choice__  mean_test_score  metric_accuracy  metric_balanced_accuracy  metric_precision  metric_recall  metric_f1  metric_custom_error  metric_roc_auc
               10                         mlp         0.269468         0.775771                  0.599044          0.533180       0.269468   0.357177             0.224229        0.729602
               18                         mlp         0.205302         0.780775                  0.579906          0.584581       0.205302   0.300446             0.219225        0.732516
               13     

# Build and fit classifier (mlp) with ROC AUC as target metric

In [16]:
# Set up the classifier with ROC AUC as the target metric. Every other setting matches the
# baseline run: MLP only, no feature preprocessing, 5-fold cross-validation.
automl3 = autosklearn.classification.AutoSklearnClassifier(
    # Overall search budget (seconds, per the auto-sklearn docs)
    time_left_for_this_task=3600,
    # per_run_time_limit is left unset (a value of 720 was tried and disabled); the fitted
    # estimator's repr in the recorded output reports per_run_time_limit=360
    include_estimators =['mlp'],
    resampling_strategy ='cv',
    resampling_strategy_arguments={'folds': 5},
    # Keep a single model in the final ensemble
    ensemble_size=1,
    include_preprocessors = ['no_preprocessing'],
    # Additional metrics computed for each run; they surface as the metric_* columns of cv_results_
    scoring_functions=[accuracy, balanced_accuracy, precision, recall, f1, error_rate, roc_auc],
    # Metric the search optimises; in the recorded results mean_test_score equals metric_roc_auc
    metric=autosklearn.metrics.roc_auc
)

In [17]:
# Fit the ROC-AUC-targeted classifier on the training split.
# NOTE(review): X_test/y_test are also handed to fit — presumably auto-sklearn only uses
# them to track test performance during the search, not for training; confirm in its docs.
automl3.fit(X_train,y_train,X_test,y_test)

AutoSklearnClassifier(ensemble_size=1, include_estimators=['mlp'],
                      include_preprocessors=['no_preprocessing'],
                      metric=roc_auc, per_run_time_limit=360,
                      resampling_strategy='cv',
                      resampling_strategy_arguments={'folds': 5},
                      scoring_functions=[accuracy, balanced_accuracy, precision,
                                         recall, f1, custom_error, roc_auc])

In [18]:
# Print test-set metrics and search diagnostics for the ROC-AUC-targeted model
evaluate_model(automl3)

Accuracy score:  0.78820048729551
Balanced Accuracy Score:  0.5979469501218482
Log Loss:  0.4694031419361856
Precision Score:  0.6078799249530957
Recall Score: :  0.24324324324324326
F1 Score:  0.3474530831099196
F1 (beta=2) Score:  0.2764033441392254
ROC Auc Score:  0.7468827855948382
Confusion Matrix: 
 [[4205  209]
 [1008  324]]
################################################################################
Metric results
 rank_test_scores param_classifier:__choice__  mean_test_score  metric_accuracy  metric_balanced_accuracy  metric_precision  metric_recall  metric_f1  metric_custom_error  metric_roc_auc
               12                         mlp         0.729602         0.775771                  0.599044          0.533180       0.269468   0.357177             0.224229        0.729602
               10                         mlp         0.732516         0.780775                  0.579906          0.584581       0.205302   0.300446             0.219225        0.732516
         