# Automated Machine Learning (AutoML)

In [None]:
!pip install -r requirements.txt 

In [None]:
from tpot import TPOTClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [None]:
# Single seed reused by the train/test split and by the TPOT search below,
# so both are driven by the same fixed random state.
RANDOM_SEED = 42

## Load breast cancer data

In [None]:
# Load the breast cancer dataset that ships with scikit-learn. The full
# dataset object is kept in `data` because its `target_names` are needed
# later when labelling the confusion-matrix plot.
data = load_breast_cancer()
X = data.data    # feature matrix
y = data.target  # class labels

In [None]:
# Hold out 20% of the samples as a test set. The split is seeded with
# RANDOM_SEED and stratified on the labels `y`.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_SEED,
    stratify=y,
)

## Initialize Tree-based Pipeline Optimization Tool (TPOT)

In [None]:
# Settings for the TPOT search, collected in one place so they are easy to
# review and tweak before the (potentially long) fit.
tpot_settings = dict(
    verbosity=3,
    random_state=RANDOM_SEED,
    generations=5,
    population_size=50,
    n_jobs=-1,
    mutation_rate=0.9,
    crossover_rate=0.1,
    cv=5,
    scoring='f1',
)

# Build the (not yet fitted) TPOT classifier from those settings.
tpot_class = TPOTClassifier(**tpot_settings)

In [None]:
# Run the TPOT search on the training split only; the test split stays
# untouched until evaluation.
tpot_class.fit(X_train, y_train)

In [None]:
# Evaluate the fitted TPOT model on the held-out test split.
# NOTE(review): reported as F1 on the assumption that `score` uses the
# `scoring='f1'` metric passed to the constructor -- confirm against TPOT docs.
score = tpot_class.score(X_test, y_test)
print(f"Test F1: {score:.4f}")

In [None]:
# Predicted class labels for the test split, used below to build the
# confusion matrix.
y_pred = tpot_class.predict(X_test)

In [None]:
# Compare the true test labels with the model's predictions.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
# Bare expression as the last line so the notebook displays the matrix.
cm

In [None]:
from modelviz.confusion_matrix import plot_confusion_matrix

# Draw the confusion matrix computed above, labelling the classes with the
# dataset's own target names.
plot_confusion_matrix(
    cm=cm,
    classes=data.target_names,
    model_name='TPOTClassifier',
    table_fontsize=7,
    cmap='Greys',
    label_positions_color='grey',
    proportions_color='aqua',
)

## Export the best pipeline

In [None]:
import os

# Make sure the output folder exists (no error if it is already there),
# then ask TPOT to write its exported pipeline into it as a Python file.
export_dir = 'tpot_exported_pipeline'
os.makedirs(export_dir, exist_ok=True)
tpot_class.export(os.path.join(export_dir, 'tpot_best_pipeline.py'))