In [3]:
from tpot import TPOTClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

# Load the iris dataset bundled with scikit-learn.
iris = load_iris()

# Seeded 75/25 split into training and held-out test sets; both the feature
# matrix and the label vector are cast to float64 before splitting.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data.astype(np.float64),
    iris.target.astype(np.float64),
    train_size=0.75,
    test_size=0.25,
    random_state=42,
)

# Configure the TPOT search: 5 generations, 50 pipelines per generation.
tpot = TPOTClassifier(
    generations=5,
    population_size=50,
    verbosity=2,
    random_state=42,
)

# Run the pipeline search on the training split.
tpot.fit(X_train, y_train)

# Report the score on the held-out split, then export the best pipeline.
print(tpot.score(X_test, y_test))
tpot.export('tpot_iris_pipeline.py')

# Predict labels for the training split. Despite the variable name, this holds
# the output of predict() (class labels), not a preprocessed dataset.
preprocess_data = tpot.predict(X_train)




Optimization Progress:   0%|          | 0/300 [00:00<?, ?pipeline/s]



TPOT closed during evaluation in one generation.


TPOT closed prematurely. Will use the current best pipeline.

Best pipeline: DecisionTreeClassifier(PolynomialFeatures(input_matrix, degree=2, include_bias=False, interaction_only=False), criterion=entropy, max_depth=4, min_samples_leaf=20, min_samples_split=10)
0.9736842105263158




In [5]:
# Display the array produced by tpot.predict(X_train); despite its name it
# contains predicted class labels for the training split, not preprocessed features.
preprocess_data

array([0., 0., 2., 1., 1., 0., 0., 1., 2., 2., 1., 2., 1., 2., 1., 0., 2.,
       1., 0., 0., 0., 1., 2., 0., 0., 0., 1., 0., 1., 2., 0., 1., 2., 0.,
       2., 2., 1., 1., 2., 1., 0., 1., 2., 0., 0., 1., 1., 0., 2., 0., 0.,
       1., 1., 2., 2., 2., 2., 1., 0., 0., 2., 2., 0., 0., 0., 2., 2., 0.,
       2., 2., 0., 1., 1., 2., 1., 2., 0., 2., 1., 2., 1., 1., 1., 0., 1.,
       1., 0., 1., 2., 2., 0., 1., 2., 2., 0., 2., 0., 1., 2., 2., 1., 2.,
       1., 1., 2., 2., 0., 1., 2., 0., 1., 2.])

In [6]:
# Attributes of the fitted TPOTClassifier.
# Only the last expression of the cell is echoed as output, so just
# evaluated_individuals_ is displayed below.
tpot.fitted_pipeline_ # best pipeline
tpot.pareto_front_fitted_pipelines_ # dict of pipelines on the Pareto front; NOTE(review): TPOT docs state it is only populated when verbosity=3 - confirm
tpot.evaluated_individuals_ # dict keyed by pipeline string with per-pipeline stats (generation, counts, internal CV score)



{'GaussianNB(input_matrix)': {'generation': 0,
  'mutation_count': 0,
  'crossover_count': 0,
  'predecessor': ('ROOT',),
  'operator_count': 1,
  'internal_cv_score': 0.9280632411067193},
 'RandomForestClassifier(MinMaxScaler(input_matrix), RandomForestClassifier__bootstrap=True, RandomForestClassifier__criterion=gini, RandomForestClassifier__max_features=0.2, RandomForestClassifier__min_samples_leaf=8, RandomForestClassifier__min_samples_split=4, RandomForestClassifier__n_estimators=100)': {'generation': 0,
  'mutation_count': 0,
  'crossover_count': 0,
  'predecessor': ('ROOT',),
  'operator_count': 2,
  'internal_cv_score': 0.9458498023715414},
 'XGBClassifier(MultinomialNB(input_matrix, MultinomialNB__alpha=10.0, MultinomialNB__fit_prior=False), XGBClassifier__learning_rate=0.01, XGBClassifier__max_depth=9, XGBClassifier__min_child_weight=13, XGBClassifier__n_estimators=100, XGBClassifier__n_jobs=1, XGBClassifier__subsample=0.45, XGBClassifier__verbosity=0)': {'generation': 0,
  '

In [4]:
# Methods of the TPOTClassifier pipeline.
# fit(X_train, y_train) runs the pipeline search; it is left disabled in this
# cell. None of the calls below is echoed, because only the last expression of
# a cell is displayed and export() is the last one.
tpot.predict(X_test)  # predict class labels for the test split
tpot.predict_proba(X_test)  # predict class probabilities for the test split
tpot.score(X_test, y_test)  # evaluation metric on the held-out test split
# Export the best pipeline as Python source. The file name carries a .py
# extension, consistent with the other export() calls in this notebook.
tpot.export('TPOTClassifier1.py')


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer
from tpot.export_utils import set_param_recursive

# NOTE: Make sure that the outcome column is labeled 'target' in the data file

# Read the dataset with every column parsed as float64. The path and the
# separator are placeholders that must be filled in before running this cell.
tpot_data = pd.read_csv(
    'PATH/TO/DATA/FILE',
    sep='COLUMN_SEPARATOR',
    dtype=np.float64,
)

# Predictors (X): every column except the outcome column.
features = tpot_data.drop(columns='target')

# Seeded split into training and testing partitions (default split sizes).
training_features, testing_features, training_target, testing_target = train_test_split(
    features,
    tpot_data['target'],
    random_state=42,
)

# Average CV score on the training set was: 0.9826086956521738

# Build the pipeline: a Normalizer step followed by a KNeighborsClassifier.
exported_pipeline = make_pipeline(
    Normalizer(norm="l2"),
    KNeighborsClassifier(n_neighbors=5, p=2, weights="distance"),
)

# Fix random_state to 42 for all the steps in the exported pipeline.
set_param_recursive(exported_pipeline.steps, 'random_state', 42)

# Fit on the training partition, then predict the testing partition.
exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)

In [1]:
from tpot import TPOTClassifier
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Load the digits dataset bundled with scikit-learn.
digits = load_digits()

# Seeded 75/25 split into training and held-out test sets.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    train_size=0.75,
    test_size=0.25,
    random_state=42,
)

# Configure the TPOT search (5 generations, population of 50) and run it on
# the training split.
tpot = TPOTClassifier(
    generations=5,
    population_size=50,
    verbosity=2,
    random_state=42,
)
tpot.fit(X_train, y_train)

# Report the score on the held-out split, then export the best pipeline.
print(tpot.score(X_test, y_test))
tpot.export('tpot_digits_pipeline.py')



Optimization Progress:   0%|          | 0/300 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.9844058928817294

Generation 2 - Current best internal CV score: 0.9844058928817294

Generation 3 - Current best internal CV score: 0.9844058928817294

Generation 4 - Current best internal CV score: 0.9844058928817294

Generation 5 - Current best internal CV score: 0.9866363761531047

Best pipeline: KNeighborsClassifier(input_matrix, n_neighbors=2, p=2, weights=distance)
0.9822222222222222
