In [10]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from tpot import TPOTClassifier

# Load the Iris dataset and hold out 25% of the samples for testing.
# random_state pins the shuffle so that a fresh kernel run reproduces the
# same train/test split instead of drawing a new one each time.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, train_size=0.75, test_size=0.25, random_state=42
)

# Peek at the first ten training rows.
print(X_train[:10])

[[6.4 3.1 5.5 1.8]
 [5.8 2.7 5.1 1.9]
 [7.2 3.2 6.  1.8]
 [5.4 3.9 1.7 0.4]
 [6.1 2.6 5.6 1.4]
 [5.8 2.7 4.1 1. ]
 [4.8 3.4 1.6 0.2]
 [6.1 3.  4.6 1.4]
 [6.3 3.3 6.  2.5]
 [7.7 2.6 6.9 2.3]]


In [11]:
# Apply PCA: fit a 2-component projection on the training features and
# transform those same features with it, then show the first ten projected rows.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_train)
print(X_pca[:10])

[[ 1.7694165  -0.0140171 ]
 [ 1.26319799 -0.63395699]
 [ 2.49091733  0.50636082]
 [-2.39949092  0.77779576]
 [ 1.63404012 -0.5339066 ]
 [ 0.09170064 -0.32239799]
 [-2.74821862  0.0662639 ]
 [ 0.75306401 -0.05967973]
 [ 2.39252781 -0.13283205]
 [ 3.66304002  0.19887044]]


In [12]:
# Build a new dataset from the two PCA components plus the training labels.
# The frame is assembled column by column rather than by concatenating
# everything into one float array first: that keeps `target` at the labels'
# own dtype instead of casting it to float, and `new_data` is only ever
# bound to a DataFrame.
# NOTE(review): none of the cells shown here reads `new_data` afterwards;
# tpot.fit is called with X_train - confirm whether that is intended.
new_data = pd.DataFrame({
    'feature1': X_pca[:, 0],
    'feature2': X_pca[:, 1],
    'target': y_train,
})

In [13]:
# Configure the TPOT search (training happens in a separate fit call):
# 5 generations, a population of 20 pipelines, progress output at verbosity 2.
# random_state seeds the search so that re-running it against the same data
# gives the same result.
tpot = TPOTClassifier(generations=5, population_size=20, verbosity=2, random_state=42)

In [14]:
# Run the TPOT pipeline search on the training split.
# NOTE(review): this fits on the original X_train features; the PCA output
# (X_pca) and the new_data frame built earlier are not passed in - confirm
# that is intended.
tpot.fit(X_train, y_train)

Optimization Progress:   0%|          | 0/120 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.9731225296442687

Generation 2 - Current best internal CV score: 0.9731225296442687

Generation 3 - Current best internal CV score: 0.9818181818181818

Generation 4 - Current best internal CV score: 0.9818181818181818

Generation 5 - Current best internal CV score: 0.9818181818181818

Best pipeline: MLPClassifier(input_matrix, alpha=0.0001, learning_rate_init=0.001)


In [15]:
# Evaluate the fitted TPOT model on the held-out split. Despite its name,
# `predictions` stores the value returned by score(), not predicted labels.
predictions = tpot.score(X_test, y_test)

# Show the fitted pipelines that TPOT kept on its Pareto front.
print("fitted pipelines", tpot.pareto_front_fitted_pipelines_, sep="\n")

fitted pipelines
{'MLPClassifier(input_matrix, MLPClassifier__alpha=0.0001, MLPClassifier__learning_rate_init=0.001)': Pipeline(steps=[('mlpclassifier', MLPClassifier())])}


In [16]:
# Print each estimator that makes up the pipeline TPOT settled on.
print("models:")
for _step_name, model in tpot.fitted_pipeline_.steps:
    print(model)

models:
MLPClassifier()


In [17]:
# Report the held-out score computed earlier (kept under the name `predictions`).
print("predictions", predictions, sep="\n")

predictions
0.9736842105263158
