In [1]:
import numpy as np
import pandas as pd
import autokeras as ak
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

# Load the Iris dataset.
iris = load_iris()
# Split the data into training and test sets.
# random_state pins the shuffle so the split (and every result derived from it)
# is the same after a kernel restart; stratify keeps the class proportions of
# iris.target equal in both subsets.
x_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    train_size=0.75,
    test_size=0.25,
    random_state=42,
    stratify=iris.target,
)

# Peek at the first ten training rows.
print(x_train[:10])

Using TensorFlow backend
[[4.4 3.  1.3 0.2]
 [7.2 3.6 6.1 2.5]
 [5.1 3.5 1.4 0.3]
 [7.7 2.8 6.7 2. ]
 [6.7 2.5 5.8 1.8]
 [6.1 2.9 4.7 1.4]
 [6.7 3.3 5.7 2.5]
 [6.6 3.  4.4 1.4]
 [7.  3.2 4.7 1.4]
 [4.9 2.5 4.5 1.7]]


In [2]:
# Fit a 2-component PCA on the training set and project the training rows
# onto those components in the same call.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X=x_train)
# Show the first ten projected rows.
print(X_train_pca[:10])

[[-3.05019095 -0.42083567]
 [ 2.87156317  0.7313495 ]
 [-2.70235844  0.37298317]
 [ 3.44673919  0.40273893]
 [ 2.25466273 -0.27937481]
 [ 0.92154689 -0.13625065]
 [ 2.36422697  0.26350703]
 [ 0.84514007  0.32171132]
 [ 1.23668883  0.67080655]
 [ 0.43776308 -1.19824336]]


In [3]:
# Build a new dataset from the PCA features: the two projected columns plus the
# class label appended as a third column (the label is promoted to float by the
# stacking step).
new_data = pd.DataFrame(
    np.column_stack((X_train_pca, y_train)),
    columns=['feature1', 'feature2', 'target'],
)

In [4]:
# Initialize the structured data classifier.
clf = ak.StructuredDataClassifier(
    overwrite=True,  # start a fresh search instead of reloading an existing project
    max_trials=10,  # It tries 10 different models.
    seed=42,  # seed the search so repeated runs are more repeatable
)

In [None]:
# Train the structured data classifier on the PCA-projected training features
# and their labels, with epochs=50.
clf.fit(x=X_train_pca, y=y_train, epochs=50)

In [6]:
# Predict on new data: project the test rows with the already-fitted PCA,
# then ask the classifier for their labels.
X_test_pca = pca.transform(X=X_test)
predict_res = clf.predict(x=X_test_pca)
print(predict_res)



[['2']
 ['2']
 ['0']
 ['0']
 ['2']
 ['0']
 ['1']
 ['1']
 ['0']
 ['0']
 ['1']
 ['0']
 ['0']
 ['2']
 ['1']
 ['1']
 ['0']
 ['1']
 ['2']
 ['2']
 ['1']
 ['1']
 ['0']
 ['0']
 ['0']
 ['0']
 ['1']
 ['1']
 ['0']
 ['2']
 ['2']
 ['1']
 ['0']
 ['2']
 ['2']
 ['1']
 ['1']
 ['2']]


In [7]:
# Evaluate the best model on the PCA-projected test set.
# NOTE(review): the result looks like [loss, accuracy] — confirm against the
# metrics configured for the classifier.
eval_res = clf.evaluate(x=X_test_pca, y=y_test)
print(eval_res)



[0.24616986513137817, 0.9210526347160339]


In [8]:
# Export the model held by the classifier and print its layer-by-layer summary
# (layer types, output shapes, parameter counts).
model = clf.export_model()
model.summary()



Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 2)]               0         
                                                                 
 multi_category_encoding (M  (None, 2)                 0         
 ultiCategoryEncoding)                                           
                                                                 
 normalization (Normalizati  (None, 2)                 5         
 on)                                                             
                                                                 
 dense (Dense)               (None, 32)                96        
                                                                 
 batch_normalization (Batch  (None, 32)                128       
 Normalization)                                                  
                                                             