In [63]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from autogluon.tabular import TabularPredictor

# Загрузка датасета Iris
iris = load_iris()
# Разделение данных на обучающий и тестовый наборы
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, train_size=0.75, test_size=0.25)

print(X_train[:10])

[[5.9 3.  4.2 1.5]
 [5.8 2.6 4.  1.2]
 [6.8 3.  5.5 2.1]
 [4.7 3.2 1.3 0.2]
 [6.9 3.1 5.1 2.3]
 [5.  3.5 1.6 0.6]
 [5.4 3.7 1.5 0.2]
 [5.  2.  3.5 1. ]
 [6.5 3.  5.5 1.8]
 [6.7 3.3 5.7 2.5]]


In [64]:
# Применение PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_train)
print(X_pca[:10])

[[ 0.45149168 -0.12650805]
 [ 0.16235194 -0.43115786]
 [ 2.11263303  0.14898743]
 [-2.94949688 -0.08892385]
 [ 1.87495486  0.34554795]
 [-2.45736119  0.23886415]
 [-2.54886975  0.68862204]
 [-0.59653901 -1.27590863]
 [ 1.89191153 -0.01527315]
 [ 2.36903557  0.24564653]]


In [65]:
# Создание нового датасета с использованием новых признаков
new_data = np.concatenate((X_pca, y_train.reshape(-1, 1)), axis=1)
new_data = pd.DataFrame(new_data, columns=['feature1', 'feature2', 'target'])

In [None]:
# Создание и обучение модели с помощью AutoGluon
predictor = TabularPredictor(label='target').fit(new_data, presets='medium_quality')
# predictor = TabularPredictor.load("AutogluonModels/ag-20240209_065058")

In [68]:
# Прогнозирование на новых данных
new_X_pca = pca.transform(X_test)
test_data = pd.DataFrame(np.concatenate((new_X_pca, y_test.reshape(-1, 1)), axis=1),
                        columns=['feature1', 'feature2', 'target'])
predictions = predictor.predict(test_data)

predictions.head()

0    2.0
1    1.0
2    0.0
3    2.0
4    0.0
Name: target, dtype: float64

In [69]:
predictor.evaluate(new_data, silent=True)

{'accuracy': 1.0, 'balanced_accuracy': 1.0, 'mcc': 1.0}

In [70]:
predictor.leaderboard(new_data)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBMLarge,1.0,1.0,accuracy,0.002503,0.000726,1.705303,0.002503,0.000726,1.705303,1,True,13
1,XGBoost,1.0,1.0,accuracy,0.011064,0.001213,0.195539,0.011064,0.001213,0.195539,1,True,11
2,KNeighborsDist,1.0,1.0,accuracy,0.015522,0.012714,0.002239,0.015522,0.012714,0.002239,1,True,2
3,ExtraTreesGini,1.0,1.0,accuracy,0.031196,0.025854,0.206925,0.031196,0.025854,0.206925,1,True,9
4,ExtraTreesEntr,1.0,1.0,accuracy,0.031352,0.013456,0.200014,0.031352,0.013456,0.200014,1,True,10
5,WeightedEnsemble_L2,1.0,1.0,accuracy,0.032053,0.026095,0.406775,0.000857,0.000241,0.19985,2,True,14
6,RandomForestEntr,0.991071,0.956522,accuracy,0.029763,0.02465,0.200557,0.029763,0.02465,0.200557,1,True,7
7,RandomForestGini,0.991071,0.956522,accuracy,0.029972,0.012879,0.185271,0.029972,0.012879,0.185271,1,True,6
8,LightGBM,0.982143,1.0,accuracy,0.00138,0.000682,0.558241,0.00138,0.000682,0.558241,1,True,5
9,NeuralNetFastAI,0.973214,1.0,accuracy,0.011552,0.00298,0.263346,0.011552,0.00298,0.263346,1,True,3


In [71]:
predictor.evaluate(test_data, silent=True)

{'accuracy': 0.9736842105263158,
 'balanced_accuracy': 0.9791666666666666,
 'mcc': 0.9608244925752507}

In [72]:
predictor.leaderboard(test_data)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,NeuralNetTorch,0.973684,1.0,accuracy,0.002272,0.001405,0.23264,0.002272,0.001405,0.23264,1,True,12
1,KNeighborsDist,0.973684,1.0,accuracy,0.015115,0.012714,0.002239,0.015115,0.012714,0.002239,1,True,2
2,KNeighborsUnif,0.973684,1.0,accuracy,0.01663,0.017141,0.005077,0.01663,0.017141,0.005077,1,True,1
3,ExtraTreesGini,0.973684,1.0,accuracy,0.026871,0.025854,0.206925,0.026871,0.025854,0.206925,1,True,9
4,WeightedEnsemble_L2,0.973684,1.0,accuracy,0.028003,0.026095,0.406775,0.001132,0.000241,0.19985,2,True,14
5,ExtraTreesEntr,0.973684,1.0,accuracy,0.02993,0.013456,0.200014,0.02993,0.013456,0.200014,1,True,10
6,NeuralNetFastAI,0.947368,1.0,accuracy,0.007386,0.00298,0.263346,0.007386,0.00298,0.263346,1,True,3
7,RandomForestGini,0.947368,0.956522,accuracy,0.030107,0.012879,0.185271,0.030107,0.012879,0.185271,1,True,6
8,RandomForestEntr,0.947368,0.956522,accuracy,0.031562,0.02465,0.200557,0.031562,0.02465,0.200557,1,True,7
9,LightGBMXT,0.921053,0.956522,accuracy,0.000939,0.001125,0.416156,0.000939,0.001125,0.416156,1,True,4
