In [54]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from autogluon.tabular import TabularPredictor

# Load the Iris dataset: X is the feature matrix, y holds the class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Preview the first ten samples of the feature matrix.
print(X[:10])

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]]


In [55]:
# Apply PCA: reduce the feature matrix X to two principal components.
# The PCA object is fitted on all of X and kept in `pca`, so the same
# projection can be applied to other samples later via pca.transform.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
# Preview the first ten projected samples.
print(X_pca[:10])

[[-2.68412563  0.31939725]
 [-2.71414169 -0.17700123]
 [-2.88899057 -0.14494943]
 [-2.74534286 -0.31829898]
 [-2.72871654  0.32675451]
 [-2.28085963  0.74133045]
 [-2.82053775 -0.08946138]
 [-2.62614497  0.16338496]
 [-2.88638273 -0.57831175]
 [-2.6727558  -0.11377425]]


In [56]:
# Build a new dataset from the PCA features plus the label column.
# Stacking y next to the float PCA matrix stores the labels as floats
# (0.0, 1.0, 2.0) in the resulting frame.
new_data = pd.DataFrame(
    np.column_stack((X_pca, y)),
    columns=['feature1', 'feature2', 'target'],
)

In [57]:
# Create and train the model with AutoGluon.
# The label column holds the three class ids stored as floats (0.0, 1.0, 2.0),
# so the problem type is stated explicitly instead of relying on AutoGluon's
# dtype-based inference of the label column.
predictor = TabularPredictor(label='target', problem_type='multiclass').fit(
    new_data, presets='medium_quality'
)
# To reuse a previously trained predictor instead of refitting, load it from
# the directory that an earlier run saved its models to, e.g.:
# predictor = TabularPredictor.load("AutogluonModels/ag-20240209_065058")

No path specified. Models will be saved in: "AutogluonModels/ag-20240209_135120"
Presets specified: ['medium_quality']
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240209_135120"
AutoGluon Version:  1.0.0
Python Version:     3.9.6
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 21.6.0: Mon Dec 19 20:46:01 PST 2022; root:xnu-8020.240.18~2/RELEASE_ARM64_T8101
CPU Count:          8
Memory Avail:       6.33 GB / 16.00 GB (39.6%)
Disk Space Avail:   56.47 GB / 228.27 GB (24.7%)
Train Data Rows:    150
Train Data Columns: 2
Label Column:       target
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == float, but few unique label-values observed and label-values can be converted to int).
	3 unique label values:  [0.0, 1.0, 2.0]
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify pr

In [58]:
# Predict on a few hand-written samples given in the original four-feature space.
# NOTE(review): the first row equals the first Iris sample printed earlier, so
# these may be rows of the training set rather than unseen data - confirm.
new_X = np.array([
    [5.1, 3.5, 1.4, 0.2],
    [5.9, 3.0, 5.1, 1.8],
    [5.1, 2.5, 3.0, 1.1],
])

# Project the samples with the PCA fitted earlier (no refitting here).
new_X_pca = pca.transform(new_X)

# Append the expected class of each sample as the 'target' column so the same
# frame can also be passed to evaluate()/leaderboard().
test_data = pd.DataFrame(
    np.column_stack((new_X_pca, [0, 2, 1])),
    columns=['feature1', 'feature2', 'target'],
)
predictions = predictor.predict(test_data)

predictions.head()

0    0.0
1    2.0
2    1.0
Name: target, dtype: float64

In [59]:
# Score the predictor on new_data. This is the same frame that was passed to
# fit(), so the reported metrics are training-set metrics, not an estimate of
# performance on unseen data.
predictor.evaluate(new_data, silent=True)

{'accuracy': 1.0, 'balanced_accuracy': 1.0, 'mcc': 1.0}

In [60]:
# Per-model leaderboard; score_test is computed on the frame passed in, which
# here is new_data - the data the predictor was fitted on.
predictor.leaderboard(new_data)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,ExtraTreesGini,1.0,1.0,accuracy,0.02941,0.014375,0.199694,0.02941,0.014375,0.199694,1,True,9
1,ExtraTreesEntr,1.0,1.0,accuracy,0.030488,0.026259,0.205901,0.030488,0.026259,0.205901,1,True,10
2,WeightedEnsemble_L2,1.0,1.0,accuracy,0.031276,0.026497,0.409636,0.000788,0.000238,0.203735,2,True,14
3,LightGBMLarge,0.993333,1.0,accuracy,0.002221,0.000766,1.690082,0.002221,0.000766,1.690082,1,True,13
4,KNeighborsDist,0.993333,0.966667,accuracy,0.013983,0.014922,0.002849,0.013983,0.014922,0.002849,1,True,2
5,RandomForestEntr,0.993333,0.966667,accuracy,0.028854,0.014579,0.192665,0.028854,0.014579,0.192665,1,True,7
6,RandomForestGini,0.993333,0.966667,accuracy,0.030254,0.014202,0.199318,0.030254,0.014202,0.199318,1,True,6
7,NeuralNetTorch,0.98,1.0,accuracy,0.002324,0.001751,0.222265,0.002324,0.001751,0.222265,1,True,12
8,XGBoost,0.98,1.0,accuracy,0.008882,0.001385,0.178305,0.008882,0.001385,0.178305,1,True,11
9,LightGBM,0.966667,1.0,accuracy,0.001231,0.000764,0.508965,0.001231,0.000764,0.508965,1,True,5


In [61]:
# Score the predictor on test_data, the three hand-written samples with their
# expected labels. With only three rows these metrics are a sanity check only.
predictor.evaluate(test_data, silent=True)

{'accuracy': 1.0, 'balanced_accuracy': 1.0, 'mcc': 1.0}

In [62]:
# Per-model leaderboard with score_test computed on the three-row test_data.
predictor.leaderboard(test_data)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,CatBoost,1.0,0.966667,accuracy,0.000815,0.000536,0.190729,0.000815,0.000536,0.190729,1,True,8
1,LightGBMLarge,1.0,1.0,accuracy,0.001745,0.000766,1.690082,0.001745,0.000766,1.690082,1,True,13
2,NeuralNetTorch,1.0,1.0,accuracy,0.00235,0.001751,0.222265,0.00235,0.001751,0.222265,1,True,12
3,LightGBM,1.0,1.0,accuracy,0.00312,0.000764,0.508965,0.00312,0.000764,0.508965,1,True,5
4,LightGBMXT,1.0,1.0,accuracy,0.00631,0.000981,0.593613,0.00631,0.000981,0.593613,1,True,4
5,XGBoost,1.0,1.0,accuracy,0.008873,0.001385,0.178305,0.008873,0.001385,0.178305,1,True,11
6,NeuralNetFastAI,1.0,1.0,accuracy,0.013065,0.003032,0.389668,0.013065,0.003032,0.389668,1,True,3
7,ExtraTreesEntr,1.0,1.0,accuracy,0.016284,0.026259,0.205901,0.016284,0.026259,0.205901,1,True,10
8,ExtraTreesGini,1.0,1.0,accuracy,0.017004,0.014375,0.199694,0.017004,0.014375,0.199694,1,True,9
9,WeightedEnsemble_L2,1.0,1.0,accuracy,0.017022,0.026497,0.409636,0.000738,0.000238,0.203735,2,True,14
