In [22]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from autogluon.tabular import TabularPredictor

# Загрузка датасета Iris
iris = load_iris()
X = iris.data
y = iris.target

print(X[:10])

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]]


In [23]:
# Применение PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
print(X_pca[:10])

[[-2.68412563  0.31939725]
 [-2.71414169 -0.17700123]
 [-2.88899057 -0.14494943]
 [-2.74534286 -0.31829898]
 [-2.72871654  0.32675451]
 [-2.28085963  0.74133045]
 [-2.82053775 -0.08946138]
 [-2.62614497  0.16338496]
 [-2.88638273 -0.57831175]
 [-2.6727558  -0.11377425]]


In [24]:
# Создание нового датасета с использованием новых признаков
new_data = np.concatenate((X_pca, y.reshape(-1, 1)), axis=1)
new_data = pd.DataFrame(new_data, columns=['feature1', 'feature2', 'target'])

In [25]:
# Создание и обучение модели с помощью AutoGluon
predictor = TabularPredictor(label='target').fit(new_data, presets='medium_quality')
# predictor = TabularPredictor.load("AutogluonModels/ag-20240209_065058")

No path specified. Models will be saved in: "AutogluonModels/ag-20240209_134018"
Presets specified: ['medium_quality']
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240209_134018"
AutoGluon Version:  1.0.0
Python Version:     3.9.6
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 21.6.0: Mon Dec 19 20:46:01 PST 2022; root:xnu-8020.240.18~2/RELEASE_ARM64_T8101
CPU Count:          8
Memory Avail:       6.76 GB / 16.00 GB (42.3%)
Disk Space Avail:   55.19 GB / 228.27 GB (24.2%)
Train Data Rows:    150
Train Data Columns: 2
Label Column:       target
AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == float, but few unique label-values observed and label-values can be converted to int).
	3 unique label values:  [0.0, 1.0, 2.0]
	If 'multiclass' is not the correct problem_type, please manually specify the problem_type parameter during predictor init (You may specify pr

In [26]:
# Прогнозирование на новых данных
new_X = np.array([[5.1, 3.5, 1.4, 0.2], [5.9, 3.0, 5.1, 1.8], [5.1, 2.5, 3.0, 1.1]])  # Пример новых данных
new_X_pca = pca.transform(new_X)
test_data = pd.DataFrame(np.concatenate((new_X_pca, np.zeros((new_X.shape[0], 1))), axis=1),
                        columns=['feature1', 'feature2', 'target'])
predictions = predictor.predict(test_data)

predictions.head()

0    0.0
1    2.0
2    1.0
Name: target, dtype: float64

In [27]:
predictor.evaluate(new_data, silent=True)

{'accuracy': 1.0, 'balanced_accuracy': 1.0, 'mcc': 1.0}

In [28]:
predictor.leaderboard(new_data)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,ExtraTreesGini,1.0,1.0,accuracy,0.029066,0.013501,0.20538,0.029066,0.013501,0.20538,1,True,9
1,ExtraTreesEntr,1.0,1.0,accuracy,0.030894,0.014218,0.195897,0.030894,0.014218,0.195897,1,True,10
2,WeightedEnsemble_L2,1.0,1.0,accuracy,0.031732,0.014449,0.402034,0.000838,0.000231,0.206137,2,True,14
3,LightGBMLarge,0.993333,1.0,accuracy,0.002433,0.000762,1.705988,0.002433,0.000762,1.705988,1,True,13
4,KNeighborsDist,0.993333,0.966667,accuracy,0.013558,0.013513,0.002712,0.013558,0.013513,0.002712,1,True,2
5,RandomForestGini,0.993333,0.966667,accuracy,0.026818,0.013673,0.200143,0.026818,0.013673,0.200143,1,True,6
6,RandomForestEntr,0.993333,0.966667,accuracy,0.02934,0.013612,0.199914,0.02934,0.013612,0.199914,1,True,7
7,NeuralNetTorch,0.98,1.0,accuracy,0.002406,0.001492,0.222422,0.002406,0.001492,0.222422,1,True,12
8,XGBoost,0.98,1.0,accuracy,0.008898,0.001181,0.179256,0.008898,0.001181,0.179256,1,True,11
9,LightGBM,0.966667,1.0,accuracy,0.001203,0.000728,0.489312,0.001203,0.000728,0.489312,1,True,5


In [29]:
predictor.evaluate(test_data, silent=True)

{'accuracy': 0.3333333333333333,
 'balanced_accuracy': 0.7777777777777777,
 'mcc': 0.0}

In [30]:
predictor.leaderboard(test_data)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,CatBoost,0.333333,0.966667,accuracy,0.00085,0.000574,0.193996,0.00085,0.000574,0.193996,1,True,8
1,LightGBM,0.333333,1.0,accuracy,0.001788,0.000728,0.489312,0.001788,0.000728,0.489312,1,True,5
2,LightGBMLarge,0.333333,1.0,accuracy,0.001936,0.000762,1.705988,0.001936,0.000762,1.705988,1,True,13
3,NeuralNetTorch,0.333333,1.0,accuracy,0.002301,0.001492,0.222422,0.002301,0.001492,0.222422,1,True,12
4,LightGBMXT,0.333333,1.0,accuracy,0.002981,0.000839,0.559425,0.002981,0.000839,0.559425,1,True,4
5,NeuralNetFastAI,0.333333,1.0,accuracy,0.0063,0.003323,0.422933,0.0063,0.003323,0.422933,1,True,3
6,XGBoost,0.333333,1.0,accuracy,0.008217,0.001181,0.179256,0.008217,0.001181,0.179256,1,True,11
7,KNeighborsUnif,0.333333,0.966667,accuracy,0.014822,0.015451,0.002872,0.014822,0.015451,0.002872,1,True,1
8,KNeighborsDist,0.333333,0.966667,accuracy,0.015171,0.013513,0.002712,0.015171,0.013513,0.002712,1,True,2
9,ExtraTreesEntr,0.333333,1.0,accuracy,0.018108,0.014218,0.195897,0.018108,0.014218,0.195897,1,True,10
