In [2]:
#surrogate models
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.model_selection import train_test_split

data_name = 'wings'

data_file = f'../{data_name}_vectors_drags_lifts.csv'
df = TabularDataset(data_file)

train_df, test_df = train_test_split(df, test_size=0.2, random_state=777)

#exclue the first two columns of train data
train_data = train_df.drop(columns=['i', 'name'])
train_data.head()

Unnamed: 0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,dim_9,dim_10,...,dim_121,dim_122,dim_123,dim_124,dim_125,dim_126,dim_127,dim_128,drag,lift
406,-0.496165,1.705992,0.197353,1.742359,0.467517,-1.339144,-0.647645,0.697645,0.994912,0.193609,...,-0.770262,-0.448141,-0.866822,-2.491501,0.482521,0.362971,0.389367,-0.237259,0.395,0.425
54,0.033569,1.133478,1.176343,-0.943174,-0.657064,-0.401192,0.747147,1.081339,-1.888797,-1.40757,...,0.767994,-1.674341,0.346039,0.3872,-0.006894,0.338835,0.207995,0.574507,0.645,0.885
241,-0.559282,0.308935,-0.047847,-1.081136,0.175241,-1.149952,0.71952,0.39092,-0.075074,-0.300515,...,1.81544,0.596989,1.033692,-0.362098,0.247644,0.190097,-1.081548,-0.536878,0.446,0.756
952,-0.222745,0.463468,0.462111,0.234607,-0.859618,-2.425318,0.684795,-1.679074,1.190695,-0.385462,...,-2.272429,0.947205,-0.633594,-0.539486,0.290193,-0.221363,-1.451571,0.765826,0.384,0.177
337,1.397874,0.460493,-1.091555,-0.442342,0.823289,-1.108367,0.877267,-0.00336,0.461756,-0.80428,...,-1.359217,-1.532384,0.735832,0.230598,0.350978,0.722657,-0.1557,-0.85193,0.381,0.074


In [3]:
#save train_df to a csv file
train_df.to_csv('./train_df.csv', index=False)


In [4]:
#save test_df to a csv file
test_df.to_csv('./test_df.csv', index=False)

In [5]:
label = 'drag'
print("Summary of class variable: \n", train_data[label].describe())

Summary of class variable: 
 count    837.000000
mean       0.445233
std        0.112500
min        0.051000
25%        0.369000
50%        0.427000
75%        0.513000
max        0.944000
Name: drag, dtype: float64


In [6]:
import os
save_path = f'./agModels-{data_name}_{label}'  # specifies folder to store trained models
if not os.path.exists(save_path):
    os.makedirs(save_path)

bag_folds = 5 #suggestion range [5, 10]
bag_sets = 3 #suggestion range [1, 20]
stack_levels = 3 #suggestion range [0, 3]
metric = 'root_mean_squared_error' #Regression:mean_absolute_error, mean_squared_error,root_mean_squared_error (default), r2
predictor = TabularPredictor(label=label, path=save_path, eval_metric=metric).fit(train_data, 
                                                                                  presets='best_quality', 
                                                                                  auto_stack="True", 
                                                                                  num_bag_folds=bag_folds, 
                                                                                  num_bag_sets=bag_sets,
                                                                                  num_stack_levels=stack_levels,
                                                                                  verbosity=4)

Presets specified: ['best_quality']
User Specified kwargs:
{'auto_stack': 'True',
 'num_bag_folds': 5,
 'num_bag_sets': 3,
 'num_stack_levels': 3,
 'verbosity': 4}
Full kwargs:
{'_feature_generator_kwargs': None,
 '_save_bag_folds': None,
 'ag_args': None,
 'ag_args_ensemble': None,
 'ag_args_fit': None,
 'auto_stack': 'True',
 'calibrate': 'auto',
 'excluded_model_types': None,
 'feature_generator': 'auto',
 'feature_prune_kwargs': None,
 'holdout_frac': None,
 'hyperparameter_tune_kwargs': None,
 'keep_only_best': False,
 'name_suffix': None,
 'num_bag_folds': 5,
 'num_bag_sets': 3,
 'num_stack_levels': 3,
 'pseudo_data': None,
 'refit_full': False,
 'save_space': False,
 'set_best_to_refit_full': False,
 'unlabeled_data': None,
 'use_bag_holdout': False,
 'verbosity': 4}
Stack configuration (auto_stack=True): num_stack_levels=3, num_bag_folds=5, num_bag_sets=3
Saving ./agModels-wings_drag/learner.pkl
Saving ./agModels-wings_drag/predictor.pkl
Beginning AutoGluon training ...
AutoGlu

In [7]:
test_data = test_df.drop(columns=['i', 'name'])
# val_data.head()
y_val = test_data[label]
test_data_nolab = test_data.drop(columns=[label])  # delete label column to prove we're not cheating
test_data_nolab.head()

Unnamed: 0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,dim_9,dim_10,...,dim_120,dim_121,dim_122,dim_123,dim_124,dim_125,dim_126,dim_127,dim_128,lift
1046,-0.242641,-0.544351,-0.785688,0.915798,0.159682,-0.134473,0.513026,-0.386129,-1.244955,0.406885,...,0.504696,-0.266971,0.339783,-0.708615,-0.384231,-0.775731,0.406653,-0.137757,1.354433,0.346
657,-0.086765,-0.885193,0.826741,-0.850657,0.465779,-1.419797,-0.319893,0.348212,0.397075,-1.688038,...,1.223718,-1.820433,0.109049,0.877518,-0.688451,-0.905782,0.322554,0.742038,-0.282902,0.654
127,0.993629,0.058908,0.058237,-0.193928,-0.465946,0.194533,0.438341,-0.264994,-0.371554,1.000921,...,-1.863156,-0.465709,0.748947,-1.5375,1.178252,0.856316,0.28775,0.008825,0.656394,0.052
951,-0.238668,0.338577,-0.576213,-0.241772,-0.971682,1.144228,1.083222,-2.30459,0.678272,-0.991069,...,1.038386,-0.669204,1.110296,0.581472,0.028183,0.67845,-0.005965,-0.314793,-0.241957,0.604
134,-1.254356,0.55711,0.38981,0.332288,-0.160778,-0.05589,-1.489394,-0.652277,-0.348175,-0.549951,...,0.401014,-2.166166,-0.27722,-0.422792,0.157305,0.493497,-1.234308,-0.001335,0.454736,0.199


In [8]:
# %%capture log_output
# %config InlineBackend.figure_format = 'retina'
# %config Application.log_level = 'DEBUG'
# %config IPCompleter.greedy = True

predictor = TabularPredictor.load(save_path)  # unnecessary, just demonstrates how to load previously-trained predictor from file
y_pred = predictor.predict(test_data_nolab)
for item in y_pred:
    print(item)
print("Predictions:  \n", y_pred)
perf = predictor.evaluate_predictions(y_true=y_val, y_pred=y_pred, auxiliary_metrics=True)
print(perf)

results = predictor.fit_summary(show_plot=True)
print(results)
print(predictor.leaderboard(test_data, silent=True))

# with open('./output_all_parts.log', 'w') as f:
#     f.write(log_output.stdout)

Loading: ./agModels-wings_drag/predictor.pkl
Loading: ./agModels-wings_drag/learner.pkl
Loading: ./agModels-wings_drag/models/trainer.pkl
Loading: ./agModels-wings_drag/models/CatBoost_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/KNeighborsDist_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/NeuralNetFastAI_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/NeuralNetTorch_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/WeightedEnsemble_L2/model.pkl
Evaluation: root_mean_squared_error on test data: -0.08296561338100417
	Note: Scores are always higher_is_better. This metric score can be multiplied by -1 to get the metric value.
Evaluations on test data:
{
    "root_mean_squared_error": -0.08296561338100417,
    "mean_squared_error": -0.006883293003686257,
    "mean_absolute_error": -0.05817961138089499,
    "r2": 0.46211796008999806,
    "pearsonr": 0.6814670655969325,
    "median_absolute_error": -0.039285244584083556
}
Loading: ./agModels-wings_drag/models/K

0.47036629915237427
0.3335382342338562
0.43195658922195435
0.4288609027862549
0.43751829862594604
0.5908997654914856
0.4010847210884094
0.36850205063819885
0.35644587874412537
0.36648058891296387
0.46563971042633057
0.49456512928009033
0.41071242094039917
0.3698299527168274
0.6117626428604126
0.5627411603927612
0.3639877736568451
0.5535755157470703
0.5714623928070068
0.4222415089607239
0.4752230942249298
0.38462239503860474
0.5269132256507874
0.48046642541885376
0.43018436431884766
0.4458569884300232
0.5632071495056152
0.3982229232788086
0.41792139410972595
0.3360244631767273
0.3987458646297455
0.5225263237953186
0.42318373918533325
0.5020925998687744
0.46921032667160034
0.3872513175010681
0.3877437710762024
0.3786271810531616
0.336036741733551
0.4210362434387207
0.42496442794799805
0.3872826397418976
0.6093036532402039
0.3948030471801758
0.5237514972686768
0.3713464140892029
0.3845136761665344
0.42671942710876465
0.35966092348098755
0.32816439867019653
0.5685794353485107
0.44233694672

Loading: ./agModels-wings_drag/models/KNeighborsUnif_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/KNeighborsDist_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/LightGBMXT_BAG_L1/model.pkl


*** End of fit() summary ***
{'model_types': {'KNeighborsUnif_BAG_L1': 'StackerEnsembleModel_KNN', 'KNeighborsDist_BAG_L1': 'StackerEnsembleModel_KNN', 'LightGBMXT_BAG_L1': 'StackerEnsembleModel_LGB', 'LightGBM_BAG_L1': 'StackerEnsembleModel_LGB', 'RandomForestMSE_BAG_L1': 'StackerEnsembleModel_RF', 'CatBoost_BAG_L1': 'StackerEnsembleModel_CatBoost', 'ExtraTreesMSE_BAG_L1': 'StackerEnsembleModel_XT', 'NeuralNetFastAI_BAG_L1': 'StackerEnsembleModel_NNFastAiTabular', 'XGBoost_BAG_L1': 'StackerEnsembleModel_XGBoost', 'NeuralNetTorch_BAG_L1': 'StackerEnsembleModel_TabularNeuralNetTorch', 'LightGBMLarge_BAG_L1': 'StackerEnsembleModel_LGB', 'WeightedEnsemble_L2': 'WeightedEnsembleModel', 'LightGBMXT_BAG_L2': 'StackerEnsembleModel_LGB', 'LightGBM_BAG_L2': 'StackerEnsembleModel_LGB', 'RandomForestMSE_BAG_L2': 'StackerEnsembleModel_RF', 'CatBoost_BAG_L2': 'StackerEnsembleModel_CatBoost', 'ExtraTreesMSE_BAG_L2': 'StackerEnsembleModel_XT', 'NeuralNetFastAI_BAG_L2': 'StackerEnsembleModel_NNFastAiT

Loading: ./agModels-wings_drag/models/LightGBM_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/RandomForestMSE_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/CatBoost_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/ExtraTreesMSE_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/NeuralNetFastAI_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/XGBoost_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/NeuralNetTorch_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/LightGBMLarge_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/WeightedEnsemble_L2/model.pkl
Loading: ./agModels-wings_drag/models/LightGBMXT_BAG_L2/model.pkl
Loading: ./agModels-wings_drag/models/LightGBM_BAG_L2/model.pkl
Loading: ./agModels-wings_drag/models/RandomForestMSE_BAG_L2/model.pkl
Loading: ./agModels-wings_drag/models/CatBoost_BAG_L2/model.pkl
Loading: ./agModels-wings_drag/models/ExtraTreesMSE_BAG_L2/model.pkl
Loading: ./agModels-wings_drag/models/NeuralNetFastAI_BAG

                     model  score_test  score_val  pred_time_test  \
0   NeuralNetFastAI_BAG_L1   -0.080614  -0.082981        0.266239   
1      WeightedEnsemble_L5   -0.081011  -0.081175        7.497652   
2   NeuralNetFastAI_BAG_L4   -0.081029  -0.082144        7.165104   
3      WeightedEnsemble_L3   -0.081575  -0.080648        5.026024   
4      WeightedEnsemble_L4   -0.081784  -0.082370        6.293958   
5          CatBoost_BAG_L2   -0.081965  -0.081654        2.797598   
6   NeuralNetFastAI_BAG_L3   -0.082012  -0.084225        5.643897   
7          CatBoost_BAG_L3   -0.082133  -0.082925        5.469495   
8        LightGBMXT_BAG_L2   -0.082154  -0.082522        2.750088   
9           XGBoost_BAG_L2   -0.082220  -0.081700        2.884792   
10   NeuralNetTorch_BAG_L2   -0.082362  -0.082900        2.866781   
11    ExtraTreesMSE_BAG_L2   -0.082639  -0.081687        2.865007   
12         CatBoost_BAG_L4   -0.082836  -0.084197        6.994007   
13     WeightedEnsemble_L2   -0.08

In [9]:
print("AutoGluon infers problem type is: ", predictor.problem_type)
print("AutoGluon identified the following types of features:")
print(predictor.feature_metadata)

AutoGluon infers problem type is:  regression
AutoGluon identified the following types of features:
('float', []) : 129 | ['dim_1', 'dim_2', 'dim_3', 'dim_4', 'dim_5', ...]


In [10]:
train_data_pred = predictor.predict(train_data)
test_data_pred = predictor.predict(test_data)

import numpy as np
#save np array y_train_hat to a csv file
np.savetxt(f'./{data_name}_vectors_y_test_hat_{label}.csv', test_data_pred, delimiter=',')
np.savetxt(f'./{data_name}_vectors_y_train_hat_{label}.csv', train_data_pred, delimiter=',')

Loading: ./agModels-wings_drag/models/CatBoost_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/KNeighborsDist_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/NeuralNetFastAI_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/NeuralNetTorch_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/WeightedEnsemble_L2/model.pkl
Loading: ./agModels-wings_drag/models/CatBoost_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/KNeighborsDist_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/NeuralNetFastAI_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/NeuralNetTorch_BAG_L1/model.pkl
Loading: ./agModels-wings_drag/models/WeightedEnsemble_L2/model.pkl
