In [1]:
import os
import shutil
import pandas as pd
import mercury as mr
from supervised.automl import AutoML 

In [2]:
from typing import List

In [3]:
import warnings
# Install a blanket "ignore" filter: no category is given, so every warning
# raised after this point is suppressed (presumably to keep the rendered app
# output clean — confirm this is intended, since it also hides deprecations).
warnings.filterwarnings("ignore")

In [4]:
# Declare the Mercury app, passing the title and description it is created with.
app = mr.App(
    title="Train AutoML 🧑‍💻",
    description="Train ML pipeline with MLJAR AutoML",
)

# Train Machine Learning Pipeline with MLJAR AutoML
Please follow the steps:
1. Upload a CSV file with data. The file should have column names in the first line.
2. Select input features and target column.
3. Select AutoML training mode, algorithms and training time limit.
4. Directory with all ML models will be zipped and available to download.

In [5]:
# data_file = mr.File(label="Upload CSV with training data", max_file_size="1MB")

In [6]:
# if data_file.filepath is None:
#     mr.Stop()

In [7]:
# df = pd.read_csv(data_file.filepath)

In [40]:
from sklearn.datasets import load_iris

# NOTE: with return_X_y=True the result is a (features, target) tuple, not a
# single DataFrame. Later cells read the feature frame as df[0].
df = load_iris(
    return_X_y=True,
    as_frame=True,
)

In [41]:
# Emit a level-3 Markdown heading ahead of the data display cells.
mr.Markdown("### Training data")

### Training data

In [42]:
# Inspection cell: df is a tuple, so this echoes both the feature frame and
# the target series.
df

(     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 0                  5.1               3.5                1.4               0.2
 1                  4.9               3.0                1.4               0.2
 2                  4.7               3.2                1.3               0.2
 3                  4.6               3.1                1.5               0.2
 4                  5.0               3.6                1.4               0.2
 ..                 ...               ...                ...               ...
 145                6.7               3.0                5.2               2.3
 146                6.3               2.5                5.0               1.9
 147                6.5               3.0                5.2               2.0
 148                6.2               3.4                5.4               2.3
 149                5.9               3.0                5.1               1.8
 
 [150 rows x 4 columns],
 0      0
 1      0
 2   

In [43]:
# Show only the feature DataFrame (first element of the tuple held in df).
df[0]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [44]:
# Inspection cell (same expression as an earlier cell): echoes the whole
# (features, target) tuple held in df.
df

(     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 0                  5.1               3.5                1.4               0.2
 1                  4.9               3.0                1.4               0.2
 2                  4.7               3.2                1.3               0.2
 3                  4.6               3.1                1.5               0.2
 4                  5.0               3.6                1.4               0.2
 ..                 ...               ...                ...               ...
 145                6.7               3.0                5.2               2.3
 146                6.3               2.5                5.0               1.9
 147                6.5               3.0                5.2               2.0
 148                6.2               3.4                5.4               2.3
 149                5.9               3.0                5.1               1.8
 
 [150 rows x 4 columns],
 0      0
 1      0
 2   

In [45]:
# Feature picker: every column of the feature frame is offered as a choice,
# and all but the last column are pre-selected.
x_columns = mr.MultiSelect(
    label="Input features",
    value=list(df[0].columns[:-1]),
    choices=list(df[0].columns),
)

mercury.MultiSelect

In [46]:
# Target picker: offers the same columns as the feature picker and defaults
# to the last column of the feature frame.
y_column = mr.Select(
    label="Target",
    value=list(df[0].columns)[-1],
    choices=list(df[0].columns),
)

mercury.Select

In [47]:
# Guard the cells below: training needs at least one input feature and a
# target. mr.Stop() is called so the following cells do not run on an
# invalid selection.
if x_columns.value is None or len(x_columns.value) == 0 or y_column.value is None:
    print("Please select input features and target column")
    mr.Stop()
elif y_column.value in x_columns.value:
    # Both widgets offer the same column list, so the target can also be
    # picked as a feature. That would leak the label into the inputs and
    # produce a meaningless model, so refuse that selection too.
    print("Target column must not be one of the input features")
    mr.Stop()

In [68]:
# AutoML mode selector; the chosen value is also used as a key into `algos`.
mode = mr.Select(
    label="AutoML Mode",
    value="Explain",
    choices=["Explain", "Perform", "Compete"],
)

mercury.Select

In [49]:
# Algorithm names offered for each AutoML mode. The keys must stay identical
# to the options of the "AutoML Mode" select widget, because the algorithm
# picker looks its choices up with algos[mode.value].
algos = {
    "Explain": [
        "Baseline",
        "Linear",
        "Decision Tree",
        "Random Forest",
        "Xgboost",
        "Neural Network",
    ],
    "Perform": [
        "Linear",
        "Random Forest",
        "LightGBM",
        "Xgboost",
        "CatBoost",
        "Neural Network",
    ],
    "Compete": [
        "Decision Tree",
        "Random Forest",
        "Extra Trees",
        "LightGBM",
        "Xgboost",
        "CatBoost",
        "Neural Network",
        "Nearest Neighbors",
    ],
}

In [18]:
# Offer the algorithm list that belongs to the selected mode, with all of
# them pre-selected.
algorithms = mr.MultiSelect(
    label="Algorithms",
    value=algos[mode.value],
    choices=algos[mode.value],
)

mercury.MultiSelect

In [50]:
# Training budget in seconds. The choices are strings; the selected value is
# converted with int() where the AutoML object is constructed.
time_limit = mr.Select(
    label="Time limit (seconds)",
    value="60",
    choices=["60", "120", "240", "300"],
)

mercury.Select

In [69]:
# Button that gates training: the fit cell only runs its body when
# start_training.clicked is true.
start_training = mr.Button(
    label="Start training",
    style="success",
)

mercury.Button

In [52]:
# Mercury output directory handle; its .path is used as the destination
# folder for the zipped AutoML results in the training cell.
output_dir = mr.OutputDir()

In [70]:
# Build the AutoML object from the current widget values. It is constructed
# on every run; fitting happens only in the button-gated cell.
automl = AutoML(
    mode=mode.value,
    algorithms=algorithms.value,
    total_time_limit=int(time_limit.value),
)

In [54]:
# Inspection cell: echo the currently selected input feature names.
x_columns.value

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)']

In [71]:
# Train only when the "Start training" button was clicked.
if start_training.clicked:
    mr.Markdown("### AutoML training logs")

    # df is a (features, target) tuple; both the inputs and the target are
    # taken from the feature frame df[0] using the widget selections.
    automl.fit(
        df[0][x_columns.value],
        df[0][y_column.value],
    )

    # Zip the AutoML results directory into Mercury's output directory.
    # NOTE(review): _results_path is a private AutoML attribute — confirm it
    # stays available across mljar-supervised versions.
    output_filename = os.path.join(output_dir.path, automl._results_path)
    shutil.make_archive(output_filename, 'zip', automl._results_path)

### AutoML training logs

AutoML directory: AutoML_4
The task is regression with evaluation metric rmse
AutoML will use algorithms: ['Baseline', 'Linear', 'Decision Tree', 'Random Forest', 'Xgboost', 'Neural Network']
AutoML will stack models
AutoML will ensemble available models
AutoML steps: ['adjust_validation', 'simple_algorithms', 'default_algorithms', 'not_so_random', 'golden_features', 'kmeans_features', 'insert_random_feature', 'features_selection', 'hill_climbing_1', 'hill_climbing_2', 'boost_on_errors', 'ensemble', 'stack', 'ensemble_stacked']
* Step adjust_validation will try to check up to 1 model
1_DecisionTree rmse 0.17655 trained in 0.34 seconds
Disable stacking for split validation
* Step simple_algorithms will try to check up to 4 models
2_Baseline rmse 0.705007 trained in 0.35 seconds
3_DecisionTree rmse 0.169124 trained in 0.36 seconds
4_DecisionTree rmse 0.169124 trained in 0.35 seconds
5_Linear rmse 0.185766 trained in 0.36 seconds
* Step default_algorithms will try to check up to 3 models


In [72]:
# Inspection cell: echo the AutoML object as the cell result.
automl

In [73]:
# Inspection cell: echo the selected best model (reads a private attribute).
automl._best_model

<supervised.ensemble.Ensemble at 0x1d250be5c10>

In [74]:
# Inspection cell: print the plain-text representation of the best model.
print(automl._best_model)

<supervised.ensemble.Ensemble object at 0x000001D250BE5C10>


In [75]:
# Stop before the report cell when there is no best model.
# NOTE(review): presumably _best_model stays None until fit() has run —
# confirm against the mljar-supervised version in use.
if automl._best_model is None:
    mr.Stop()

In [76]:
# Render the AutoML report (leaderboard plus per-model metric tables) as the
# cell output.
automl.report()

Best model,name,model_type,metric_type,metric_value,train_time
,1_DecisionTree,Decision Tree,rmse,0.17655,0.68
,2_Baseline,Baseline,rmse,0.705007,0.69
,3_DecisionTree,Decision Tree,rmse,0.169124,0.66
,4_DecisionTree,Decision Tree,rmse,0.169124,0.65
,5_Linear,Linear,rmse,0.185766,0.67
,6_Default_Xgboost,Xgboost,rmse,0.148858,0.79
,7_Default_NeuralNetwork,Neural Network,rmse,0.149428,0.77
,8_Default_RandomForest,Random Forest,rmse,0.115134,1.06
,9_Xgboost,Xgboost,rmse,0.133685,0.85
,18_RandomForest,Random Forest,rmse,0.120136,1.03

Metric,Score
MAE,0.130615
MSE,0.0247578
RMSE,0.157346
R2,0.947665
MAPE,0.222804

Metric,Score
MAE,0.13997
MSE,0.0356321
RMSE,0.188765
R2,0.924679
MAPE,0.20243

Metric,Score
MAE,0.594858
MSE,0.49059
RMSE,0.700421
R2,-0.037042
MAPE,1.71653

Metric,Score
MAE,0.220566
MSE,0.0752663
RMSE,0.274347
R2,0.840897
MAPE,0.359979

Metric,Score
MAE,0.095322
MSE,0.0227869
RMSE,0.150953
R2,0.951831
MAPE,0.120676

Metric,Score
MAE,0.10817
MSE,0.0219201
RMSE,0.148054
R2,0.953664
MAPE,0.176568

Metric,Score
MAE,0.153385
MSE,0.0333913
RMSE,0.182733
R2,0.929415
MAPE,0.303535

Metric,Score
MAE,0.125691
MSE,0.0303938
RMSE,0.174338
R2,0.935752
MAPE,0.174874

Metric,Score
MAE,0.0787661
MSE,0.0144327
RMSE,0.120136
R2,0.969491
MAPE,0.127654

Metric,Score
MAE,0.108199
MSE,0.0184088
RMSE,0.135679
R2,0.961086
MAPE,0.185091

Metric,Score
MAE,0.081461
MSE,0.0141083
RMSE,0.118778
R2,0.970177
MAPE,0.109689

Metric,Score
MAE,0.104263
MSE,0.0177158
RMSE,0.133101
R2,0.962551
MAPE,0.183676

Metric,Score
MAE,0.131637
MSE,0.0311699
RMSE,0.17655
R2,0.934111
MAPE,0.172075

Metric,Score
MAE,0.10135
MSE,0.0173577
RMSE,0.131749
R2,0.963308
MAPE,0.170388

Metric,Score
MAE,0.117754
MSE,0.0242514
RMSE,0.155729
R2,0.948736
MAPE,0.157697

Metric,Score
MAE,0.0927415
MSE,0.0138537
RMSE,0.117702
R2,0.970715
MAPE,0.164684

Metric,Score
MAE,0.101714
MSE,0.0187234
RMSE,0.136833
R2,0.960421
MAPE,0.150585

Metric,Score
MAE,0.0924999
MSE,0.0140483
RMSE,0.118526
R2,0.970304
MAPE,0.151346

Metric,Score
MAE,0.105603
MSE,0.0184012
RMSE,0.135651
R2,0.961102
MAPE,0.167615

Metric,Score
MAE,0.138688
MSE,0.0280412
RMSE,0.167455
R2,0.940725
MAPE,0.273533

Metric,Score
MAE,0.0973455
MSE,0.0181446
RMSE,0.134702
R2,0.961645
MAPE,0.149103

Metric,Score
MAE,0.0925177
MSE,0.0162203
RMSE,0.127359
R2,0.965712
MAPE,0.129933

Metric,Score
MAE,0.137488
MSE,0.0289635
RMSE,0.170187
R2,0.938775
MAPE,0.236707

Metric,Score
MAE,0.142225
MSE,0.0284123
RMSE,0.168559
R2,0.93994
MAPE,0.188888

Metric,Score
MAE,0.143213
MSE,0.0332119
RMSE,0.182241
R2,0.929795
MAPE,0.20136

Metric,Score
MAE,0.592346
MSE,0.497034
RMSE,0.705007
R2,-0.0506644
MAPE,1.74671

Metric,Score
MAE,0.137924
MSE,0.0291162
RMSE,0.170635
R2,0.938452
MAPE,0.212513

Metric,Score
MAE,0.179881
MSE,0.0520242
RMSE,0.228088
R2,0.890028
MAPE,0.290209

Metric,Score
MAE,0.158705
MSE,0.0431947
RMSE,0.207833
R2,0.908692
MAPE,0.271079

Metric,Score
MAE,0.180783
MSE,0.0519316
RMSE,0.227885
R2,0.890223
MAPE,0.313883

Metric,Score
MAE,0.400978
MSE,0.203818
RMSE,0.451462
R2,0.569155
MAPE,0.731695

Metric,Score
MAE,0.18495
MSE,0.0591081
RMSE,0.243122
R2,0.875053
MAPE,0.375463

Metric,Score
MAE,0.0843126
MSE,0.0132558
RMSE,0.115134
R2,0.971979
MAPE,0.155232

Metric,Score
MAE,0.0838224
MSE,0.0130884
RMSE,0.114405
R2,0.972333
MAPE,0.146939

Metric,Score
MAE,0.116443
MSE,0.0270119
RMSE,0.164353
R2,0.9429
MAPE,0.135821

Metric,Score
MAE,0.0927415
MSE,0.0138537
RMSE,0.117702
R2,0.970715
MAPE,0.164684

Metric,Score
MAE,0.126476
MSE,0.028603
RMSE,0.169124
R2,0.939537
MAPE,0.160964

Metric,Score
MAE,0.0924999
MSE,0.0140483
RMSE,0.118526
R2,0.970304
MAPE,0.151346

Metric,Score
MAE,0.0918254
MSE,0.0156361
RMSE,0.125044
R2,0.966947
MAPE,0.150041

Metric,Score
MAE,0.0940071
MSE,0.0175972
RMSE,0.132654
R2,0.962802
MAPE,0.161375

Metric,Score
MAE,0.0970104
MSE,0.0180967
RMSE,0.134524
R2,0.961746
MAPE,0.149916

Metric,Score
MAE,0.109262
MSE,0.0224338
RMSE,0.149779
R2,0.952578
MAPE,0.191348

Metric,Score
MAE,0.120128
MSE,0.0215793
RMSE,0.146899
R2,0.954384
MAPE,0.178894

Metric,Score
MAE,0.124938
MSE,0.0224777
RMSE,0.149926
R2,0.952485
MAPE,0.194198

Metric,Score
MAE,0.22142
MSE,0.0696868
RMSE,0.263983
R2,0.852691
MAPE,0.337874

Metric,Score
MAE,0.126621
MSE,0.0253641
RMSE,0.159261
R2,0.946384
MAPE,0.224026

Metric,Score
MAE,0.146147
MSE,0.0300858
RMSE,0.173453
R2,0.936403
MAPE,0.192723

Metric,Score
MAE,0.126476
MSE,0.028603
RMSE,0.169124
R2,0.939537
MAPE,0.160964

Metric,Score
MAE,0.166216
MSE,0.0377496
RMSE,0.194293
R2,0.920202
MAPE,0.256102

Metric,Score
MAE,0.11118
MSE,0.0199803
RMSE,0.141352
R2,0.957764
MAPE,0.162812

Metric,Score
MAE,0.0865903
MSE,0.0136444
RMSE,0.116809
R2,0.971158
MAPE,0.148416

Metric,Score
MAE,0.0838224
MSE,0.0130884
RMSE,0.114405
R2,0.972333
MAPE,0.146939

Metric,Score
MAE,0.0850019
MSE,0.0135904
RMSE,0.116578
R2,0.971272
MAPE,0.155688

Metric,Score
MAE,0.0843126
MSE,0.0132558
RMSE,0.115134
R2,0.971979
MAPE,0.155232

Metric,Score
MAE,0.0850019
MSE,0.0135904
RMSE,0.116578
R2,0.971272
MAPE,0.155688

Metric,Score
MAE,0.0843126
MSE,0.0132558
RMSE,0.115134
R2,0.971979
MAPE,0.155232

Metric,Score
MAE,0.111092
MSE,0.0272266
RMSE,0.165005
R2,0.942447
MAPE,0.1912

Metric,Score
MAE,0.109982
MSE,0.0269927
RMSE,0.164295
R2,0.942941
MAPE,0.192073

Metric,Score
MAE,0.146459
MSE,0.0345089
RMSE,0.185766
R2,0.927053
MAPE,0.207643

Metric,Score
MAE,0.107472
MSE,0.0269541
RMSE,0.164177
R2,0.943023
MAPE,0.190765

Metric,Score
MAE,0.136301
MSE,0.0232886
RMSE,0.152606
R2,0.950771
MAPE,0.272521

Metric,Score
MAE,0.202144
MSE,0.0584212
RMSE,0.241705
R2,0.876505
MAPE,0.431875

Metric,Score
MAE,0.210344
MSE,0.0704049
RMSE,0.265339
R2,0.851173
MAPE,0.517394

Metric,Score
MAE,0.164225
MSE,0.0464141
RMSE,0.215439
R2,0.901887
MAPE,0.209525

Metric,Score
MAE,0.121623
MSE,0.0221588
RMSE,0.148858
R2,0.953159
MAPE,0.179401

Metric,Score
MAE,0.111691
MSE,0.0223287
RMSE,0.149428
R2,0.9528
MAPE,0.177171

Metric,Score
MAE,0.0843126
MSE,0.0132558
RMSE,0.115134
R2,0.971979
MAPE,0.155232

Metric,Score
MAE,0.10005
MSE,0.0152685
RMSE,0.123566
R2,0.967724
MAPE,0.163106

Metric,Score
MAE,0.0863058
MSE,0.014629
RMSE,0.12095
R2,0.969076
MAPE,0.14072

Metric,Score
MAE,0.0858314
MSE,0.014258
RMSE,0.119407
R2,0.96986
MAPE,0.144907

Metric,Score
MAE,0.0939143
MSE,0.0178716
RMSE,0.133685
R2,0.962222
MAPE,0.158601

Model,Weight
12_Xgboost,5
18_RandomForest,10
25_RandomForest,6
26_RandomForest,2
34_NeuralNetwork,4
37_RandomForest,3
43_Xgboost,1
51_DecisionTree,6
5_Linear,24
61_NeuralNetwork,4

Metric,Score
MAE,0.0729458
MSE,0.00943209
RMSE,0.0971189
R2,0.980062
MAPE,0.160801
