From 9498a2fe8da7135ca0a2f5c8267968baf2d08f84 Mon Sep 17 00:00:00 2001 From: Pakulin Sergei Date: Tue, 2 May 2023 16:45:27 +0300 Subject: [PATCH 1/4] +csv support --- docs/source/benchmarks/amlb_res.csv | 33 +++++++++++++++++++++++++++++ docs/source/benchmarks/tabular.rst | 8 +++++++ 2 files changed, 41 insertions(+) create mode 100644 docs/source/benchmarks/amlb_res.csv diff --git a/docs/source/benchmarks/amlb_res.csv b/docs/source/benchmarks/amlb_res.csv new file mode 100644 index 0000000000..0d26ceb35b --- /dev/null +++ b/docs/source/benchmarks/amlb_res.csv @@ -0,0 +1,33 @@ +Dataset name,Metric name,AutoGluon,FEDOT,H2O,LAMA +APSFailure,auc,0.99,0.991,, +Amazon_employee_access,auc,0.857,0.865,, +Australian,auc,0.94,0.939,0.939,0.945 +Covertype,neg_logloss,-0.071,-0.117,, +Fashion-MNIST,neg_logloss,-0.329,-0.373,, +Jannis,neg_logloss,-0.728,-0.737,, +KDDCup09_appetency,auc,0.804,0.822,, +MiniBooNE,auc,0.982,0.981,, +Shuttle,neg_logloss,-0.001,-0.001,, +Volkert,neg_logloss,-0.917,-1.097,, +adult,auc,0.91,0.925,, +bank-marketing,auc,0.931,0.935,, +blood-transfusion,auc,0.69,0.759,0.765,0.75 +car,neg_logloss,-0.117,-0.011,-0.004,-0.002 +christine,auc,0.804,0.812,0.823,0.83 +cnae-9,neg_logloss,-0.332,-0.211,-0.175,-0.156 +connect-4,neg_logloss,-0.502,-0.456,, +credit-g,auc,0.795,0.778,0.789,0.796 +dilbert,neg_logloss,-0.148,-0.159,-0.05,-0.033 +fabert,neg_logloss,-0.788,-0.895,-0.752,-0.766 +guillermo,auc,0.9,0.891,, +jasmine,auc,0.883,0.888,0.887,0.88 +jungle_chess_2pcs_raw_endgame_complete,neg_logloss,-0.431,-0.193,, +kc1,auc,0.822,0.843,,0.831 +kr-vs-kp,auc,0.999,1.0,,1.0 +mfeat-factors,neg_logloss,-0.161,-0.094,,-0.082 +nomao,auc,0.995,0.994,, +numerai28_6,auc,0.517,0.529,, +phoneme,auc,0.965,0.965,,0.965 +segment,neg_logloss,-0.094,-0.062,,-0.061 +sylvine,auc,0.985,0.988,,0.988 +vehicle,neg_logloss,-0.515,-0.354,,-0.404 diff --git a/docs/source/benchmarks/tabular.rst b/docs/source/benchmarks/tabular.rst index 1e9bb70ed6..052a5dd66c 100644 --- 
a/docs/source/benchmarks/tabular.rst +++ b/docs/source/benchmarks/tabular.rst @@ -23,3 +23,11 @@ Also, the comparison was conducted against the state-of-the-art AutoGluon framew :width: 80% There is a small advantage of the FEDOT for F1 and ROC AUC metrics, but the other metrics are near equal. + +There are even more overall classification problem results across popular AutoML frameworks: + +.. csv-table:: Classification statistics + :file: amlb_res.csv + :align: center + :widths: auto + :header-rows: 1 From 0e57231209bdbf3c0725d36846ce3116486f31e8 Mon Sep 17 00:00:00 2001 From: Pakulin Sergei Date: Wed, 3 May 2023 14:35:04 +0300 Subject: [PATCH 2/4] shortened dataset name --- docs/source/benchmarks/amlb_res.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/benchmarks/amlb_res.csv b/docs/source/benchmarks/amlb_res.csv index 0d26ceb35b..8464f94e1a 100644 --- a/docs/source/benchmarks/amlb_res.csv +++ b/docs/source/benchmarks/amlb_res.csv @@ -21,7 +21,7 @@ dilbert,neg_logloss,-0.148,-0.159,-0.05,-0.033 fabert,neg_logloss,-0.788,-0.895,-0.752,-0.766 guillermo,auc,0.9,0.891,, jasmine,auc,0.883,0.888,0.887,0.88 -jungle_chess_2pcs_raw_endgame_complete,neg_logloss,-0.431,-0.193,, +jungle chess,neg_logloss,-0.431,-0.193,, kc1,auc,0.822,0.843,,0.831 kr-vs-kp,auc,0.999,1.0,,1.0 mfeat-factors,neg_logloss,-0.161,-0.094,,-0.082 From 6c7f180541e2371dda86159d56e604a61b644a4e Mon Sep 17 00:00:00 2001 From: Pakulin Sergei Date: Thu, 4 May 2023 18:59:44 +0300 Subject: [PATCH 3/4] add lama's medium datasets --- docs/source/benchmarks/amlb_res.csv | 30 ++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/source/benchmarks/amlb_res.csv b/docs/source/benchmarks/amlb_res.csv index 8464f94e1a..07fb820c62 100644 --- a/docs/source/benchmarks/amlb_res.csv +++ b/docs/source/benchmarks/amlb_res.csv @@ -1,32 +1,32 @@ Dataset name,Metric name,AutoGluon,FEDOT,H2O,LAMA -APSFailure,auc,0.99,0.991,, 
-Amazon_employee_access,auc,0.857,0.865,, +APSFailure,auc,0.99,0.991,,0.992 +Amazon_employee_access,auc,0.857,0.865,,0.879 Australian,auc,0.94,0.939,0.939,0.945 Covertype,neg_logloss,-0.071,-0.117,, -Fashion-MNIST,neg_logloss,-0.329,-0.373,, -Jannis,neg_logloss,-0.728,-0.737,, -KDDCup09_appetency,auc,0.804,0.822,, -MiniBooNE,auc,0.982,0.981,, -Shuttle,neg_logloss,-0.001,-0.001,, -Volkert,neg_logloss,-0.917,-1.097,, -adult,auc,0.91,0.925,, -bank-marketing,auc,0.931,0.935,, +Fashion-MNIST,neg_logloss,-0.329,-0.373,,-0.248 +Jannis,neg_logloss,-0.728,-0.737,,-0.664 +KDDCup09_appetency,auc,0.804,0.822,,0.85 +MiniBooNE,auc,0.982,0.981,,0.988 +Shuttle,neg_logloss,-0.001,-0.001,,-0.001 +Volkert,neg_logloss,-0.917,-1.097,,-0.806 +adult,auc,0.91,0.925,,0.932 +bank-marketing,auc,0.931,0.935,,0.94 blood-transfusion,auc,0.69,0.759,0.765,0.75 car,neg_logloss,-0.117,-0.011,-0.004,-0.002 christine,auc,0.804,0.812,0.823,0.83 cnae-9,neg_logloss,-0.332,-0.211,-0.175,-0.156 -connect-4,neg_logloss,-0.502,-0.456,, +connect-4,neg_logloss,-0.502,-0.456,,-0.337 credit-g,auc,0.795,0.778,0.789,0.796 dilbert,neg_logloss,-0.148,-0.159,-0.05,-0.033 fabert,neg_logloss,-0.788,-0.895,-0.752,-0.766 -guillermo,auc,0.9,0.891,, +guillermo,auc,0.9,0.891,,0.926 jasmine,auc,0.883,0.888,0.887,0.88 -jungle chess,neg_logloss,-0.431,-0.193,, +jungle chess,neg_logloss,-0.431,-0.193,,-0.149 kc1,auc,0.822,0.843,,0.831 kr-vs-kp,auc,0.999,1.0,,1.0 mfeat-factors,neg_logloss,-0.161,-0.094,,-0.082 -nomao,auc,0.995,0.994,, -numerai28_6,auc,0.517,0.529,, +nomao,auc,0.995,0.994,,0.997 +numerai28_6,auc,0.517,0.529,,0.531 phoneme,auc,0.965,0.965,,0.965 segment,neg_logloss,-0.094,-0.062,,-0.061 sylvine,auc,0.985,0.988,,0.988 From 8cf7825b2243d2133724d12e01baccf752269b18 Mon Sep 17 00:00:00 2001 From: Pakulin Sergei Date: Wed, 17 May 2023 16:37:44 +0300 Subject: [PATCH 4/4] shorten page's text --- docs/source/benchmarks/tabular.rst | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git 
a/docs/source/benchmarks/tabular.rst b/docs/source/benchmarks/tabular.rst index 052a5dd66c..f8a4099a10 100644 --- a/docs/source/benchmarks/tabular.rst +++ b/docs/source/benchmarks/tabular.rst @@ -1,30 +1,7 @@ Tabular data ------------ -The subset of PMLB benchmarks was evaluated for FEDOT, `TPOT `__, `MLBox `__ and XGboost baseline. The results and metadata are presented below. - -|Metadata for datasets| - -.. |Metadata for datasets| image:: img_benchmarks/fedot_meta.png - :width: 80% - -|Metrics for prediction| - -.. |Metrics for prediction| image:: img_benchmarks/fedot_classregr.png - :width: 80% - -As we can see from the table, the results obtained during the experiments demonstrate the advantage of composite pipelines created by the FEDOT over less sophisticated competitors. The only exception is a single case for regression and classification problems respectively, where the maximum value of the quality metric was obtained using a static pipeline. - -Also, the comparison was conducted against the state-of-the-art AutoGluon framework. - -|Comparison of FEDOT and AutoGluon| - -.. |Comparison of FEDOT and AutoGluon| image:: img_benchmarks/fedot_class_gluon.png - :width: 80% - -There is a small advantage of the FEDOT for F1 and ROC AUC metrics, but the other metrics are near equal. - -There are even more overall classification problem results across popular AutoML frameworks: +Here are the overall results for classification problems across popular AutoML frameworks: .. csv-table:: Classification statistics :file: amlb_res.csv