In [1]:
import os

from fedot_ind.api.main import FedotIndustrial
from fedot_ind.core.architecture.postprocessing.results_picker import ResultsPicker
from fedot_ind.core.ensemble.rank_ensembler import RankEnsemble

In [19]:
# Experiment configuration shared by every cell below: the dataset to classify,
# the feature generators to compare, and the folder that collects the results.
dataset_name = 'DistalPhalanxOutlineAgeGroup'
generators = [
    'topological',
    'quantile',
    'recurrence',
]

# The results folder is resolved against the current working directory, so the
# notebook writes next to wherever the kernel was started.
output_folder = os.path.abspath('./results_of_experiments')
# exist_ok=True keeps this cell safe to re-run when the folder is already there.
os.makedirs(name=output_folder, exist_ok=True)

In [20]:
# Run one complete experiment per feature generator: build the API object,
# fit on the train split, predict on the test split, then persist the
# predictions and metrics under ``output_folder``.
for generator in generators:
    # Only 'feature_generator' changes between iterations; everything else is
    # held fixed so the generators are compared under the same settings.
    config = {
        'task': 'ts_classification',
        'dataset': dataset_name,
        'feature_generator': generator,
        'use_cache': False,
        'timeout': 1,          # reported in the run log as a 1 min. time limit
        'n_jobs': 2,           # the run log reports 2 CPUs in use
        'window_sizes': 'auto',
        'logging_level': 20,   # presumably logging.INFO (numeric value 20) - confirm
    }
    industrial = FedotIndustrial(input_config=config,
                                 output_folder=output_folder)

    # The reader hands back three values; the third one is not needed here.
    # Index 0 of each split is passed as features and index 1 as target below.
    train_data, test_data, _ = industrial.reader.read(dataset_name=dataset_name)

    # The fit result is kept in ``model`` although this cell does not use it.
    model = industrial.fit(train_features=train_data[0],
                           train_target=train_data[1])

    # Hard labels and class probabilities are both produced for the test split.
    labels = industrial.predict(test_features=test_data[0])
    probs = industrial.predict_proba(test_features=test_data[0])

    metric = industrial.get_metrics(target=test_data[1],
                                    metric_names=['f1', 'roc_auc'])

    # Persist labels first, then probabilities, then the metrics.
    for kind, pred in (('labels', labels), ('probs', probs)):
        industrial.save_predict(predicted_data=pred, kind=kind)

    industrial.save_metrics(metrics=metric)

2023-04-06 17:11:50,036 - Initialising experiment setup
2023-04-06 17:11:50,048 - Experiment setup:
        dataset - DistalPhalanxOutlineAgeGroup,
        feature generator - topological,
        use_cache - False,
        error_correction - False,
        n_jobs - 2,
        timeout - 1,
        ensemble - None
2023-04-06 17:11:50,050 - Initialising solver
2023-04-06 17:11:50,052 - TimeSeriesClassifier initialised
2023-04-06 17:11:50,054 - Trying to read DistalPhalanxOutlineAgeGroup data locally
2023-04-06 17:11:50,099 - Loaded data from DistalPhalanxOutlineAgeGroup local data folder
2023-04-06 17:11:50,100 - 3 classes detected
2023-04-06 17:11:50,103 - Fitting model
2023-04-06 17:11:50,105 - Topological features extraction started
2023-04-06 17:11:50,109 - Searching optimal Takens embedding parameters


Time series processed: 100%|[30m██████████[0m| 400/400 [00:09<00:00, 42.34ts/s]

2023-04-06 17:11:59,562 - Optimal TE parameters: dimension = 3, time_delay = 1





2023-04-06 17:12:06,854 - AssumptionsHandler - Initial pipeline fitting started
2023-04-06 17:12:07,954 - AssumptionsHandler - Initial pipeline was fitted successfully
2023-04-06 17:12:07,956 - AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 0.3 MiB, max: 1.5 MiB
2023-04-06 17:12:07,958 - ApiComposer - Initial pipeline was fitted in 1.1 sec.
2023-04-06 17:12:07,960 - AssumptionsHandler - Preset was changed to best_quality due to fit time estimation for initial model.
2023-04-06 17:12:07,975 - ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. Set of candidate models: ['poly_features', 'bernb', 'lgbm', 'pca', 'dt', 'isolation_forest_class', 'rf', 'mlp', 'fast_ica', 'resample', 'knn', 'logit', 'normalization', 'scaling', 'qda', 'poly_features', 'bernb', 'lgbm', 'pca', 'dt', 'isolation_forest_class', 'rf', 'mlp', 'fast_ica', 'resample', 'knn', 'logit', 'normalization', 'scaling', 'qda'].
2023-04-06 17:12:07,980

Generations:   0%|          | 1/10000 [00:00<?, ?gen/s]

2023-04-06 17:12:07,988 - MultiprocessingDispatcher - Number of used CPU's: 2
2023-04-06 17:12:15,265 - MultiprocessingDispatcher - 1 individuals out of 1 in previous population were evaluated successfully.
2023-04-06 17:12:15,271 - EvoGraphOptimizer - Generation num: 1 size: 1
2023-04-06 17:12:15,273 - EvoGraphOptimizer - Best individuals: HallOfFame archive fitness (1): ['<ClassificationMetricsEnum.ROCAUC_penalty=-0.893 ComplexityMetricsEnum.node_num=0.200>']
2023-04-06 17:12:17,773 - MultiprocessingDispatcher - Number of used CPU's: 2
2023-04-06 17:12:25,017 - MultiprocessingDispatcher - 21 individuals out of 21 in previous population were evaluated successfully.
2023-04-06 17:12:25,022 - EvoGraphOptimizer - Generation num: 2 size: 21
2023-04-06 17:12:25,024 - EvoGraphOptimizer - Best individuals: HallOfFame archive fitness (1): ['<ClassificationMetricsEnum.ROCAUC_penalty=-0.898 ComplexityMetricsEnum.node_num=0.400>']
2023-04-06 17:12:25,027 - EvoGraphOptimizer - Next population siz

Generations:   0%|          | 1/10000 [00:32<?, ?gen/s]

2023-04-06 17:12:40,647 - OptimisationTimer - Composition time: 0.544 min
2023-04-06 17:12:40,649 - OptimisationTimer - Algorithm was terminated due to processing time limit
2023-04-06 17:12:40,652 - EvoGraphOptimizer - Generation num: 6 size: 1
2023-04-06 17:12:40,654 - EvoGraphOptimizer - Best individuals: HallOfFame archive fitness (1): ['<ClassificationMetricsEnum.ROCAUC_penalty=-0.898 ComplexityMetricsEnum.node_num=0.400>']
2023-04-06 17:12:40,657 - EvoGraphOptimizer - no improvements for 4 iterations
2023-04-06 17:12:40,659 - EvoGraphOptimizer - spent time: 0.5 min
2023-04-06 17:12:40,667 - GPComposer - GP composition finished
2023-04-06 17:12:40,672 - DataSourceSplitter - K-folds cross validation is applied.
2023-04-06 17:12:40,676 - ApiComposer - Hyperparameters tuning started with 0 min. timeout
2023-04-06 17:12:40,682 - SimultaneousTuner - Hyperparameters optimization start: estimation of metric for initial pipeline





2023-04-06 17:12:43,004 - SimultaneousTuner - Initial graph: {'depth': 2, 'length': 4, 'nodes': [rf, scaling, poly_features, normalization]}
rf - {'n_jobs': 1}
scaling - {}
poly_features - {'degree': 3, 'interaction_only': False}
normalization - {} 
Initial metric: 0.891
  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]2023-04-06 17:12:43,046 - build_posterior_wrapper took 0.017705 seconds
2023-04-06 17:12:43,048 - TPE using 0 trials
 10%|█         | 1/10 [00:03<00:29,  3.26s/trial, best loss: -0.8976]2023-04-06 17:12:46,300 - build_posterior_wrapper took 0.013342 seconds
2023-04-06 17:12:46,303 - TPE using 1/1 trials with best loss -0.897600
 20%|██        | 2/10 [00:07<00:28,  3.55s/trial, best loss: -0.8976]2023-04-06 17:12:50,056 - build_posterior_wrapper took 0.013399 seconds
2023-04-06 17:12:50,059 - TPE using 2/2 trials with best loss -0.897600
 30%|███       | 3/10 [00:10<00:25,  3.65s/trial, best loss: -0.8976]2023-04-06 17:12:53,817 - build_posterior_wrapper took 0.01482

TS processed: 100%|[32m██████████[0m| 400/400 [00:05<00:00, 78.78 ts/s] 

2023-04-06 17:13:22,768 - Statistical features extraction finished





2023-04-06 17:13:23,307 - AssumptionsHandler - Initial pipeline fitting started
2023-04-06 17:13:24,606 - AssumptionsHandler - Initial pipeline was fitted successfully
2023-04-06 17:13:24,608 - AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 1.2 MiB, max: 2.2 MiB
2023-04-06 17:13:24,610 - ApiComposer - Initial pipeline was fitted in 1.3 sec.
2023-04-06 17:13:24,613 - AssumptionsHandler - Preset was changed to fast_train due to fit time estimation for initial model.
2023-04-06 17:13:24,627 - ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. Set of candidate models: ['bernb', 'dt', 'pca', 'rf', 'knn', 'logit', 'normalization', 'scaling', 'qda', 'bernb', 'dt', 'pca', 'rf', 'knn', 'logit', 'normalization', 'scaling', 'qda'].
2023-04-06 17:13:24,632 - ApiComposer - Timeout is too small for composing and is skipped because fit_time is 1.305389 sec.
2023-04-06 17:13:24,636 - DataSourceSplitter - K-folds cross vali

TS processed: 100%|[32m██████████[0m| 139/139 [00:02<00:00, 51.93 ts/s] 


2023-04-06 17:14:27,045 - Statistical features extraction finished
2023-04-06 17:14:27,125 - Predicting with quantile generator
2023-04-06 17:14:27,179 - Calculating metrics: ['f1', 'roc_auc']
2023-04-06 17:14:27,187 - Metrics are: {'f1': 0.735, 'roc_auc': 0.891}
2023-04-06 17:14:27,188 - Saving predicted labels to /Users/technocreep/Desktop/Working-Folder/fedot-industrial/Fedot.Industrial/examples/time_series_classification/results_of_experiments/quantile/DistalPhalanxOutlineAgeGroup
2023-04-06 17:14:27,192 - Saving predicted probs to /Users/technocreep/Desktop/Working-Folder/fedot-industrial/Fedot.Industrial/examples/time_series_classification/results_of_experiments/quantile/DistalPhalanxOutlineAgeGroup
2023-04-06 17:14:27,196 - Saving predicted metrics to /Users/technocreep/Desktop/Working-Folder/fedot-industrial/Fedot.Industrial/examples/time_series_classification/results_of_experiments/quantile/DistalPhalanxOutlineAgeGroup
2023-04-06 17:14:27,198 - Initialising experiment setup
20

Feature Generation. TS processed: 100%|[30m██████████[0m| 400/400 [00:02<00:00, 177.72 ts/s]

2023-04-06 17:14:29,595 - Recurrence feature extraction finished





2023-04-06 17:14:29,700 - AssumptionsHandler - Initial pipeline fitting started
2023-04-06 17:14:30,805 - AssumptionsHandler - Initial pipeline was fitted successfully
2023-04-06 17:14:30,807 - AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 0.5 MiB, max: 1.9 MiB
2023-04-06 17:14:30,810 - ApiComposer - Initial pipeline was fitted in 1.1 sec.
2023-04-06 17:14:30,812 - AssumptionsHandler - Preset was changed to best_quality due to fit time estimation for initial model.
2023-04-06 17:14:30,826 - ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 1 min. Set of candidate models: ['poly_features', 'bernb', 'lgbm', 'pca', 'dt', 'isolation_forest_class', 'rf', 'mlp', 'fast_ica', 'resample', 'knn', 'logit', 'normalization', 'scaling', 'qda', 'poly_features', 'bernb', 'lgbm', 'pca', 'dt', 'isolation_forest_class', 'rf', 'mlp', 'fast_ica', 'resample', 'knn', 'logit', 'normalization', 'scaling', 'qda'].
2023-04-06 17:14:30,831

Generations:   0%|          | 1/10000 [00:00<?, ?gen/s]

2023-04-06 17:14:30,841 - MultiprocessingDispatcher - Number of used CPU's: 2
2023-04-06 17:14:37,175 - MultiprocessingDispatcher - 1 individuals out of 1 in previous population were evaluated successfully.
2023-04-06 17:14:37,181 - EvoGraphOptimizer - Generation num: 1 size: 1
2023-04-06 17:14:37,183 - EvoGraphOptimizer - Best individuals: HallOfFame archive fitness (1): ['<ClassificationMetricsEnum.ROCAUC_penalty=-0.920 ComplexityMetricsEnum.node_num=0.200>']
2023-04-06 17:14:39,485 - MultiprocessingDispatcher - Number of used CPU's: 2
2023-04-06 17:14:46,822 - MultiprocessingDispatcher - 21 individuals out of 21 in previous population were evaluated successfully.
2023-04-06 17:14:46,825 - EvoGraphOptimizer - Generation num: 2 size: 21
2023-04-06 17:14:46,827 - EvoGraphOptimizer - Best individuals: HallOfFame archive fitness (1): ['<ClassificationMetricsEnum.ROCAUC_penalty=-0.927 ComplexityMetricsEnum.node_num=0.100>']
2023-04-06 17:14:46,829 - EvoGraphOptimizer - Next population siz

Generations:   0%|          | 1/10000 [00:35<?, ?gen/s]

2023-04-06 17:15:06,709 - OptimisationTimer - Composition time: 0.598 min
2023-04-06 17:15:06,712 - OptimisationTimer - Algorithm was terminated due to processing time limit
2023-04-06 17:15:06,714 - EvoGraphOptimizer - Generation num: 7 size: 1
2023-04-06 17:15:06,716 - EvoGraphOptimizer - Best individuals: HallOfFame archive fitness (1): ['<ClassificationMetricsEnum.ROCAUC_penalty=-0.927 ComplexityMetricsEnum.node_num=0.100>']
2023-04-06 17:15:06,719 - EvoGraphOptimizer - no improvements for 5 iterations
2023-04-06 17:15:06,721 - EvoGraphOptimizer - spent time: 0.6 min
2023-04-06 17:15:06,725 - GPComposer - GP composition finished
2023-04-06 17:15:06,729 - DataSourceSplitter - K-folds cross validation is applied.
2023-04-06 17:15:06,732 - ApiComposer - Hyperparameters tuning started with 0 min. timeout
2023-04-06 17:15:06,736 - SimultaneousTuner - Hyperparameters optimization start: estimation of metric for initial pipeline





2023-04-06 17:15:08,932 - SimultaneousTuner - Initial graph: {'depth': 1, 'length': 1, 'nodes': [rf]}
rf - {'n_jobs': 1} 
Initial metric: 0.922
  0%|          | 0/100000 [00:00<?, ?trial/s, best loss=?]2023-04-06 17:15:08,961 - build_posterior_wrapper took 0.009748 seconds
2023-04-06 17:15:08,963 - TPE using 0 trials
  0%|          | 1/100000 [00:01<44:12:12,  1.59s/trial, best loss: -0.924883]2023-04-06 17:15:10,553 - build_posterior_wrapper took 0.009198 seconds
2023-04-06 17:15:10,555 - TPE using 1/1 trials with best loss -0.924883
  0%|          | 2/100000 [00:03<57:12:47,  2.06s/trial, best loss: -0.9248830000000001]2023-04-06 17:15:12,942 - build_posterior_wrapper took 0.009816 seconds
2023-04-06 17:15:12,944 - TPE using 2/2 trials with best loss -0.924883
  0%|          | 3/100000 [00:06<60:25:51,  2.18s/trial, best loss: -0.9253826666666667]2023-04-06 17:15:15,254 - build_posterior_wrapper took 0.009212 seconds
2023-04-06 17:15:15,257 - TPE using 3/3 trials with best loss -0.92

Feature Generation. TS processed: 100%|[30m██████████[0m| 139/139 [00:02<00:00, 67.19 ts/s] 

2023-04-06 17:15:32,907 - Recurrence feature extraction finished
2023-04-06 17:15:32,941 - Predicting with recurrence generator





2023-04-06 17:15:32,968 - Calculating metrics: ['f1', 'roc_auc']
2023-04-06 17:15:32,977 - Metrics are: {'f1': 0.686, 'roc_auc': 0.832}
2023-04-06 17:15:32,978 - Saving predicted labels to /Users/technocreep/Desktop/Working-Folder/fedot-industrial/Fedot.Industrial/examples/time_series_classification/results_of_experiments/recurrence/DistalPhalanxOutlineAgeGroup
2023-04-06 17:15:32,982 - Saving predicted probs to /Users/technocreep/Desktop/Working-Folder/fedot-industrial/Fedot.Industrial/examples/time_series_classification/results_of_experiments/recurrence/DistalPhalanxOutlineAgeGroup
2023-04-06 17:15:32,985 - Saving predicted metrics to /Users/technocreep/Desktop/Working-Folder/fedot-industrial/Fedot.Industrial/examples/time_series_classification/results_of_experiments/recurrence/DistalPhalanxOutlineAgeGroup


In [21]:
# Point the picker at the folder the experiment loop wrote into. Called without
# arguments, ``run()`` returns two objects: they are unpacked as the predicted
# probabilities and the metrics, and both are passed to the ensembler later on.
picker = ResultsPicker(
    path=output_folder,
)
(proba_dict, metric_dict) = picker.run()

In [22]:
# Ask the same picker for a single metrics table instead of the two dicts.
# With add_info=True the displayed table also carries dataset descriptors
# (train/test size, length, number of classes, type). Rows for other datasets
# show up too when their results are present in the folder.
metrics_df = picker.run(add_info=True,
                        get_metrics_df=True)
# A bare last expression lets the notebook render the table.
metrics_df

  metrics_df = metrics_df.append({'dataset': ds, 'experiment': exp,
  metrics_df = metrics_df.append({'dataset': ds, 'experiment': exp,
  metrics_df = metrics_df.append({'dataset': ds, 'experiment': exp,
  metrics_df = metrics_df.append({'dataset': ds, 'experiment': exp,
  metrics_df = metrics_df.append({'dataset': ds, 'experiment': exp,
  metrics_df = metrics_df.append({'dataset': ds, 'experiment': exp,


Unnamed: 0,dataset,experiment,f1,roc_auc,accuracy,precision,logloss,train_size,test_size,length,multivariate_flag,number_of_classes,type
0,ECG5000,recurrence,0.006,0.857,,,,500,4500,140,0,5,ecg
1,ECG5000,quantile,0.007,0.939,,,,500,4500,140,0,5,ecg
2,ECG5000,topological,0.002,0.801,,,,500,4500,140,0,5,ecg
3,DistalPhalanxOutlineAgeGroup,recurrence,0.686,0.832,,,,400,139,80,0,3,image
4,DistalPhalanxOutlineAgeGroup,quantile,0.735,0.891,,,,400,139,80,0,3,image
5,DistalPhalanxOutlineAgeGroup,topological,0.688,0.805,,,,400,139,80,0,3,image


In [23]:
# Combine the per-generator predictions for ``dataset_name`` using the
# probabilities and metrics collected by the results picker.
ensembler = RankEnsemble(
    metric_dict=metric_dict,
    proba_dict=proba_dict,
    dataset_name=dataset_name,
)
# Left as the last expression so the returned summary (base model and metric,
# ensembled models, ensemble method, best ensemble metric) is displayed.
ensembler.ensemble()

2023-04-06 17:20:59,887 - Trying to read DistalPhalanxOutlineAgeGroup data locally
2023-04-06 17:20:59,906 - --------BASE RESULT FOR MODEL - recurrence--------
2023-04-06 17:20:59,907 - ----TYPE OF ML TASK - multiclass. Metric - f1-----
2023-04-06 17:20:59,908 - {'f1': 0.686, 'roc_auc': 0.832}
2023-04-06 17:20:59,909 - ---------BASE RESULT FOR MODEL - quantile---------
2023-04-06 17:20:59,910 - ----TYPE OF ML TASK - multiclass. Metric - f1-----
2023-04-06 17:20:59,911 - {'f1': 0.735, 'roc_auc': 0.891}
2023-04-06 17:20:59,912 - -------BASE RESULT FOR MODEL - topological--------
2023-04-06 17:20:59,914 - ----TYPE OF ML TASK - multiclass. Metric - f1-----
2023-04-06 17:20:59,915 - {'f1': 0.688, 'roc_auc': 0.805}
2023-04-06 17:20:59,916 - --CURRENT BEST METRIC - 0.735. MODEL - quantile---
2023-04-06 17:20:59,917 - Applying ensemble dict_keys(['MeanEnsemble', 'MedianEnsemble', 'MinEnsemble', 'MaxEnsemble', 'ProductEnsemble']) strategy for 2 models
2023-04-06 17:20:59,919 - Calculating metri

{'Base_model': 'quantile',
 'Base_metric': 0.735,
 'Ensemble_models': ['quantile', 'topological'],
 'Ensemble_method': 'MeanEnsemble',
 'Best_ensemble_metric': 0.748}