From 7cc51c98ed9afb5f6b8cd7f2b30412b0f65b17a9 Mon Sep 17 00:00:00 2001
From: Pakulin Sergei <38308131+IIaKyJIuH@users.noreply.github.com>
Date: Mon, 22 May 2023 15:56:56 +0300
Subject: [PATCH] 358 Reduce execution time for unit tests (#1098)

---
 .codecov.yml | 4 +-
 .github/workflows/integration-build.yml | 32 ++++++++
 .github/workflows/manual-build.yml | 2 +-
 .github/workflows/unit-build.yml | 3 +-
 .../multitask_classification_regression.py | 24 +++---
 ...multitask_classification_regression_api.py | 7 +-
 fedot/api/help.py | 29 +++++---
 fedot/core/pipelines/tuning/search_space.py | 15 ++--
 .../models => integration/api}/__init__.py | 0
 test/integration/api/test_api_cli_params.py | 39 ++++++++++
 .../api/test_api_info.py | 0
 .../api/test_api_utils.py | 2 +-
 .../api/test_main_api.py | 39 +++++-----
 .../api_params/test_main_api_params.py | 4 +-
 test/integration/automl/test_automl.py | 9 +--
 .../classification}/__init__.py | 0
 .../classification/test_classification.py | 74 +++++++++++++++++++
 test/integration/composer/__init__.py | 0
 .../composer/test_composer.py | 47 ++++++------
 .../composer/test_history.py | 4 +-
 test/integration/data/__init__.py | 0
 test/integration/data/test_data.py | 12 +++
 test/integration/data_operations/__init__.py | 0
 .../test_text_preprocessing.py | 0
 test/integration/models/__init__.py | 0
 .../models/test_atomized_model.py | 2 +-
 .../models/test_custom_model_introduction.py | 10 +--
 .../models/test_model.py | 2 +-
 .../models/test_models_params.py | 0
 .../models/test_repository.py | 2 +-
 .../models/test_split_train_test.py | 3 +-
 .../models/test_strategy.py | 0
 test/integration/multimodal/__init__.py | 0
 .../multimodal/test_multimodal.py} | 38 +--------
 test/integration/optimizer/__init__.py | 0
 .../optimizer/test_evaluation.py | 1 -
 test/integration/pipelines/__init__.py | 0
 test/integration/pipelines/tuning/__init__.py | 0
 .../pipelines/tuning/test_pipeline_tuning.py | 0
 .../pipelines/tuning/test_tuner_builder.py | 24 +++---
 .../quality/test_synthetic_tasks.py | 4 +-
 .../real_applications/test_examples.py | 14 ++--
 .../real_applications/test_real_cases.py | 20 +----
 test/integration/remote/__init__.py | 0
 .../remote/test_remote_composer.py | 0
 .../utilities/test_pipeline_import_export.py | 2 +-
 .../utilities/test_project_import_export.py | 2 +-
 test/sensitivity/test_sensitivity.py | 2 +-
 test/test_gpu_strategy.py | 8 +-
 test/unit/api/test_api_cli_params.py | 35 ---------
 test/unit/api/test_api_safety.py | 4 +-
 test/unit/api/test_assumption_builder.py | 11 +--
 test/unit/api/test_presets.py | 2 +-
 test/unit/data/test_data.py | 9 ---
 test/unit/data/test_data_merge_text.py | 9 +--
 .../test_data_operation_params.py | 4 +-
 test/unit/multimodal/test_multimodal.py | 34 +++++++++
 .../optimizer/gp_operators/test_mutation.py | 16 ++--
 test/unit/optimizer/test_external.py | 2 +-
 .../optimizer/test_pipeline_objective_eval.py | 3 +-
 test/unit/pipelines/test_pipeline.py | 23 +++---
 test/unit/preprocessing/test_preprocessors.py | 12 ++-
 test/unit/tasks/test_classification.py | 72 +-----------------
 test/unit/tasks/test_clustering.py | 2 +-
 test/unit/tasks/test_forecasting.py | 11 +--
 test/unit/tasks/test_multi_ts_forecast.py | 4 +-
 test/unit/validation/test_table_cv.py | 26 +++----
 67 files changed, 385 insertions(+), 374 deletions(-)
 create mode 100644 .github/workflows/integration-build.yml
 rename test/{unit/models => integration/api}/__init__.py (100%)
 create mode 100644 test/integration/api/test_api_cli_params.py
 rename test/{unit => integration}/api/test_api_info.py (100%)
 rename test/{unit => integration}/api/test_api_utils.py (98%)
 rename test/{unit => integration}/api/test_main_api.py (94%)
 rename test/{unit/pipelines/tuning => integration/classification}/__init__.py (100%)
 create mode 100644 test/integration/classification/test_classification.py
 create mode 100644 test/integration/composer/__init__.py
 rename test/{unit => integration}/composer/test_composer.py (94%)
 rename test/{unit => integration}/composer/test_history.py (97%)
 create mode 100644 test/integration/data/__init__.py
 create mode 100644 test/integration/data/test_data.py
 create mode 100644 test/integration/data_operations/__init__.py
 rename test/{unit => integration}/data_operations/test_text_preprocessing.py (100%)
 create mode 100644 test/integration/models/__init__.py
 rename test/{unit => integration}/models/test_atomized_model.py (98%)
 rename test/{unit => integration}/models/test_custom_model_introduction.py (96%)
 rename test/{unit => integration}/models/test_model.py (100%)
 rename test/{unit => integration}/models/test_models_params.py (100%)
 rename test/{unit => integration}/models/test_repository.py (98%)
 rename test/{unit => integration}/models/test_split_train_test.py (98%)
 rename test/{unit => integration}/models/test_strategy.py (100%)
 create mode 100644 test/integration/multimodal/__init__.py
 rename test/{unit/multimodal/test_multi_modal.py => integration/multimodal/test_multimodal.py} (56%)
 create mode 100644 test/integration/optimizer/__init__.py
 rename test/{unit => integration}/optimizer/test_evaluation.py (99%)
 create mode 100644 test/integration/pipelines/__init__.py
 create mode 100644 test/integration/pipelines/tuning/__init__.py
 rename test/{unit => integration}/pipelines/tuning/test_pipeline_tuning.py (100%)
 rename test/{unit => integration}/pipelines/tuning/test_tuner_builder.py (86%)
 create mode 100644 test/integration/remote/__init__.py
 rename test/{unit => integration}/remote/test_remote_composer.py (100%)
 rename test/{unit => integration}/utilities/test_pipeline_import_export.py (99%)
 rename test/{unit => integration}/utilities/test_project_import_export.py (97%)
 delete mode 100644 test/unit/api/test_api_cli_params.py

diff --git a/.codecov.yml b/.codecov.yml
index c90919c3a2..67bb09b2da 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -9,4 +9,6 @@ ignore:
 - "fedot/sensitivity" # unit test are moved to integration test due to the time restrictions
 - "fedot/utilities" # unit test are moved to integration test due to the time restrictions
 - "fedot/visualisation" # complicated to test in unit-like way, tested in integration tests
-- "fedot/core/operations/evaluation/automl.py" # require to heavy external dependencies - tested in integration test
\ No newline at end of file
+- "fedot/core/operations/evaluation/automl.py" # requires too heavy external dependencies - tested in integration tests
+- "fedot/api/help.py" # unit tests are moved to integration tests due to the time restrictions
+- "fedot/core/pipelines/tuning/search_space.py" # unit tests are moved to integration tests due to the time restrictions
\ No newline at end of file
diff --git a/.github/workflows/integration-build.yml b/.github/workflows/integration-build.yml
new file mode 100644
index 0000000000..9995394ec1
--- /dev/null
+++ b/.github/workflows/integration-build.yml
@@ -0,0 +1,32 @@
+name: Integration build
+
+on:
+  schedule:
+    - cron: '0 12 * * *'
+
+jobs:
+  scheduled:
+    runs-on: ubuntu-latest
+    timeout-minutes: 95
+    strategy:
+      matrix:
+        python-version: [ 3.8, 3.9, '3.10' ]
+
+    steps:
+      - name: Checkout branch
+        uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pytest
+          pip install .[extra]
+          pip install .[examples]
+          pip install .[profilers]
+          pip install pytest-cov
+      - name: Test with pytest
+        run: |
+          pytest --cov=fedot -s test/integration
diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml
index 2a6e8541be..43aeb3f537 100644
--- a/.github/workflows/manual-build.yml
+++ b/.github/workflows/manual-build.yml
@@ -6,7 +6,7 @@ on:
 jobs:
   integration_test:
     runs-on: ubuntu-latest
-    timeout-minutes: 20
+    timeout-minutes: 95
     strategy:
       matrix:
         python-version: [ 3.8, 3.9, '3.10' ]
diff --git a/.github/workflows/unit-build.yml b/.github/workflows/unit-build.yml
index a40a185b1b..c53b900435 100644
--- a/.github/workflows/unit-build.yml
+++ b/.github/workflows/unit-build.yml
@@ -28,8 +28,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pytest
-        pip install .[extra]
-        pip install .[examples]
+        pip install .
         pip install pytest-cov
     - name: Test with pytest
       run: |
diff --git a/examples/advanced/multitask_classification_regression.py b/examples/advanced/multitask_classification_regression.py
index 4788d045e6..9d3dda5719 100644
--- a/examples/advanced/multitask_classification_regression.py
+++ b/examples/advanced/multitask_classification_regression.py
@@ -1,5 +1,5 @@
-import os
 from datetime import timedelta
+from typing import Tuple
 
 import numpy as np
 import pandas as pd
@@ -14,7 +14,7 @@
 from fedot.core.repository.dataset_types import DataTypesEnum
 from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
 from fedot.core.repository.tasks import TaskTypesEnum, Task
-from test.unit.api.test_api_cli_params import project_root_path
+from fedot.core.utils import fedot_project_root
 
 
 def get_multitask_pipeline():
@@ -24,11 +24,11 @@ def get_multitask_pipeline():
     return Pipeline(final_node)
 
 
-def prepare_multitask_data() -> (MultiModalData, MultiModalData):
+def prepare_multitask_data() -> Tuple[MultiModalData, MultiModalData]:
     """ Load data for multitask regression / classification pipeline """
-    ex_data = os.path.join(project_root_path, 'examples/data')
-    train_df = pd.read_csv(os.path.join(ex_data, 'train_synthetic_regression_classification.csv'))
-    test_df = pd.read_csv(os.path.join(ex_data, 'test_synthetic_regression_classification.csv'))
+    ex_data = fedot_project_root().joinpath('examples/data')
+    train_df = pd.read_csv(ex_data.joinpath('train_synthetic_regression_classification.csv'))
+    test_df = pd.read_csv(ex_data.joinpath('test_synthetic_regression_classification.csv'))
 
     # Data for classification
     class_task = Task(TaskTypesEnum.classification)
@@ -66,12 +66,14 @@ def launch_multitask_example(with_tuning: bool = False):
     multitask_pipeline = get_multitask_pipeline()
 
     if with_tuning:
-        tuner = TunerBuilder(train_input.task)\
-            .with_tuner(SimultaneousTuner)\
-            .with_metric(RegressionMetricsEnum.MAE)\
-            .with_iterations(100)\
-            .with_timeout(timedelta(minutes=2))\
+        tuner = (
+            TunerBuilder(train_input.task)
+            .with_tuner(SimultaneousTuner)
+            .with_metric(RegressionMetricsEnum.MAE)
+            .with_iterations(100)
+            .with_timeout(timedelta(minutes=2))
             .build(train_input)
+        )
         multitask_pipeline = tuner.tune(multitask_pipeline)
 
     multitask_pipeline.fit(train_input)
diff --git a/examples/simple/multitask_classification_regression_api.py
b/examples/simple/multitask_classification_regression_api.py index 613df4abc2..9ed2587cbf 100644 --- a/examples/simple/multitask_classification_regression_api.py +++ b/examples/simple/multitask_classification_regression_api.py @@ -1,15 +1,16 @@ import os +from typing import Tuple import numpy as np import pandas as pd from fedot.api.main import Fedot -from test.unit.api.test_api_cli_params import project_root_path +from fedot.core.utils import fedot_project_root -def load_train_test_dataframes() -> (pd.DataFrame, pd.DataFrame): +def load_train_test_dataframes() -> Tuple[dict, dict, dict]: """ Load data for multitask regression / classification problem """ - data_path = os.path.join(project_root_path, 'examples/data') + data_path = fedot_project_root().joinpath('examples/data') train_df = pd.read_csv(os.path.join(data_path, 'train_synthetic_regression_classification.csv')) test_df = pd.read_csv(os.path.join(data_path, 'test_synthetic_regression_classification.csv')) diff --git a/fedot/api/help.py b/fedot/api/help.py index 959b940d9d..052f314496 100644 --- a/fedot/api/help.py +++ b/fedot/api/help.py @@ -15,14 +15,18 @@ def print_models_info(task_name): # Filter operations repository_operations_list = _filter_operations_by_type(repository, task) + search_space = PipelineSearchSpace() for model in repository_operations_list: if model.id != 'custom': - hyperparameters = PipelineSearchSpace().get_operation_parameter_range(str(model.id)) + hyperparameters = search_space.get_operation_parameter_range(str(model.id)) implementation_info = model.current_strategy(task)(model.id).implementation_info - print(f"Model name - '{model.id}'") - print(f"Available hyperparameters to optimize with tuner - {hyperparameters}") - print(f"Strategy implementation - {model.current_strategy(task)}") - print(f"Model implementation - {implementation_info}\n") + info_lst = [ + f"Model name - '{model.id}'", + f"Available hyperparameters to optimize with tuner - {hyperparameters}", + f"Strategy implementation - {model.current_strategy(task)}", + f"Model implementation - {implementation_info}\n" + ] + print('\n'.join(info_lst)) def print_data_operations_info(task_name): @@ -34,15 +38,18 @@ def print_data_operations_info(task_name): task = _get_task_by_name(task_name) repository = OperationTypesRepository(operation_type='data_operation') - # Filter operations repository_operations_list = _filter_operations_by_type(repository, task) + search_space = PipelineSearchSpace() for operation in repository_operations_list: - hyperparameters = PipelineSearchSpace().get_operation_parameter_range(str(operation.id)) + hyperparameters = search_space.get_operation_parameter_range(str(operation.id)) implementation_info = operation.current_strategy(task)(operation.id).implementation_info - print(f"Data operation name - '{operation.id}'") - print(f"Available hyperparameters to optimize with tuner - {hyperparameters}") - print(f"Strategy implementation - {operation.current_strategy(task)}") - print(f"Operation implementation - {implementation_info}\n") + info_lst = [ + f"Data operation name - '{operation.id}'", + f"Available hyperparameters to optimize with tuner - {hyperparameters}", + f"Strategy implementation - {operation.current_strategy(task)}", + f"Operation implementation - {implementation_info}\n" + ] + print('\n'.join(info_lst)) def _filter_operations_by_type(repository, task): diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py index b3f700690d..611323081a 100644 --- 
a/fedot/core/pipelines/tuning/search_space.py +++ b/fedot/core/pipelines/tuning/search_space.py @@ -1,3 +1,5 @@ +from typing import Optional, Dict, Tuple, Callable, List + import numpy as np from golem.core.tuning.search_space import SearchSpace from hyperopt import hp @@ -13,7 +15,7 @@ class PipelineSearchSpace(SearchSpace): """ def __init__(self, - custom_search_space: dict = None, + custom_search_space: Optional[Dict[str, Dict[str, Tuple[Callable, List]]]] = None, replace_default_search_space: bool = False): self.custom_search_space = custom_search_space self.replace_default_search_space = replace_default_search_space @@ -291,11 +293,10 @@ def get_parameters_dict(self): } if self.custom_search_space is not None: - for operation in self.custom_search_space.keys(): - if self.replace_default_search_space: - parameters_per_operation[operation] = self.custom_search_space[operation] - else: - for key, value in self.custom_search_space[operation].items(): - parameters_per_operation[operation][key] = value + if self.replace_default_search_space: + parameters_per_operation.update(self.custom_search_space) + else: + for operation_name, operation_dct in self.custom_search_space.items(): + parameters_per_operation[operation_name].update(operation_dct) return parameters_per_operation diff --git a/test/unit/models/__init__.py b/test/integration/api/__init__.py similarity index 100% rename from test/unit/models/__init__.py rename to test/integration/api/__init__.py diff --git a/test/integration/api/test_api_cli_params.py b/test/integration/api/test_api_cli_params.py new file mode 100644 index 0000000000..88c3afd78c --- /dev/null +++ b/test/integration/api/test_api_cli_params.py @@ -0,0 +1,39 @@ +from typing import List + +from fedot.api.fedot_cli import create_parser, separate_argparse_to_fedot, preprocess_keys, run_fedot, \ + arguments_dicts +from fedot.core.utils import fedot_project_root + + +def call_cli_with_parameters(call_string: List[str]): + """ Function that imitates argparse api call""" + parser = create_parser(arguments_dicts) + parameters = parser.parse_args(call_string) + main_params, fit_params = separate_argparse_to_fedot(parameters) + preprocess_keys(main_params) + preprocess_keys(fit_params) + predictions = run_fedot(parameters, main_params, fit_params, save_predictions=False) + return predictions + + +def test_cli_with_parameters(): + """ Test all parameters used in cli are available from api""" + project_root_path = fedot_project_root() + ts_train_path = project_root_path.joinpath('test/data/simple_time_series.csv') + ts_call = ( + f'--problem ts_forecasting --preset fast_train --timeout 0.1 --depth 3 --arity 3 ' + '--popsize 3 --gen_num 5 --opers lagged linear ridge --tuning 0 ' + f'--cv_folds 2 --val_bl 2 --target sea_height --train {ts_train_path} ' + f'--test {ts_train_path} --for_len 10' + ).split() + class_train_path = project_root_path.joinpath('test/data/simple_classification.csv') + class_call = ( + f'--problem classification --train {class_train_path} --test {class_train_path} --target Y ' + '--preset fast_train --timeout 0.1 --depth 3 --arity 3 ' + '--popsize 3 --gen_num 5 --tuning 1' + ).split() + + ts_predictions = call_cli_with_parameters(ts_call) + assert ts_predictions is not None + class_predictions = call_cli_with_parameters(class_call) + assert class_predictions is not None diff --git a/test/unit/api/test_api_info.py b/test/integration/api/test_api_info.py similarity index 100% rename from test/unit/api/test_api_info.py rename to 
test/integration/api/test_api_info.py diff --git a/test/unit/api/test_api_utils.py b/test/integration/api/test_api_utils.py similarity index 98% rename from test/unit/api/test_api_utils.py rename to test/integration/api/test_api_utils.py index 488d4c506b..f64277681b 100644 --- a/test/unit/api/test_api_utils.py +++ b/test/integration/api/test_api_utils.py @@ -13,7 +13,7 @@ from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams from fedot.preprocessing.preprocessing import DataPreprocessor -from test.unit.api.test_main_api import get_dataset, get_cholesterol_dataset +from test.integration.api.test_main_api import get_dataset, get_cholesterol_dataset from test.unit.tasks.test_classification import get_binary_classification_data diff --git a/test/unit/api/test_main_api.py b/test/integration/api/test_main_api.py similarity index 94% rename from test/unit/api/test_main_api.py rename to test/integration/api/test_main_api.py index ac8a012d93..3dc9abb746 100644 --- a/test/unit/api/test_main_api.py +++ b/test/integration/api/test_main_api.py @@ -7,7 +7,6 @@ import pandas as pd import pytest from golem.core.dag.graph_utils import graph_structure -from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelEncoder @@ -22,19 +21,17 @@ from fedot.core.data.supplementary_data import SupplementaryData from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline -from fedot.core.pipelines.pipeline_builder import PipelineBuilder from fedot.core.repository.dataset_types import DataTypesEnum -from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams from fedot.core.utils import fedot_project_root +from test.integration.models.test_split_train_test import get_synthetic_input_data from test.unit.common_tests import is_predict_ignores_target -from test.unit.models.test_split_train_test import get_synthetic_input_data from test.unit.tasks.test_classification import get_synthetic_classification_data, get_iris_data from test.unit.tasks.test_forecasting import get_ts_data from test.unit.tasks.test_multi_ts_forecast import get_multi_ts_data from test.unit.tasks.test_regression import get_synthetic_regression_data -default_params = { +TESTS_MAIN_API_DEFAULT_PARAMS = { 'timeout': 0.1, 'preset': 'fast_train', 'max_depth': 1, @@ -157,7 +154,7 @@ def data_with_binary_features_and_categorical_target(): ]) def test_api_predict_correct(task_type, predefined_model, metric_name): train_data, test_data, _ = get_dataset(task_type) - model = Fedot(problem=task_type, **default_params) + model = Fedot(problem=task_type, **TESTS_MAIN_API_DEFAULT_PARAMS) fedot_model = model.fit(features=train_data, predefined_model=predefined_model) prediction = model.predict(features=test_data) metric = model.get_metrics(metric_names=metric_name) @@ -204,7 +201,7 @@ def test_api_simple_ts_predict_correct(task_type: str = 'ts_forecasting'): # The forecast length must be equal to 5 forecast_length = 5 train_data, test_data, _ = get_dataset(task_type, validation_blocks=1) - model = Fedot(problem='ts_forecasting', **default_params, + model = Fedot(problem='ts_forecasting', **TESTS_MAIN_API_DEFAULT_PARAMS, task_params=TsForecastingParams(forecast_length=forecast_length), 
validation_blocks=1) @@ -220,7 +217,7 @@ def test_api_in_sample_ts_predict_correct(validation_blocks, task_type: str = 't # The forecast length must be equal to 5 forecast_length = 5 train_data, test_data, _ = get_dataset(task_type, validation_blocks=validation_blocks) - model = Fedot(problem='ts_forecasting', **default_params, + model = Fedot(problem='ts_forecasting', **TESTS_MAIN_API_DEFAULT_PARAMS, task_params=TsForecastingParams(forecast_length=forecast_length), validation_blocks=validation_blocks) @@ -235,7 +232,7 @@ def test_api_in_sample_ts_predict_correct(validation_blocks, task_type: str = 't def test_api_in_sample_multi_ts_predict_correct(validation_blocks, task_type: str = 'ts_forecasting'): forecast_length = 2 train_data, test_data = get_multi_ts_data(forecast_length=forecast_length, validation_blocks=validation_blocks) - model = Fedot(problem='ts_forecasting', **default_params, + model = Fedot(problem='ts_forecasting', **TESTS_MAIN_API_DEFAULT_PARAMS, task_params=TsForecastingParams(forecast_length=forecast_length), validation_blocks=validation_blocks, available_operations=['lagged', 'smoothing', 'diff_filter', 'gaussian_filter', @@ -253,7 +250,7 @@ def test_api_in_sample_multimodal_ts_predict_correct(validation_blocks): forecast_length = 5 historical_data, target = get_multimodal_ts_data() - model = Fedot(problem='ts_forecasting', **default_params, + model = Fedot(problem='ts_forecasting', **TESTS_MAIN_API_DEFAULT_PARAMS, task_params=TsForecastingParams(forecast_length=forecast_length)) model.fit(features=historical_data, target=target, predefined_model='auto') ts_forecast = model.predict(historical_data, validation_blocks=validation_blocks) @@ -367,7 +364,7 @@ def test_multiobj_for_api(): train_data, test_data, _ = get_dataset('classification') params = { - **default_params, + **TESTS_MAIN_API_DEFAULT_PARAMS, 'metric': ['f1', 'node_num'] } @@ -384,7 +381,7 @@ def test_multiobj_for_api(): def test_categorical_preprocessing_unidata(): train_data, test_data = load_categorical_unimodal() - auto_model = Fedot(problem='classification', **default_params) + auto_model = Fedot(problem='classification', **TESTS_MAIN_API_DEFAULT_PARAMS) auto_model.fit(features=train_data) prediction = auto_model.predict(features=test_data) prediction_proba = auto_model.predict_proba(features=test_data) @@ -398,7 +395,7 @@ def test_categorical_preprocessing_unidata(): def test_categorical_preprocessing_unidata_predefined(): train_data, test_data = load_categorical_unimodal() - auto_model = Fedot(problem='classification', **default_params) + auto_model = Fedot(problem='classification', **TESTS_MAIN_API_DEFAULT_PARAMS) auto_model.fit(features=train_data, predefined_model='rf') prediction = auto_model.predict(features=test_data) prediction_proba = auto_model.predict_proba(features=test_data) @@ -437,7 +434,7 @@ def test_fill_nan_without_categorical(): def test_dict_multimodal_input_for_api(): data, target = load_categorical_multidata() - model = Fedot(problem='classification', **default_params) + model = Fedot(problem='classification', **TESTS_MAIN_API_DEFAULT_PARAMS) model.fit(features=data, target=target) @@ -459,7 +456,7 @@ def test_unshuffled_data(): problem = 'classification' params = { - **default_params, + **TESTS_MAIN_API_DEFAULT_PARAMS, 'metric': 'f1'} auto_model = Fedot(problem=problem, seed=42, **params) @@ -473,7 +470,7 @@ def test_custom_history_dir_define_correct(): custom_path = os.path.join(os.path.abspath(os.getcwd()), 'history_dir') params = { - **default_params, + 
**TESTS_MAIN_API_DEFAULT_PARAMS, 'history_dir': custom_path, 'timeout': None, 'num_of_generations': 1, @@ -536,7 +533,7 @@ def test_unknown_param_raises_error(): def test_default_forecast(): forecast_length = 2 train_data, test_data, _ = get_dataset('ts_forecasting') - model = Fedot(problem='ts_forecasting', **default_params, + model = Fedot(problem='ts_forecasting', **TESTS_MAIN_API_DEFAULT_PARAMS, task_params=TsForecastingParams(forecast_length=forecast_length)) model.fit(train_data, predefined_model='auto') forecast = model.forecast() @@ -548,7 +545,7 @@ def test_default_forecast(): def test_forecast_with_different_horizons(horizon): forecast_length = 2 train_data, test_data, _ = get_dataset('ts_forecasting') - model = Fedot(problem='ts_forecasting', **default_params, + model = Fedot(problem='ts_forecasting', **TESTS_MAIN_API_DEFAULT_PARAMS, task_params=TsForecastingParams(forecast_length=forecast_length)) model.fit(train_data, predefined_model='auto') forecast = model.forecast(pre_history=test_data, horizon=horizon) @@ -558,14 +555,14 @@ def test_forecast_with_different_horizons(horizon): def test_forecast_with_unfitted_model(): forecast_length = 2 - model = Fedot(problem='ts_forecasting', **default_params, + model = Fedot(problem='ts_forecasting', **TESTS_MAIN_API_DEFAULT_PARAMS, task_params=TsForecastingParams(forecast_length=forecast_length)) with pytest.raises(ValueError): model.forecast() def test_forecast_with_not_ts_problem(): - model = Fedot(problem='classification', **default_params) + model = Fedot(problem='classification', **TESTS_MAIN_API_DEFAULT_PARAMS) train_data, test_data, _ = get_dataset('classification') model.fit(train_data, predefined_model='auto') with pytest.raises(ValueError): @@ -577,7 +574,7 @@ def test_forecast_with_multivariate_ts(): historical_data, target = get_multimodal_ts_data() - model = Fedot(problem='ts_forecasting', **default_params, + model = Fedot(problem='ts_forecasting', **TESTS_MAIN_API_DEFAULT_PARAMS, task_params=TsForecastingParams(forecast_length=forecast_length)) model.fit(features=historical_data, target=target, predefined_model='auto') forecast = model.forecast() diff --git a/test/integration/api_params/test_main_api_params.py b/test/integration/api_params/test_main_api_params.py index 1da7cca703..a7e0eac13c 100644 --- a/test/integration/api_params/test_main_api_params.py +++ b/test/integration/api_params/test_main_api_params.py @@ -1,6 +1,6 @@ import logging from dataclasses import dataclass -from typing import Callable, Union, Tuple +from typing import Callable, Union import pytest from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum @@ -8,7 +8,7 @@ from fedot.api.main import Fedot from fedot.core.repository.tasks import TsForecastingParams -from test.unit.api.test_main_api import get_dataset +from test.integration.api.test_main_api import get_dataset @dataclass diff --git a/test/integration/automl/test_automl.py b/test/integration/automl/test_automl.py index 9d8ef505c9..517faaf67c 100644 --- a/test/integration/automl/test_automl.py +++ b/test/integration/automl/test_automl.py @@ -1,4 +1,3 @@ -import os from datetime import timedelta from examples.advanced.automl.h2o_example import h2o_classification_pipeline_evaluation, \ @@ -10,9 +9,7 @@ def test_pipeline_from_automl_example(): - project_root_path = str(fedot_project_root()) - - file_path_train = os.path.join(project_root_path, 'test/data/simple_classification.csv') + file_path_train = 
fedot_project_root().joinpath('test/data/simple_classification.csv') file_path_test = file_path_train auc = run_pipeline_from_automl(file_path_train, file_path_test, max_run_time=timedelta(seconds=1)) @@ -21,8 +18,7 @@ def test_pipeline_from_automl_example(): def test_tpot_vs_fedot_example(): - project_root_path = str(fedot_project_root()) - file_path_train = os.path.join(project_root_path, 'test/data/simple_classification.csv') + file_path_train = fedot_project_root().joinpath('test/data/simple_classification.csv') file_path_test = file_path_train auc = run_tpot_vs_fedot_example(file_path_train, file_path_test) @@ -36,4 +32,3 @@ def test_h2o_vs_fedot_example(): h2o_regression_pipeline_evaluation() with OperationTypesRepository.init_automl_repository() as _: h2o_ts_pipeline_evaluation() - diff --git a/test/unit/pipelines/tuning/__init__.py b/test/integration/classification/__init__.py similarity index 100% rename from test/unit/pipelines/tuning/__init__.py rename to test/integration/classification/__init__.py diff --git a/test/integration/classification/test_classification.py b/test/integration/classification/test_classification.py new file mode 100644 index 0000000000..99db05825f --- /dev/null +++ b/test/integration/classification/test_classification.py @@ -0,0 +1,74 @@ +import numpy as np + +try: + import tensorflow as tf +except ModuleNotFoundError: + from golem.utilities.requirements_notificator import warn_requirement + + warn_requirement('tensorflow') + +from test.unit.common_tests import is_predict_ignores_target +from test.unit.tasks.test_classification import get_image_classification_data + +from fedot.core.operations.evaluation.operation_implementations.models.keras import ( + FedotCNNImplementation, + check_input_array, + create_deep_cnn, + fit_cnn, + predict_cnn +) + + +def check_predict_cnn_correct(model, dataset_to_validate): + return is_predict_ignores_target( + predict_func=predict_cnn, + predict_args={'trained_model': model}, + data_arg_name='predict_data', + input_data=dataset_to_validate, + ) + + +def test_cnn_custom_class(): + cnn_class = FedotCNNImplementation() + + assert cnn_class.params is not None + assert type(cnn_class) == FedotCNNImplementation + + +def test_image_classification_quality(): + roc_auc_on_valid, _, _ = get_image_classification_data() + deviation_composite = roc_auc_on_valid - 0.5 + + roc_auc_on_valid, _, _ = get_image_classification_data(composite_flag=False) + deviation_simple = roc_auc_on_valid - 0.5 + + assert abs(deviation_composite) < 0.25 + assert abs(deviation_simple) < 0.35 + + +def test_cnn_methods(): + _, dataset_to_train, dataset_to_validate = get_image_classification_data() + image_shape = (28, 28, 1) + num_classes = 7 + epochs = 10 + batch_size = 128 + + cnn_model = create_deep_cnn(input_shape=image_shape, + num_classes=num_classes) + + transformed_x_train, transform_flag = check_input_array(x_train=dataset_to_train.features) + + model = fit_cnn(train_data=dataset_to_train, + model=cnn_model, + epochs=epochs, + batch_size=batch_size) + + prediction = predict_cnn(trained_model=model, + predict_data=dataset_to_validate) + + assert type(cnn_model) == tf.keras.Sequential + assert transform_flag is True + assert cnn_model.input_shape[1:] == image_shape + assert cnn_model.output_shape[1] == num_classes + assert type(prediction) == np.ndarray + assert check_predict_cnn_correct(model, dataset_to_validate) diff --git a/test/integration/composer/__init__.py b/test/integration/composer/__init__.py new file mode 100644 index 0000000000..e69de29bb2 
diff --git a/test/unit/composer/test_composer.py b/test/integration/composer/test_composer.py similarity index 94% rename from test/unit/composer/test_composer.py rename to test/integration/composer/test_composer.py index 442674767c..f8b1d8cf98 100644 --- a/test/unit/composer/test_composer.py +++ b/test/integration/composer/test_composer.py @@ -1,6 +1,6 @@ import datetime -import os import random +from pathlib import Path import numpy as np import pandas as pd @@ -9,8 +9,6 @@ from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum from golem.core.optimisers.random.random_search import RandomSearchOptimizer - -from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements from sklearn.metrics import roc_auc_score as roc_auc from fedot.api.main import Fedot @@ -23,16 +21,18 @@ from fedot.core.optimisers.objective.metrics_objective import MetricsObjective from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements from fedot.core.pipelines.pipeline_graph_generation_params import get_pipeline_generation_params from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.operation_types_repository import OperationTypesRepository, get_operations_for_task from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum +from fedot.core.utils import fedot_project_root from test.unit.pipelines.test_pipeline_comparison import pipeline_first, pipeline_second -def to_numerical(categorical_ids: np.ndarray): - encoded = pd.factorize(categorical_ids)[0] +def to_categorical_codes(categorical_ids: np.ndarray): + encoded = pd.Categorical(categorical_ids).codes return encoded @@ -44,10 +44,10 @@ def seed(): @pytest.fixture() def file_data_setup(): - test_file_path = str(os.path.dirname(__file__)) - file = '../../data/advanced_classification.csv' - input_data = InputData.from_csv(os.path.join(test_file_path, file)) - input_data.idx = to_numerical(categorical_ids=input_data.idx) + file = 'test/data/advanced_classification.csv' + test_file_path = Path(fedot_project_root(), file) + input_data = InputData.from_csv(test_file_path) + input_data.idx = to_categorical_codes(categorical_ids=input_data.idx) return input_data @@ -124,7 +124,7 @@ def test_gp_composer_build_pipeline_correct(data_fixture, request): def baseline_pipeline(): pipeline = Pipeline() last_node = PipelineNode(operation_type='rf', - nodes_from=[]) + nodes_from=[]) for requirement_model in ['knn', 'logit']: new_node = PipelineNode(requirement_model) pipeline.add_node(new_node) @@ -225,11 +225,13 @@ def test_multi_objective_composer(data_fixture, request): params = GPAlgorithmParameters(pop_size=2, genetic_scheme_type=GeneticSchemeTypesEnum.steady_state, selection_types=[SelectionTypesEnum.spea2]) - composer = ComposerBuilder(task=Task(task_type))\ - .with_requirements(req)\ - .with_metrics((ClassificationMetricsEnum.ROCAUC, ComplexityMetricsEnum.node_num))\ - .with_optimizer_params(params)\ + composer = ( + ComposerBuilder(task=Task(task_type)) + .with_requirements(req) + .with_metrics((ClassificationMetricsEnum.ROCAUC, ComplexityMetricsEnum.node_num)) + .with_optimizer_params(params) .build() + ) pipelines_evo_composed = 
composer.compose_pipeline(data=dataset_to_compose) pipelines_roc_auc = [] @@ -249,18 +251,15 @@ def test_multi_objective_composer(data_fixture, request): assert all([roc_auc > 0.6 for roc_auc in pipelines_roc_auc]) -def dummy_quality_metric(*args, **kwargs): - return 1.0 # stagnating - - @pytest.mark.parametrize('data_fixture', ['file_data_setup']) def test_gp_composer_with_adaptive_depth(data_fixture, request): + # TODO: i358 Should be integrational data = request.getfixturevalue(data_fixture) dataset_to_compose = data available_secondary_model_types = ['rf', 'knn', 'logit', 'dt'] available_primary_model_types = available_secondary_model_types + ['scaling', 'resample'] - quality_metric = dummy_quality_metric + quality_metric = lambda *args, **kwargs: 1.0 # noqa max_depth = 5 num_gen = 3 req = PipelineComposerRequirements(primary=available_primary_model_types, secondary=available_secondary_model_types, @@ -269,11 +268,13 @@ def test_gp_composer_with_adaptive_depth(data_fixture, request): adaptive_depth_max_stagnation=num_gen - 1, genetic_scheme_type=GeneticSchemeTypesEnum.steady_state, pop_size=10) - composer = ComposerBuilder(task=Task(TaskTypesEnum.classification)) \ - .with_requirements(req) \ - .with_optimizer_params(params) \ - .with_metrics(quality_metric) \ + composer = ( + ComposerBuilder(task=Task(TaskTypesEnum.classification)) + .with_requirements(req) + .with_optimizer_params(params) + .with_metrics(quality_metric) .build() + ) composer.compose_pipeline(data=dataset_to_compose) diff --git a/test/unit/composer/test_history.py b/test/integration/composer/test_history.py similarity index 97% rename from test/unit/composer/test_history.py rename to test/integration/composer/test_history.py index 38c367a667..4e80ee5e43 100644 --- a/test/unit/composer/test_history.py +++ b/test/integration/composer/test_history.py @@ -1,4 +1,3 @@ -import os from itertools import chain from pathlib import Path @@ -68,8 +67,7 @@ def _test_individuals_in_history(history: OptHistory): @pytest.mark.parametrize('n_jobs', [1, 2]) def test_newly_generated_history(n_jobs: int): - project_root_path = str(fedot_project_root()) - file_path_train = os.path.join(project_root_path, 'test/data/simple_classification.csv') + file_path_train = fedot_project_root().joinpath('test/data/simple_classification.csv') num_of_gens = 2 auto_model = Fedot(problem='classification', seed=42, diff --git a/test/integration/data/__init__.py b/test/integration/data/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/integration/data/test_data.py b/test/integration/data/test_data.py new file mode 100644 index 0000000000..ae455f67a7 --- /dev/null +++ b/test/integration/data/test_data.py @@ -0,0 +1,12 @@ +import numpy as np + +from fedot.core.repository.dataset_types import DataTypesEnum +from test.unit.tasks.test_classification import get_image_classification_data + + +def test_data_from_image(): + _, _, dataset_to_validate = get_image_classification_data() + + assert dataset_to_validate.data_type == DataTypesEnum.image + assert type(dataset_to_validate.features) == np.ndarray + assert type(dataset_to_validate.target) == np.ndarray diff --git a/test/integration/data_operations/__init__.py b/test/integration/data_operations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/unit/data_operations/test_text_preprocessing.py b/test/integration/data_operations/test_text_preprocessing.py similarity index 100% rename from test/unit/data_operations/test_text_preprocessing.py rename to 
test/integration/data_operations/test_text_preprocessing.py diff --git a/test/integration/models/__init__.py b/test/integration/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/unit/models/test_atomized_model.py b/test/integration/models/test_atomized_model.py similarity index 98% rename from test/unit/models/test_atomized_model.py rename to test/integration/models/test_atomized_model.py index 102b65efb6..d881ddd0e7 100644 --- a/test/unit/models/test_atomized_model.py +++ b/test/integration/models/test_atomized_model.py @@ -11,7 +11,7 @@ from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.utils import fedot_project_root -from test.unit.utilities.test_pipeline_import_export import create_correct_path, create_func_delete_files +from test.integration.utilities.test_pipeline_import_export import create_correct_path, create_func_delete_files @pytest.fixture(scope='session', autouse=True) diff --git a/test/unit/models/test_custom_model_introduction.py b/test/integration/models/test_custom_model_introduction.py similarity index 96% rename from test/unit/models/test_custom_model_introduction.py rename to test/integration/models/test_custom_model_introduction.py index 103638ec0e..1c5504360d 100644 --- a/test/unit/models/test_custom_model_introduction.py +++ b/test/integration/models/test_custom_model_introduction.py @@ -44,8 +44,8 @@ def get_centered_pipeline(with_params=True) -> Pipeline: custom_node = PipelineNode('custom', nodes_from=[lagged_node]) if with_params: custom_node.parameters = {"a": -50, - "b": 500, - 'model_predict': custom_model_imitation} + "b": 500, + 'model_predict': custom_model_imitation} node_final = PipelineNode('ridge', nodes_from=[custom_node]) pipeline = Pipeline(node_final) @@ -60,8 +60,8 @@ def get_starting_pipeline(with_params=True): custom_node = PipelineNode('custom') if with_params: custom_node.parameters = {"a": -50, - "b": 500, - 'model_predict': custom_model_imitation} + "b": 500, + 'model_predict': custom_model_imitation} lagged_node = PipelineNode('lagged', nodes_from=[custom_node]) node_final = PipelineNode('ridge', nodes_from=[lagged_node]) pipeline = Pipeline(node_final) @@ -154,7 +154,7 @@ def get_simple_pipeline(multi_data): # For custom model params as initial approximation and model as function is necessary custom_node = PipelineNode('custom/empty', nodes_from=exog_list) custom_node.parameters = {'model_predict': model_predict, - 'model_fit': model_fit} + 'model_fit': model_fit} exog_pred_node = PipelineNode('exog_ts', nodes_from=[custom_node]) diff --git a/test/unit/models/test_model.py b/test/integration/models/test_model.py similarity index 100% rename from test/unit/models/test_model.py rename to test/integration/models/test_model.py index fa3dbb99c0..332d8ef7f9 100644 --- a/test/unit/models/test_model.py +++ b/test/integration/models/test_model.py @@ -19,8 +19,8 @@ from fedot.core.operations.evaluation.operation_implementations.models.ts_implementations.statsmodels import \ GLMImplementation from fedot.core.operations.model import Model -from fedot.core.pipelines.node import PipelineNode from fedot.core.operations.operation_parameters import get_default_params, OperationParameters +from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.operation_types_repository import OperationTypesRepository diff --git 
a/test/unit/models/test_models_params.py b/test/integration/models/test_models_params.py similarity index 100% rename from test/unit/models/test_models_params.py rename to test/integration/models/test_models_params.py diff --git a/test/unit/models/test_repository.py b/test/integration/models/test_repository.py similarity index 98% rename from test/unit/models/test_repository.py rename to test/integration/models/test_repository.py index 7d366e3b65..418134e23f 100644 --- a/test/unit/models/test_repository.py +++ b/test/integration/models/test_repository.py @@ -26,7 +26,7 @@ def test_lazy_load(): def test_search_in_repository_by_tag_and_metainfo_correct(): with OperationTypesRepository() as repo: model_names = repo.suitable_operation(task_type=TaskTypesEnum.regression, - tags=['ml']) + tags=['ml']) assert 'linear' in model_names assert len(model_names) == 14 diff --git a/test/unit/models/test_split_train_test.py b/test/integration/models/test_split_train_test.py similarity index 98% rename from test/unit/models/test_split_train_test.py rename to test/integration/models/test_split_train_test.py index 78a4c5b19c..1d9b6f0efb 100644 --- a/test/unit/models/test_split_train_test.py +++ b/test/integration/models/test_split_train_test.py @@ -1,5 +1,6 @@ import random from copy import deepcopy +from typing import Tuple import numpy as np from sklearn.datasets import make_classification @@ -24,7 +25,7 @@ def generate_pipeline() -> Pipeline: return pipeline -def get_roc_auc_value(pipeline: Pipeline, train_data: InputData, test_data: InputData) -> (float, float): +def get_roc_auc_value(pipeline: Pipeline, train_data: InputData, test_data: InputData) -> Tuple[float, float]: train_pred = pipeline.predict(input_data=train_data) test_pred = pipeline.predict(input_data=test_data) roc_auc_value_test = roc_auc(y_true=test_data.target, y_score=test_pred.predict) diff --git a/test/unit/models/test_strategy.py b/test/integration/models/test_strategy.py similarity index 100% rename from test/unit/models/test_strategy.py rename to test/integration/models/test_strategy.py diff --git a/test/integration/multimodal/__init__.py b/test/integration/multimodal/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/unit/multimodal/test_multi_modal.py b/test/integration/multimodal/test_multimodal.py similarity index 56% rename from test/unit/multimodal/test_multi_modal.py rename to test/integration/multimodal/test_multimodal.py index 1b2c14faf6..8a1528af29 100644 --- a/test/unit/multimodal/test_multi_modal.py +++ b/test/integration/multimodal/test_multimodal.py @@ -1,11 +1,9 @@ -import os - from examples.advanced.multi_modal_pipeline import prepare_multi_modal_data +from fedot.core.data.multi_modal import MultiModalData from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.tasks import Task, TaskTypesEnum from fedot.core.utils import fedot_project_root -from fedot.core.data.multi_modal import MultiModalData def generate_multi_modal_pipeline(data: MultiModalData): @@ -33,27 +31,8 @@ def generate_multi_modal_pipeline(data: MultiModalData): return pipeline -def generate_multi_task_pipeline(): - ds_regr = PipelineNode('data_source_table/regr') - ds_class = PipelineNode('data_source_table/class') - - scaling_node_regr = PipelineNode('scaling', nodes_from=[ds_regr]) - scaling_node_class = PipelineNode('scaling', nodes_from=[ds_class]) - - dt_class_node = PipelineNode('dt', nodes_from=[scaling_node_class]) - - scaling_node_class_2 = 
PipelineNode('scaling', nodes_from=[dt_class_node]) - - root_regr = PipelineNode('dtreg', nodes_from=[scaling_node_regr, scaling_node_class_2]) - - initial_pipeline = Pipeline(root_regr) - - return initial_pipeline - - def test_multi_modal_pipeline(): - files_path = os.path.join('test', 'data', 'multi_modal') - path = os.path.join(str(fedot_project_root()), files_path) + path = fedot_project_root().joinpath('test', 'data', 'multi_modal') task = Task(TaskTypesEnum.classification) images_size = (128, 128) @@ -64,16 +43,3 @@ def test_multi_modal_pipeline(): prediction = pipeline.predict(fit_data) assert prediction is not None - - -def test_finding_side_root_node_in_multi_modal_pipeline(): - reg_root_node = 'dtreg' - class_root_node = 'dt' - - pipeline = generate_multi_task_pipeline() - - reg_pipeline = pipeline.pipeline_for_side_task(task_type=TaskTypesEnum.regression) - class_pipeline = pipeline.pipeline_for_side_task(task_type=TaskTypesEnum.classification) - - assert reg_pipeline.root_node.operation.operation_type == reg_root_node - assert class_pipeline.root_node.operation.operation_type == class_root_node diff --git a/test/integration/optimizer/__init__.py b/test/integration/optimizer/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/unit/optimizer/test_evaluation.py b/test/integration/optimizer/test_evaluation.py similarity index 99% rename from test/unit/optimizer/test_evaluation.py rename to test/integration/optimizer/test_evaluation.py index cb0df5d08c..acef8bfb25 100644 --- a/test/unit/optimizer/test_evaluation.py +++ b/test/integration/optimizer/test_evaluation.py @@ -1,7 +1,6 @@ import datetime import pytest - from golem.core.optimisers.fitness import Fitness, null_fitness from golem.core.optimisers.genetic.evaluation import MultiprocessingDispatcher, SequentialDispatcher, \ ObjectiveEvaluationDispatcher diff --git a/test/integration/pipelines/__init__.py b/test/integration/pipelines/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/integration/pipelines/tuning/__init__.py b/test/integration/pipelines/tuning/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/unit/pipelines/tuning/test_pipeline_tuning.py b/test/integration/pipelines/tuning/test_pipeline_tuning.py similarity index 100% rename from test/unit/pipelines/tuning/test_pipeline_tuning.py rename to test/integration/pipelines/tuning/test_pipeline_tuning.py diff --git a/test/unit/pipelines/tuning/test_tuner_builder.py b/test/integration/pipelines/tuning/test_tuner_builder.py similarity index 86% rename from test/unit/pipelines/tuning/test_tuner_builder.py rename to test/integration/pipelines/tuning/test_tuner_builder.py index db1e1a34b1..978353dbfa 100644 --- a/test/unit/pipelines/tuning/test_tuner_builder.py +++ b/test/integration/pipelines/tuning/test_tuner_builder.py @@ -16,8 +16,8 @@ from fedot.core.pipelines.tuning.search_space import PipelineSearchSpace from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, MetricType +from test.integration.pipelines.tuning.test_pipeline_tuning import get_not_default_search_space from test.unit.optimizer.test_pipeline_objective_eval import pipeline_first_test -from test.unit.pipelines.tuning.test_pipeline_tuning import get_not_default_search_space from test.unit.validation.test_table_cv import get_classification_data @@ -58,17 +58,19 @@ def test_tuner_builder_with_custom_params(tuner_class): algo = 
rand.suggest search_space = get_not_default_search_space() - tuner = TunerBuilder(data.task)\ - .with_tuner(tuner_class)\ - .with_metric(metric)\ - .with_cv_folds(cv_folds)\ - .with_validation_blocks(validation_blocks)\ - .with_timeout(timeout)\ - .with_early_stopping_rounds(early_stopping)\ - .with_iterations(iterations)\ - .with_algo(algo)\ - .with_search_space(search_space)\ + tuner = ( + TunerBuilder(data.task) + .with_tuner(tuner_class) + .with_metric(metric) + .with_cv_folds(cv_folds) + .with_validation_blocks(validation_blocks) + .with_timeout(timeout) + .with_early_stopping_rounds(early_stopping) + .with_iterations(iterations) + .with_algo(algo) + .with_search_space(search_space) .build(data) + ) assert isinstance(tuner, tuner_class) assert np.isclose(tuner.objective_evaluate(pipeline).value, objective_evaluate.evaluate(pipeline).value) diff --git a/test/integration/quality/test_synthetic_tasks.py b/test/integration/quality/test_synthetic_tasks.py index ef58c631de..7089e876da 100644 --- a/test/integration/quality/test_synthetic_tasks.py +++ b/test/integration/quality/test_synthetic_tasks.py @@ -12,7 +12,7 @@ from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.tasks import Task, TaskTypesEnum -from test.unit.composer.test_composer import to_numerical +from test.integration.composer.test_composer import to_categorical_codes def get_regression_pipeline(): @@ -29,7 +29,7 @@ def get_regression_data(): file = '../../data/simple_regression_train.csv' input_data = InputData.from_csv( os.path.join(test_file_path, file), task=Task(TaskTypesEnum.regression)) - input_data.idx = to_numerical(categorical_ids=input_data.idx) + input_data.idx = to_categorical_codes(categorical_ids=input_data.idx) return input_data diff --git a/test/integration/real_applications/test_examples.py b/test/integration/real_applications/test_examples.py index a20a49f794..833dd8bc40 100644 --- a/test/integration/real_applications/test_examples.py +++ b/test/integration/real_applications/test_examples.py @@ -1,6 +1,4 @@ -import os from datetime import timedelta -from pathlib import Path import numpy as np from sklearn.metrics import mean_squared_error @@ -22,8 +20,7 @@ def test_multiclass_example(): - project_root_path = str(fedot_project_root()) - file_path_train = os.path.join(project_root_path, 'test/data/multiclass_classification.csv') + file_path_train = fedot_project_root().joinpath('test/data/multiclass_classification.csv') pipeline = get_model(file_path_train, cur_lead_time=timedelta(seconds=5)) assert pipeline is not None @@ -47,16 +44,15 @@ def test_gapfilling_example(): def test_exogenous_ts_example(): - project_root_path = str(fedot_project_root()) - path = os.path.join(project_root_path, 'test/data/simple_sea_level.csv') + path = fedot_project_root().joinpath('test/data/simple_sea_level.csv') run_exogenous_experiment(path_to_file=path, len_forecast=50, with_exog=True) def test_nemo_multiple_points_example(): - project_root_path = str(fedot_project_root()) - path = os.path.join(project_root_path, 'test/data/ssh_points_grid_simple.csv') - exog_path = os.path.join(project_root_path, 'test/data/ssh_nemo_points_grid_simple.csv') + project_root_path = fedot_project_root() + path = project_root_path.joinpath('test/data/ssh_points_grid_simple.csv') + exog_path = project_root_path.joinpath('test/data/ssh_nemo_points_grid_simple.csv') run_multiple_example(path_to_file=path, path_to_exog_file=exog_path, out_path=None, diff --git 
a/test/integration/real_applications/test_real_cases.py b/test/integration/real_applications/test_real_cases.py
index 9afbe63592..855a3a0161 100644
--- a/test/integration/real_applications/test_real_cases.py
+++ b/test/integration/real_applications/test_real_cases.py
@@ -1,4 +1,3 @@
-import os
 import random
 
 import numpy as np
@@ -19,22 +18,14 @@
 
 
 def test_credit_scoring_problem():
-    project_root_path = str(fedot_project_root())
-    file_path_train = os.path.join(project_root_path, 'test/data/simple_classification.csv')
-    file_path_test = file_path_train
-    full_path_train = os.path.join(str(fedot_project_root()), file_path_train)
-    full_path_test = os.path.join(str(fedot_project_root()), file_path_test)
+    full_path_train = full_path_test = fedot_project_root().joinpath('test/data/simple_classification.csv')
 
     roc_auc_test = run_credit_scoring_problem(full_path_train, full_path_test, timeout=5, target='Y')
     assert roc_auc_test > 0.5
 
 
 def test_metocean_forecasting_problem():
-    project_root_path = str(fedot_project_root())
-    file_path_train = os.path.join(project_root_path, 'test/data/simple_time_series.csv')
-    file_path_test = file_path_train
-    full_path_train = os.path.join(str(fedot_project_root()), file_path_train)
-    full_path_test = os.path.join(str(fedot_project_root()), file_path_test)
+    full_path_train = full_path_test = fedot_project_root().joinpath('test/data/simple_time_series.csv')
 
     rmse = run_metocean_forecasting_problem(full_path_train,
                                             full_path_test,
@@ -72,8 +63,7 @@ def test_river_levels_problem():
 
     init_pipeline = Pipeline(node_final)
 
-    project_root_path = str(fedot_project_root())
-    file_path_train = os.path.join(project_root_path, 'test/data/station_levels.csv')
+    file_path_train = fedot_project_root().joinpath('test/data/station_levels.csv')
 
     run_river_experiment(file_path=file_path_train,
                          pipeline=init_pipeline,
@@ -88,9 +78,7 @@
 
 def test_spam_detection_problem():
     """ Simple launch of spam detection case """
-    project_root_path = str(fedot_project_root())
-    file_path_train = os.path.join(project_root_path, 'test/data/spam_detection.csv')
+    file_path_train = fedot_project_root().joinpath('test/data/spam_detection.csv')
 
     # Classification task based on text data
     run_text_problem_from_saved_meta_file(file_path_train)
-
diff --git a/test/integration/remote/__init__.py b/test/integration/remote/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/unit/remote/test_remote_composer.py b/test/integration/remote/test_remote_composer.py
similarity index 100%
rename from test/unit/remote/test_remote_composer.py
rename to test/integration/remote/test_remote_composer.py
diff --git a/test/unit/utilities/test_pipeline_import_export.py b/test/integration/utilities/test_pipeline_import_export.py
similarity index 99%
rename from test/unit/utilities/test_pipeline_import_export.py
rename to test/integration/utilities/test_pipeline_import_export.py
index 36870b78cc..f10983ef3c 100644
--- a/test/unit/utilities/test_pipeline_import_export.py
+++ b/test/integration/utilities/test_pipeline_import_export.py
@@ -12,7 +12,7 @@
 from fedot.core.pipelines.template import PipelineTemplate, extract_subtree_root
 from fedot.core.repository.tasks import Task, TaskTypesEnum
 from fedot.core.utils import fedot_project_root
-from test.unit.api.test_main_api import get_dataset
+from test.integration.api.test_main_api import get_dataset
 from test.unit.data_operations.test_data_operations_implementations import get_mixed_data
 from test.unit.multimodal.data_generators
import get_single_task_multimodal_tabular_data from test.unit.pipelines.test_decompose_pipelines import get_classification_data diff --git a/test/unit/utilities/test_project_import_export.py b/test/integration/utilities/test_project_import_export.py similarity index 97% rename from test/unit/utilities/test_project_import_export.py rename to test/integration/utilities/test_project_import_export.py index f50d534104..2b381e58b2 100644 --- a/test/unit/utilities/test_project_import_export.py +++ b/test/integration/utilities/test_project_import_export.py @@ -8,7 +8,7 @@ from fedot.api.main import Fedot from fedot.core.utils import fedot_project_root from fedot.utilities.project_import_export import export_project_to_zip, import_project_from_zip, DEFAULT_PROJECTS_PATH -from test.unit.models.test_atomized_model import create_pipeline +from test.integration.models.test_atomized_model import create_pipeline from test.unit.validation.test_table_cv import get_classification_data PATHS_TO_DELETE_AFTER_TEST = [] diff --git a/test/sensitivity/test_sensitivity.py b/test/sensitivity/test_sensitivity.py index 14bcd3dba8..1786a051e9 100644 --- a/test/sensitivity/test_sensitivity.py +++ b/test/sensitivity/test_sensitivity.py @@ -16,7 +16,7 @@ from fedot.sensitivity.pipeline_sensitivity import PipelineAnalysis from fedot.sensitivity.pipeline_sensitivity_facade import PipelineSensitivityAnalysis from fedot.sensitivity.sa_requirements import SensitivityAnalysisRequirements -from test.unit.utilities.test_pipeline_import_export import create_func_delete_files +from test.integration.utilities.test_pipeline_import_export import create_func_delete_files @pytest.fixture(scope='session', autouse=True) diff --git a/test/test_gpu_strategy.py b/test/test_gpu_strategy.py index f70bedfb33..cce62f134c 100644 --- a/test/test_gpu_strategy.py +++ b/test/test_gpu_strategy.py @@ -1,11 +1,11 @@ from typing import Tuple -from fedot.core.data.data import InputData, OutputData +from cuml.svm import SVC -from test.unit.models.test_split_train_test import get_synthetic_input_data -from fedot.core.operations.evaluation.gpu.common import CuMLEvaluationStrategy +from fedot.core.data.data import InputData, OutputData from fedot.core.operations.evaluation.gpu.classification import CuMLClassificationStrategy -from cuml.svm import SVC +from fedot.core.operations.evaluation.gpu.common import CuMLEvaluationStrategy +from test.integration.models.test_split_train_test import get_synthetic_input_data def get_synthetic_data() -> Tuple[InputData, InputData]: diff --git a/test/unit/api/test_api_cli_params.py b/test/unit/api/test_api_cli_params.py deleted file mode 100644 index 813d98d936..0000000000 --- a/test/unit/api/test_api_cli_params.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -from fedot.core.utils import fedot_project_root -from fedot.api.fedot_cli import create_parser, separate_argparse_to_fedot, preprocess_keys, run_fedot, \ - arguments_dicts - -project_root_path = str(fedot_project_root()) -ts_train_path = os.path.join(project_root_path, 'test/data/simple_time_series.csv') -ts_call = f'--problem ts_forecasting --preset fast_train --timeout 0.1 --depth 3 --arity 3 \ - --popsize 3 --gen_num 5 --opers lagged linear ridge --tuning \ - 0 --cv_folds 2 --val_bl 2 --target sea_height --train {ts_train_path} \ - --test {ts_train_path} --for_len 10'.split() - -class_train_path = os.path.join(project_root_path, 'test/data/simple_classification.csv') -class_call = f'--problem classification --train {class_train_path} --test {class_train_path} --target 
Y \ - --preset fast_train --timeout 0.1 --depth 3 --arity 3 \ - --popsize 3 --gen_num 5 --tuning 1'.split() - - -def call_cli_with_parameters(call_string): - """ Function that imitates argparse api call""" - parser = create_parser(arguments_dicts) - parameters = parser.parse_args(call_string) - main_params, fit_params = separate_argparse_to_fedot(parameters) - preprocess_keys(main_params) - preprocess_keys(fit_params) - predictions = run_fedot(parameters, main_params, fit_params, save_predictions=False) - return predictions - - -def test_cli_with_parameters(): - """ Test all parameters used in cli are available from api""" - ts_predictions = call_cli_with_parameters(ts_call) - assert ts_predictions is not None - class_predictions = call_cli_with_parameters(class_call) - assert class_predictions is not None diff --git a/test/unit/api/test_api_safety.py b/test/unit/api/test_api_safety.py index 1fcd5dc5e5..8c929280c5 100644 --- a/test/unit/api/test_api_safety.py +++ b/test/unit/api/test_api_safety.py @@ -7,7 +7,7 @@ from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.tasks import TaskTypesEnum, Task from fedot.preprocessing.preprocessing import DataPreprocessor -from test.unit.api.test_main_api import default_params +from test.integration.api.test_main_api import TESTS_MAIN_API_DEFAULT_PARAMS def get_data_analyser_with_specific_params(max_size=18, max_cat_cardinality=5): @@ -108,7 +108,7 @@ def test_api_fit_predict_with_pseudo_large_dataset_with_onehot_correct(): """ Test if safe mode in API use OneHotEncoder with small data with small cardinality """ - model = Fedot(problem="classification", **default_params) + model = Fedot(problem="classification", **TESTS_MAIN_API_DEFAULT_PARAMS) model.data_analyser.max_size = 1000 data = get_small_cat_data() model.fit(features=data, predefined_model='auto') diff --git a/test/unit/api/test_assumption_builder.py b/test/unit/api/test_assumption_builder.py index 1cfe6edffe..a56bcbe34f 100644 --- a/test/unit/api/test_assumption_builder.py +++ b/test/unit/api/test_assumption_builder.py @@ -15,12 +15,11 @@ from fedot.core.repository.tasks import TaskTypesEnum, Task, TsForecastingParams from fedot.preprocessing.data_types import TableTypesCorrector from fedot.preprocessing.preprocessing import DataPreprocessor +from test.integration.api.test_main_api \ + import get_dataset from test.unit.dag.test_graph_utils import graphs_same - from test.unit.data_operations.test_data_operations_implementations \ import get_time_series, get_small_regression_dataset -from test.unit.api.test_main_api \ - import get_dataset, load_categorical_unimodal from test.unit.multimodal.data_generators import get_single_task_multimodal_tabular_data @@ -79,9 +78,7 @@ def test_preprocessing_builder_no_data(): def test_preprocessing_builder_with_data(): # TableTypesCorrector fills in .supplementary_data needed for preprocessing_builder data_reg = TableTypesCorrector().convert_data_for_fit(get_small_regression_dataset()[0]) - data_cats = TableTypesCorrector().convert_data_for_fit(load_categorical_unimodal()[0]) data_ts, _, _ = get_time_series() - data_ts_gaps = get_test_ts_gaps_data() assert pipeline_contains_all(preprocess(TaskTypesEnum.regression, data_reg), 'scaling') @@ -110,7 +107,7 @@ def test_assumptions_builder_unsuitable_available_operations(): available_operations = ['linear', 'xgboost', 'lagged'] default_builder = UniModalAssumptionsBuilder(train_input) - checked_builder = UniModalAssumptionsBuilder(train_input)\ + checked_builder = 
UniModalAssumptionsBuilder(train_input) \ .from_operations(available_operations) assert default_builder.build() == checked_builder.build() @@ -139,7 +136,7 @@ def impl_test_assumptions_builder_suitable_available_operations(task, train_inpu baseline_operation = baseline_pipeline.root_node.operation.operation_type available_operations.remove(baseline_operation) - checked_builder = AssumptionsBuilder.get(train_input)\ + checked_builder = AssumptionsBuilder.get(train_input) \ .from_operations(available_operations) checked_pipeline = checked_builder.build()[0] diff --git a/test/unit/api/test_presets.py b/test/unit/api/test_presets.py index 8323bf5525..acb900c221 100644 --- a/test/unit/api/test_presets.py +++ b/test/unit/api/test_presets.py @@ -6,7 +6,7 @@ from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.operation_types_repository import OperationTypesRepository, get_operations_for_task from fedot.core.repository.tasks import Task, TaskTypesEnum -from test.unit.api.test_main_api import data_with_binary_features_and_categorical_target +from test.integration.api.test_main_api import data_with_binary_features_and_categorical_target def test_presets_classification(): diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py index daf3e6cc24..d1016dbd95 100644 --- a/test/unit/data/test_data.py +++ b/test/unit/data/test_data.py @@ -12,7 +12,6 @@ from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.tasks import Task, TaskTypesEnum from fedot.core.utils import fedot_project_root -from test.unit.tasks.test_classification import get_image_classification_data from test.unit.tasks.test_forecasting import get_ts_data_with_dt_idx @@ -98,14 +97,6 @@ def test_with_custom_target(): assert np.array_equal(expected_target, actual_target) -def test_data_from_image(): - _, _, dataset_to_validate = get_image_classification_data() - - assert dataset_to_validate.data_type == DataTypesEnum.image - assert type(dataset_to_validate.features) == np.ndarray - assert type(dataset_to_validate.target) == np.ndarray - - def test_data_from_json(): # several features files_path = os.path.join('test', 'data', 'multi_modal') diff --git a/test/unit/data/test_data_merge_text.py b/test/unit/data/test_data_merge_text.py index edcb934cc0..6080e7f0d8 100644 --- a/test/unit/data/test_data_merge_text.py +++ b/test/unit/data/test_data_merge_text.py @@ -1,8 +1,6 @@ -import os - +import numpy as np import pandas as pd import pytest -import numpy as np from fedot.core.data.data import OutputData from fedot.core.data.merge.data_merger import DataMerger @@ -12,8 +10,7 @@ def load_sample_text(file_path=None, label_col='label'): - base_dir = str(fedot_project_root()) - file_path = file_path or os.path.join(base_dir, 'cases', 'data', 'spam', 'spamham.csv') + file_path = file_path or fedot_project_root().joinpath('cases', 'data', 'spam', 'spamham.csv') df_text = pd.read_csv(file_path) df_text = df_text.sample(frac=1).reset_index(drop=True) @@ -33,7 +30,7 @@ def generate_output_texts(length=10, num_columns=1): features = np.hstack([np.expand_dims(features, axis=-1)] * num_columns) idx = np.arange(0, length) - return OutputData(idx, task, data_type, features=features, predict=features, target=None) + return OutputData(idx, task, data_type, features=features, predict=features, target=None) # len(params) is a number of input arrays, each element of params - number of columns in the corresponding array diff --git a/test/unit/data_operations/test_data_operation_params.py 
b/test/unit/data_operations/test_data_operation_params.py index 39a20d772e..53e1b43e57 100644 --- a/test/unit/data_operations/test_data_operation_params.py +++ b/test/unit/data_operations/test_data_operation_params.py @@ -1,4 +1,3 @@ -import os from copy import copy import numpy as np @@ -41,8 +40,7 @@ def test_lagged_with_invalid_params_fit_correctly(): len_forecast = 50 # The length of the time series is 500 elements - project_root_path = str(fedot_project_root()) - file_path = os.path.join(project_root_path, 'test/data/short_time_series.csv') + file_path = fedot_project_root().joinpath('test/data/short_time_series.csv') df = pd.read_csv(file_path) time_series = np.array(df['sea_height']) diff --git a/test/unit/multimodal/test_multimodal.py b/test/unit/multimodal/test_multimodal.py index ff8cff83c7..c2c19b93ec 100644 --- a/test/unit/multimodal/test_multimodal.py +++ b/test/unit/multimodal/test_multimodal.py @@ -1,4 +1,7 @@ from fedot.api.main import Fedot +from fedot.core.pipelines.node import PipelineNode +from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.repository.tasks import TaskTypesEnum from test.unit.multimodal.data_generators import get_single_task_multimodal_tabular_data @@ -28,3 +31,34 @@ def test_multimodal_api(): assert pipeline is not None assert (9, 1) == prediction.shape + + +def generate_multi_task_pipeline(): + ds_regr = PipelineNode('data_source_table/regr') + ds_class = PipelineNode('data_source_table/class') + + scaling_node_regr = PipelineNode('scaling', nodes_from=[ds_regr]) + scaling_node_class = PipelineNode('scaling', nodes_from=[ds_class]) + + dt_class_node = PipelineNode('dt', nodes_from=[scaling_node_class]) + + scaling_node_class_2 = PipelineNode('scaling', nodes_from=[dt_class_node]) + + root_regr = PipelineNode('dtreg', nodes_from=[scaling_node_regr, scaling_node_class_2]) + + initial_pipeline = Pipeline(root_regr) + + return initial_pipeline + + +def test_finding_side_root_node_in_multi_modal_pipeline(): + reg_root_node = 'dtreg' + class_root_node = 'dt' + + pipeline = generate_multi_task_pipeline() + + reg_pipeline = pipeline.pipeline_for_side_task(task_type=TaskTypesEnum.regression) + class_pipeline = pipeline.pipeline_for_side_task(task_type=TaskTypesEnum.classification) + + assert reg_pipeline.root_node.operation.operation_type == reg_root_node + assert class_pipeline.root_node.operation.operation_type == class_root_node diff --git a/test/unit/optimizer/gp_operators/test_mutation.py b/test/unit/optimizer/gp_operators/test_mutation.py index 030523b997..8cbdf06a82 100644 --- a/test/unit/optimizer/gp_operators/test_mutation.py +++ b/test/unit/optimizer/gp_operators/test_mutation.py @@ -2,7 +2,13 @@ from pathlib import Path import pytest +from golem.core.dag.graph_node import GraphNode +from golem.core.dag.verification_rules import DEFAULT_DAG_RULES +from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.base_mutations import MutationStrengthEnum +from golem.core.optimisers.genetic.operators.mutation import Mutation +from golem.core.optimisers.graph import OptGraph, OptNode +from golem.core.optimisers.optimizer import GraphGenerationParams from fedot.core.composer.gp_composer.specific_operators import boosting_mutation from fedot.core.data.data import InputData @@ -14,13 +20,7 @@ from fedot.core.pipelines.pipeline_graph_generation_params import get_pipeline_generation_params from fedot.core.repository.operation_types_repository import get_operations_for_task from 
fedot.core.repository.tasks import Task, TaskTypesEnum -from golem.core.dag.graph_node import GraphNode -from golem.core.dag.verification_rules import DEFAULT_DAG_RULES -from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters -from golem.core.optimisers.genetic.operators.mutation import Mutation -from golem.core.optimisers.graph import OptGraph, OptNode -from golem.core.optimisers.optimizer import GraphGenerationParams -from test.unit.composer.test_composer import to_numerical +from test.integration.composer.test_composer import to_categorical_codes from test.unit.dag.test_graph_utils import find_first from test.unit.tasks.test_forecasting import get_ts_data @@ -35,7 +35,7 @@ def get_requirements_and_params_for_task(task: TaskTypesEnum): def file_data(): test_file_path = Path(__file__).parents[3].joinpath('data', 'simple_classification.csv') input_data = InputData.from_csv(test_file_path) - input_data.idx = to_numerical(categorical_ids=input_data.idx) + input_data.idx = to_categorical_codes(categorical_ids=input_data.idx) return input_data diff --git a/test/unit/optimizer/test_external.py b/test/unit/optimizer/test_external.py index e53d661da0..39434b1f3f 100644 --- a/test/unit/optimizer/test_external.py +++ b/test/unit/optimizer/test_external.py @@ -13,7 +13,7 @@ from fedot.core.data.data_split import train_test_data_setup from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline -from test.unit.models.test_model import classification_dataset +from test.integration.models.test_model import classification_dataset _ = classification_dataset # to avoid auto-removing of import diff --git a/test/unit/optimizer/test_pipeline_objective_eval.py b/test/unit/optimizer/test_pipeline_objective_eval.py index 97a92fd743..cfb3f86444 100644 --- a/test/unit/optimizer/test_pipeline_objective_eval.py +++ b/test/unit/optimizer/test_pipeline_objective_eval.py @@ -4,7 +4,6 @@ import numpy as np import pytest - from golem.core.optimisers.fitness import SingleObjFitness from fedot.core.data.data import InputData @@ -19,7 +18,7 @@ RegressionMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum from fedot.core.validation.split import tabular_cv_generator, OneFoldInputDataSplit -from test.unit.models.test_model import classification_dataset +from test.integration.models.test_model import classification_dataset from test.unit.tasks.test_forecasting import get_simple_ts_pipeline from test.unit.validation.test_table_cv import sample_pipeline from test.unit.validation.test_time_series_cv import configure_experiment diff --git a/test/unit/pipelines/test_pipeline.py b/test/unit/pipelines/test_pipeline.py index 6355218a93..11e597ae11 100644 --- a/test/unit/pipelines/test_pipeline.py +++ b/test/unit/pipelines/test_pipeline.py @@ -8,7 +8,6 @@ from random import seed import numpy as np -import pandas as pd import pytest from sklearn.datasets import load_iris @@ -20,8 +19,9 @@ from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams from fedot.core.utils import probs_to_labels from fedot.preprocessing.preprocessing import DataPreprocessor +from test.integration.composer.test_composer import to_categorical_codes +from test.integration.models.test_model import classification_dataset_with_redundant_features from test.unit.dag.test_graph_operator import get_pipeline -from test.unit.models.test_model import classification_dataset_with_redundant_features from test.unit.pipelines.test_pipeline_comparison import pipeline_first from 
test.unit.tasks.test_forecasting import get_ts_data @@ -55,15 +55,10 @@ def file_data_setup(): file = '../../data/simple_classification.csv' input_data = InputData.from_csv( os.path.join(test_file_path, file)) - input_data.idx = _to_numerical(categorical_ids=input_data.idx) + input_data.idx = to_categorical_codes(categorical_ids=input_data.idx) return input_data -def _to_numerical(categorical_ids: np.ndarray): - encoded = pd.factorize(categorical_ids)[0] - return encoded - - @pytest.mark.parametrize('data_fixture', ['data_setup', 'file_data_setup']) def test_nodes_sequence_fit_correct(data_fixture, request): data = request.getfixturevalue(data_fixture) @@ -179,7 +174,7 @@ def test_secondary_nodes_is_invariant_to_inputs_order(data_setup): second = PipelineNode(operation_type='lda') third = PipelineNode(operation_type='knn') final = PipelineNode(operation_type='logit', - nodes_from=[first, second, third]) + nodes_from=[first, second, third]) pipeline = Pipeline() for node in [first, second, third, final]: @@ -190,7 +185,7 @@ def test_secondary_nodes_is_invariant_to_inputs_order(data_setup): third = deepcopy(third) final_shuffled = PipelineNode(operation_type='logit', - nodes_from=[third, first, second]) + nodes_from=[third, first, second]) pipeline_shuffled = Pipeline() # change order of nodes in list @@ -265,7 +260,7 @@ def test_pipeline_str(): second = PipelineNode(operation_type='lda') third = PipelineNode(operation_type='knn') final = PipelineNode(operation_type='rf', - nodes_from=[first, second, third]) + nodes_from=[first, second, third]) pipeline = Pipeline() pipeline.add_node(final) @@ -283,7 +278,7 @@ def test_pipeline_repr(): second = PipelineNode(operation_type='lda') third = PipelineNode(operation_type='knn') final = PipelineNode(operation_type='rf', - nodes_from=[first, second, third]) + nodes_from=[first, second, third]) pipeline = Pipeline() pipeline.add_node(final) @@ -313,7 +308,7 @@ def test_delete_node_with_redirection(): second = PipelineNode(operation_type='lda') third = PipelineNode(operation_type='knn', nodes_from=[first, second]) final = PipelineNode(operation_type='rf', - nodes_from=[third]) + nodes_from=[third]) pipeline = Pipeline() pipeline.add_node(final) @@ -329,7 +324,7 @@ def test_delete_primary_node(): second = PipelineNode(operation_type='lda') third = PipelineNode(operation_type='knn', nodes_from=[first]) final = PipelineNode(operation_type='rf', - nodes_from=[second, third]) + nodes_from=[second, third]) pipeline = Pipeline(final) # when diff --git a/test/unit/preprocessing/test_preprocessors.py b/test/unit/preprocessing/test_preprocessors.py index 485593d300..35936dc766 100644 --- a/test/unit/preprocessing/test_preprocessors.py +++ b/test/unit/preprocessing/test_preprocessors.py @@ -1,5 +1,3 @@ -import os - import numpy as np import pandas as pd from golem.core.log import default_log @@ -10,9 +8,9 @@ from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.tasks import TaskTypesEnum, Task +from fedot.core.utils import fedot_project_root from fedot.preprocessing.data_types import TableTypesCorrector, apply_type_transformation from fedot.preprocessing.structure import DEFAULT_SOURCE_NAME -from test.unit.api.test_api_cli_params import project_root_path from test.unit.preprocessing.test_pipeline_preprocessing import data_with_mixed_types_in_each_column, \ correct_preprocessing_params @@ -38,7 +36,7 @@ def get_mixed_data_with_str_and_float_values(idx: int = None): def 
get_data_with_string_columns(): - file_path = os.path.join(project_root_path, 'test/data/data_with_mixed_column.csv') + file_path = fedot_project_root().joinpath('test/data/data_with_mixed_column.csv') df = pd.read_csv(file_path) task = Task(TaskTypesEnum.classification) @@ -258,9 +256,9 @@ def test_str_numbers_with_dots_and_commas_in_predict(): converted to ints even if it contains str with dots/commas""" task = Task(TaskTypesEnum.classification) features = np.array([['8,5'], - ['4.9'], - ['3,2'], - ['6.1']], dtype=object) + ['4.9'], + ['3,2'], + ['6.1']], dtype=object) target = np.array([['no'], ['yes'], ['yes'], ['yes']]) input_data = InputData(idx=np.arange(4), features=features, target=target, task=task, data_type=DataTypesEnum.table) diff --git a/test/unit/tasks/test_classification.py b/test/unit/tasks/test_classification.py index c1bf2921ee..8bc423afb8 100644 --- a/test/unit/tasks/test_classification.py +++ b/test/unit/tasks/test_classification.py @@ -1,13 +1,6 @@ import os import numpy as np -from golem.utilities.requirements_notificator import warn_requirement - -try: - import tensorflow as tf -except ModuleNotFoundError: - warn_requirement('tensorflow') - from sklearn.datasets import load_iris, make_classification from sklearn.metrics import roc_auc_score as roc_auc @@ -15,28 +8,11 @@ from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.data.supplementary_data import SupplementaryData -from fedot.core.operations.evaluation.operation_implementations.models.keras import ( - FedotCNNImplementation, - check_input_array, - create_deep_cnn, - fit_cnn, - predict_cnn -) from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.tasks import Task, TaskTypesEnum -from test.unit.common_tests import is_predict_ignores_target -from test.unit.models.test_model import classification_dataset_with_redundant_features - - -def check_predict_cnn_correct(model, dataset_to_validate): - return is_predict_ignores_target( - predict_func=predict_cnn, - predict_args={'trained_model': model}, - data_arg_name='predict_data', - input_data=dataset_to_validate, - ) +from test.integration.models.test_model import classification_dataset_with_redundant_features def pipeline_simple() -> Pipeline: @@ -191,49 +167,3 @@ def test_output_mode_full_probs(): assert np.array_equal(results_probs.predict, results_default.predict) assert results.predict.shape == (len(test_data.target), 2) assert results_probs.predict.shape == (len(test_data.target), 1) - - -def test_image_classification_quality(): - roc_auc_on_valid, _, _ = get_image_classification_data() - deviation_composite = roc_auc_on_valid - 0.5 - - roc_auc_on_valid, _, _ = get_image_classification_data(composite_flag=False) - deviation_simple = roc_auc_on_valid - 0.5 - - assert abs(deviation_composite) < 0.25 - assert abs(deviation_simple) < 0.35 - - -def test_cnn_custom_class(): - cnn_class = FedotCNNImplementation() - - assert cnn_class.params is not None - assert type(cnn_class) == FedotCNNImplementation - - -def test_cnn_methods(): - _, dataset_to_train, dataset_to_validate = get_image_classification_data() - image_shape = (28, 28, 1) - num_classes = 7 - epochs = 10 - batch_size = 128 - - cnn_model = create_deep_cnn(input_shape=image_shape, - num_classes=num_classes) - - transformed_x_train, transform_flag = check_input_array(x_train=dataset_to_train.features) - - model 
= fit_cnn(train_data=dataset_to_train, - model=cnn_model, - epochs=epochs, - batch_size=batch_size) - - prediction = predict_cnn(trained_model=model, - predict_data=dataset_to_validate) - - assert type(cnn_model) == tf.keras.Sequential - assert transform_flag is True - assert cnn_model.input_shape[1:] == image_shape - assert cnn_model.output_shape[1] == num_classes - assert type(prediction) == np.ndarray - assert check_predict_cnn_correct(model, dataset_to_validate) diff --git a/test/unit/tasks/test_clustering.py b/test/unit/tasks/test_clustering.py index a48156021a..2dd78b3003 100644 --- a/test/unit/tasks/test_clustering.py +++ b/test/unit/tasks/test_clustering.py @@ -3,7 +3,7 @@ from fedot.core.data.data_split import train_test_data_setup from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline -from test.unit.models.test_split_train_test import get_roc_auc_value, get_synthetic_input_data +from test.integration.models.test_split_train_test import get_roc_auc_value, get_synthetic_input_data def generate_pipeline() -> Pipeline: diff --git a/test/unit/tasks/test_forecasting.py b/test/unit/tasks/test_forecasting.py index a74dcbf50d..88b2fe06cd 100644 --- a/test/unit/tasks/test_forecasting.py +++ b/test/unit/tasks/test_forecasting.py @@ -1,4 +1,3 @@ -import os from copy import deepcopy from random import seed from typing import Optional @@ -41,8 +40,7 @@ def get_ts_data(n_steps: int = 80, forecast_length: int = 5, validation_blocks: :param forecast_length: the length of forecast :param validation_blocks: number of validation blocks """ - project_root_path = str(fedot_project_root()) - file_path = os.path.join(project_root_path, 'test/data/simple_time_series.csv') + file_path = fedot_project_root().joinpath('test/data/simple_time_series.csv') df = pd.read_csv(file_path) time_series = np.array(df['sea_height'])[:n_steps] @@ -64,8 +62,7 @@ def get_ts_data_with_dt_idx(n_steps=80, forecast_length=5): :param n_steps: number of elements in time series to take :param forecast_length: the length of forecast """ - project_root_path = str(fedot_project_root()) - file_path = os.path.join(project_root_path, 'test/data/simple_sea_level.csv') + file_path = fedot_project_root().joinpath('test/data/simple_sea_level.csv') df = pd.read_csv(file_path) time_series = np.array(df.iloc[:n_steps, 1]) @@ -256,7 +253,7 @@ def test_ts_single_pipeline_model_without_multioutput_support(): def test_exception_if_incorrect_forecast_length(): with pytest.raises(ValueError) as exc: _, _ = get_ts_data(forecast_length=0) - assert str(exc.value) == f'Forecast length should be more then 0' + assert str(exc.value) == 'Forecast length should be more then 0' def test_multistep_out_of_sample_forecasting(): @@ -297,7 +294,7 @@ def test_ts_forecasting_with_multiple_series_in_lagged(): """ Test pipeline predict correctly when lagged operation get several time series """ horizon = 3 n_steps = 50 - train_data, test_data = get_ts_data(n_steps=n_steps + horizon, forecast_length=horizon) + train_data, test_data = get_ts_data(n_steps=n_steps + horizon, forecast_length=horizon) pipeline = get_multiple_ts_pipeline() pipeline.fit(train_data) diff --git a/test/unit/tasks/test_multi_ts_forecast.py b/test/unit/tasks/test_multi_ts_forecast.py index 613125b156..15c6bf15f0 100644 --- a/test/unit/tasks/test_multi_ts_forecast.py +++ b/test/unit/tasks/test_multi_ts_forecast.py @@ -1,4 +1,3 @@ -import os from typing import Optional import numpy as np @@ -14,8 +13,7 @@ def get_multi_ts_data(forecast_length: int = 
5, validation_blocks: Optional[int] = None): task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=forecast_length)) - project_root_path = str(fedot_project_root()) - file_path = os.path.join(project_root_path, 'test/data/synthetic_multi_ts.csv') + file_path = fedot_project_root().joinpath('test/data/synthetic_multi_ts.csv') data = InputData.from_csv_multi_time_series( file_path=file_path, task=task) diff --git a/test/unit/validation/test_table_cv.py b/test/unit/validation/test_table_cv.py index 3fdde5a895..17eff41c7b 100644 --- a/test/unit/validation/test_table_cv.py +++ b/test/unit/validation/test_table_cv.py @@ -1,12 +1,9 @@ import logging -import os from datetime import timedelta from functools import partial import pytest from golem.core.tuning.simultaneous import SimultaneousTuner - -from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements from sklearn.metrics import roc_auc_score as roc_auc from sklearn.model_selection import KFold, StratifiedKFold @@ -19,13 +16,14 @@ from fedot.core.optimisers.objective.metrics_objective import MetricsObjective from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder from fedot.core.repository.operation_types_repository import OperationTypesRepository from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum +from fedot.core.utils import fedot_project_root from fedot.core.validation.split import tabular_cv_generator -from test.unit.api.test_api_cli_params import project_root_path -from test.unit.models.test_model import classification_dataset +from test.integration.models.test_model import classification_dataset from test.unit.tasks.test_classification import get_iris_data, pipeline_simple _ = classification_dataset @@ -33,12 +31,12 @@ def sample_pipeline(): return Pipeline(PipelineNode(operation_type='logit', - nodes_from=[PipelineNode(operation_type='rf'), - PipelineNode(operation_type='scaling')])) + nodes_from=[PipelineNode(operation_type='rf'), + PipelineNode(operation_type='scaling')])) def get_classification_data(): - file_path = os.path.join(project_root_path, 'test/data/simple_classification.csv') + file_path = fedot_project_root().joinpath('test/data/simple_classification.csv') input_data = InputData.from_csv(file_path, task=Task(TaskTypesEnum.classification)) return input_data @@ -86,12 +84,14 @@ def test_tuner_cv_classification_correct(): dataset = get_iris_data() simple_pipeline = pipeline_simple() - tuner = TunerBuilder(dataset.task).with_tuner(SimultaneousTuner)\ - .with_metric(ClassificationMetricsEnum.ROCAUC)\ - .with_cv_folds(folds) \ - .with_iterations(1) \ - .with_timeout(timedelta(minutes=1))\ + tuner = ( + TunerBuilder(dataset.task).with_tuner(SimultaneousTuner) + .with_metric(ClassificationMetricsEnum.ROCAUC) + .with_cv_folds(folds) + .with_iterations(1) + .with_timeout(timedelta(minutes=1)) .build(dataset) + ) tuned = tuner.tune(simple_pipeline) assert tuned
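
Two idioms recur throughout the hunks above and are worth spelling out once. First, every relocated test drops the old `os.path.join(str(fedot_project_root()), ...)` pattern in favour of chaining `joinpath` on the `pathlib.Path` that `fedot_project_root()` returns. A minimal sketch of the new idiom, assuming only what the diff itself shows (the helper returns a `Path`, and the CSV carries a `sea_height` column, mirroring `get_ts_data` in test/unit/tasks/test_forecasting.py):

    # Sketch of the path-handling idiom adopted across the moved tests.
    # fedot_project_root() is assumed to return a pathlib.Path, as the
    # joinpath calls in the diff imply; file and column names are taken
    # from the test_forecasting.py hunk above.
    import numpy as np
    import pandas as pd

    from fedot.core.utils import fedot_project_root

    def load_sea_height(n_steps: int = 80) -> np.ndarray:
        file_path = fedot_project_root().joinpath('test/data/simple_time_series.csv')
        df = pd.read_csv(file_path)
        # slice to the requested number of observations, as get_ts_data does
        return np.array(df['sea_height'])[:n_steps]

Second, several files switch their imports from `to_numerical`/`_to_numerical` to `to_categorical_codes` from `test.integration.composer.test_composer`. The body of the renamed function is not shown in this part of the patch, but the `_to_numerical` helper deleted from test/unit/pipelines/test_pipeline.py suggests it is the same one-liner over `pd.factorize`:

    import numpy as np
    import pandas as pd

    # Presumed equivalent of the renamed helper, reconstructed from the
    # _to_numerical body deleted in test_pipeline.py above.
    def to_categorical_codes(categorical_ids: np.ndarray) -> np.ndarray:
        # pd.factorize returns (codes, uniques); the tests only need the
        # integer codes that replace string indices with numeric ones
        return pd.factorize(categorical_ids)[0]

Used as in the diff: `input_data.idx = to_categorical_codes(categorical_ids=input_data.idx)`.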