In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from fedot.api.main import Fedot
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

In [2]:
# Single location of the competition files; edit this one line when running on another machine.
DATA_DIR = Path('/Users/vadim/Desktop/FEDOT Intro/playground-series-s4e9')

# The `id` column is removed from both feature frames right after loading.
# drop() without inplace=True returns a new frame, so nothing is mutated in place
# and the cell reads as a plain "load -> clean" expression per dataset.
train = pd.read_csv(DATA_DIR / 'train.csv').drop(columns=['id'])
test = pd.read_csv(DATA_DIR / 'test.csv').drop(columns=['id'])

# The sample submission is loaded unchanged (its columns are kept as-is).
sub = pd.read_csv(DATA_DIR / 'sample_submission.csv')

In [3]:
# Sanity check: (rows, columns) of the training frame as loaded above.
train.shape

(188533, 12)

In [4]:
# Starting point handed to the composer: a pipeline consisting of a single "lgbmreg" node.
initial_pipeline = PipelineBuilder().add_node("lgbmreg").build()

# AutoML configuration; keyword arguments are grouped by concern.
auto_model = Fedot(
    # --- task and objective ---
    problem="regression",
    metric=["rmse"],
    # --- search strategy and budget ---
    preset="best_quality",
    initial_assumption=initial_pipeline,
    with_tuning=True,
    cv_folds=10,
    timeout=5,  # reported by the composer log as "Time limit: 5 min"
    # --- reproducibility and resources ---
    seed=42,
    n_jobs=8,
    # --- diagnostics, caching, preprocessing ---
    logging_level=10,  # NOTE(review): 10 is logging.DEBUG in the stdlib numbering — confirm FEDOT uses the same scale
    use_pipelines_cache=False,
    use_auto_preprocessing=False,
)

2024-10-24 15:57:34,336 - Topological features operation requires extra dependencies for time series forecasting, which are not installed. It can infuence the performance. Please install it by 'pip install fedot[extra]'


## Observations:

- The initial pipeline requires a lot of memory: ~30 GB (according to OS monitoring)
- In logs: 'ApiComposer - Initial pipeline was fitted in 4758.1 sec.'
- In fact, it took almost exactly 10 times less: 16:05:59,551 - 15:58:03,813 ≈ 7 minutes 56 seconds ≈ 476 seconds
- The x10 multiplier comes from cv_folds=10

In [6]:
# Fit the AutoML model on the training frame, passing the target as the column name "price".
# The call is the cell's last expression, so whatever fit() returns is what the cell displays.
auto_model.fit(features=train, target="price")

2024-10-24 15:58:03,813 - AssumptionsHandler - Initial pipeline fitting started
2024-10-24 16:04:47,638 - PipelineNode - Trying to fit pipeline node with operation: lgbmreg
2024-10-24 16:05:59,366 - PipelineNode - Obtain prediction in pipeline node by operation: lgbmreg
2024-10-24 16:05:59,551 - AssumptionsHandler - Initial pipeline was fitted successfully
2024-10-24 16:05:59,554 - AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 1099.4 MiB, max: 20846.8 MiB
2024-10-24 16:05:59,600 - ApiComposer - Initial pipeline was fitted in 4758.1 sec.
2024-10-24 16:05:59,611 - ApiComposer - AutoML configured. Parameters tuning: True. Time limit: 5 min. Set of candidate models: ['adareg', 'catboostreg', 'dtreg', 'fast_ica', 'isolation_forest_reg', 'knnreg', 'lasso', 'lgbmreg', 'linear', 'normalization', 'pca', 'poly_features', 'ransac_lin_reg', 'ransac_non_lin_reg', 'rfr', 'ridge', 'scaling', 'sgdr', 'svr', 'xgboostreg'].
2024-10-24 16:05:59,625 -

{'depth': 1, 'length': 1, 'nodes': [lgbmreg]}

In [None]:
# Persist the pipeline currently held by the AutoML model.
# NOTE(review): the destination is an absolute, machine-specific directory — adjust when running elsewhere.
save_options = {
    'path': '/Users/vadim/Desktop/FEDOT Intro/playground-series-s4e9/saved_pipelines',
    'create_subdir': True,
    'is_datetime_in_path': True,
}
auto_model.current_pipeline.save(**save_options)

In [8]:
# NOTE(review): the submission step below is commented out — presumably until a fit completes; confirm before enabling.
# prediction = auto_model.predict(features=test)

# sub['price'] = prediction
# sub.to_csv('submission_1.csv', index=False)