In [1]:
import pandas as pd
from fedot.api.main import Fedot
from fedot.core.data.data import InputData
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

In [2]:
# Columns of train.csv that together form the multi-column target.
# NOTE(review): "K_Scatch" is kept exactly as written because it is used as a
# CSV column name — presumably it matches the file header; verify before "fixing".
target_columns = [
    "Pastry",
    "Z_Scratch",
    "K_Scatch",
    "Stains",
    "Dirtiness",
    "Bumps",
    "Other_Faults",
]

# Directory with the competition CSV files, relative to this notebook.
path = "../datasets/playground-series-s4e3"

# Labelled training data: the listed columns become the target and "id" is
# passed as a column to drop.
train = InputData.from_csv(
    f'{path}/train.csv',
    target_columns=target_columns,
    columns_to_drop=['id'],
)

# Test data is loaded with target_columns=None, i.e. without any target.
test = InputData.from_csv(
    f'{path}/test.csv',
    target_columns=None,
    columns_to_drop=['id'],
)

# Submission template as a plain DataFrame; the target columns are filled in
# from the model predictions in the last cell.
sub = pd.read_csv(f"{path}/sample_submission.csv")

2024-07-15 10:31:15,780 - CSV data extraction - Used the column as index: "id".
2024-07-15 10:31:15,852 - CSV data extraction - Used the column as index: "id".


In [3]:
# Configure the FEDOT AutoML search. No `metric` is passed, so whatever FEDOT
# uses by default for the task applies (an explicit list such as
# ["precision", "accuracy", "roc_auc"] was tried earlier and disabled).
auto_model = Fedot(
    # --- task ---
    problem="classification",
    # --- search budget and strategy ---
    preset="best_quality",
    timeout=5,  # NOTE(review): presumably minutes (the logs report time in "min") — confirm
    with_tuning=True,
    # Starting point of the search: a "scaling" node followed by an "xgboost" node.
    initial_assumption=(
        PipelineBuilder()
        .add_node("scaling")
        .add_node("xgboost")
        .build()
    ),
    # --- validation and reproducibility ---
    cv_folds=10,
    seed=42,
    # --- runtime behaviour ---
    n_jobs=4,
    logging_level=10,  # numeric level 10 is the value of the stdlib's logging.DEBUG
    use_pipelines_cache=False,
    use_auto_preprocessing=False,
)

In [4]:
# Run the AutoML search on the training InputData. Only `features` is passed;
# `train` was built with target_columns, so it presumably carries the target itself.
# NOTE(review): the recorded run of this cell ended with
# "IndexError: list index out of range" right after an empty first generation
# ("0 individuals out of 0 in previous population were evaluated successfully").
# The cause is not visible from this notebook — TODO investigate before relying
# on any cell below (the multi-column target combined with cv_folds=10 is one suspect).
auto_model.fit(features=train)

2024-07-15 10:31:26,863 - AssumptionsHandler - Initial pipeline fitting started
2024-07-15 10:31:28,189 - PipelineNode - Trying to fit pipeline node with operation: xgboost
2024-07-15 10:31:28,190 - PipelineNode - Fit all parent nodes in secondary node with operation: xgboost
2024-07-15 10:31:28,192 - PipelineNode - Trying to fit pipeline node with operation: scaling
2024-07-15 10:31:30,985 - PipelineNode - Obtain prediction in pipeline node by operation: xgboost
2024-07-15 10:31:30,987 - PipelineNode - Fit all parent nodes in secondary node with operation: xgboost
2024-07-15 10:31:30,989 - PipelineNode - Obtain prediction in pipeline node by operation: scaling
2024-07-15 10:31:31,039 - AssumptionsHandler - Initial pipeline was fitted successfully
2024-07-15 10:31:31,041 - AssumptionsHandler - Memory consumption for fitting of the initial pipeline in main session: current 29.1 MiB, max: 45.1 MiB
2024-07-15 10:31:31,046 - ApiComposer - Initial pipeline was fitted in 5.1 sec.
2024-07-15 

Generations:   0%|          | 0/10000 [00:00<?, ?gen/s]

2024-07-15 10:31:32,019 - MultiprocessingDispatcher - Number of used CPU's: 4
2024-07-15 10:31:32,029 - MultiprocessingDispatcher - 0 individuals out of 0 in previous population were evaluated successfully. 0% is a fairly small percentage of successful evaluation.
2024-07-15 10:31:32,031 - EvoGraphOptimizer - Generation num: 1 size: 0
2024-07-15 10:31:32,033 - EvoGraphOptimizer - Best individuals: HallOfFame archive fitness (0): []
2024-07-15 10:31:32,036 - EvoGraphOptimizer - no improvements for 1 iterations
2024-07-15 10:31:32,039 - EvoGraphOptimizer - spent time: 0.0 min


Generations:   0%|          | 0/10000 [00:00<?, ?gen/s]

2024-07-15 10:31:32,043 - OptimisationTimer - Composition time: 0.0 min





IndexError: list index out of range

In [None]:
# Predict on the unlabeled test data; `prediction` is later indexed as a 2-D
# array with one column per entry of target_columns when filling the submission.
# NOTE(review): the log lines saved under this cell are dated 2024-07-11, i.e.
# before the failed fit recorded above (2024-07-15), so that output is stale.
# NOTE(review): presumably predict() yields class labels; if the submission is
# expected to contain probabilities, predict_proba() may be the right call — confirm.
prediction = auto_model.predict(features=test)

2024-07-11 16:29:09,794 - PipelineNode - Obtain prediction in pipeline node by operation: xgboost
2024-07-11 16:29:09,799 - PipelineNode - Fit all parent nodes in secondary node with operation: xgboost
2024-07-11 16:29:09,802 - PipelineNode - Obtain prediction in pipeline node by operation: scaling


In [None]:
from sklearn.metrics import accuracy_score

# The original call scored `test.Load_Type` against `prediction`, but `test` is
# an InputData loaded with target_columns=None: it has no labels at all, and no
# "Load_Type" column is defined anywhere in this notebook (it looks like a
# leftover from another dataset). The only labelled data available here is
# `train`, so the check is done in-sample against `train.target`.
# Caveat: in-sample accuracy is optimistic; it is a sanity check, not an
# estimate of test performance.
# NOTE(review): assumes predict() returns a label array with the same shape as
# train.target (one column per entry of target_columns); with 2-D inputs
# accuracy_score reports exact-match (subset) accuracy — confirm.
train_prediction = auto_model.predict(features=train)
accuracy_score(train.target, train_prediction)

In [None]:
# Fill the submission template: the k-th prediction column goes into the k-th
# name of target_columns, then the frame is written out without its index.
for position in range(len(target_columns)):
    sub[target_columns[position]] = prediction[:, position]
sub.to_csv("submission.csv", index=False)