# Data Science Pipeline

In [1]:
from kedro.pipeline import Pipeline, node
from spaceflights.pipelines.data_science.nodes import split_data, train_model, evaluate_model

def create_pipeline(**kwargs):
    """Assemble the three-node data science pipeline.

    The nodes are connected purely through dataset names: the outputs of
    ``split_data`` feed ``train_model`` and ``evaluate_model``, and the
    ``regressor`` produced by ``train_model`` feeds ``evaluate_model``.

    Args:
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        Pipeline: a pipeline containing ``split_data_node``,
        ``train_model_node`` and ``evaluate_model_node``.
    """
    # Consumes 'master_table' and 'parameters'; emits the four split datasets.
    split_step = node(
        split_data,
        ['master_table', 'parameters'],
        ['X_train', 'X_test', 'y_train', 'y_test'],
        name='split_data_node',
    )

    # Consumes the training split; emits the 'regressor' dataset.
    train_step = node(
        train_model,
        ['X_train', 'y_train'],
        'regressor',
        name='train_model_node',
    )

    # Consumes the regressor and the test split; registers no output dataset.
    evaluate_step = node(
        evaluate_model,
        ['regressor', 'X_test', 'y_test'],
        None,
        name='evaluate_model_node',
    )

    return Pipeline([split_step, train_step, evaluate_step])



In [2]:
# Build the pipeline object from the create_pipeline() factory defined above.
pipeline = create_pipeline()

In [3]:
# Instantiate the runner that will execute the pipeline.
# NOTE(review): this import lives outside the first cell's import block;
# consider moving it there so all dependencies are visible up front.
from kedro.runner import SequentialRunner
runner = SequentialRunner()

In [4]:
# Execute the pipeline against the data catalog; the return value is shown as
# the cell output.
# NOTE(review): `catalog` is not defined in any visible cell. Presumably it is
# injected by the Kedro Jupyter/IPython session (e.g. `kedro jupyter notebook`)
# -- confirm, otherwise this cell raises NameError on a fresh plain kernel.
runner.run(pipeline, catalog)

2021-11-11 22:57:58,317 - kedro.io.data_catalog - INFO - Loading data from `master_table` (CSVDataSet)...
2021-11-11 22:57:58,391 - kedro.io.data_catalog - INFO - Loading data from `parameters` (MemoryDataSet)...
2021-11-11 22:57:58,391 - kedro.pipeline.node - INFO - Running node: split_data_node: split_data([master_table,parameters]) -> [X_test,X_train,y_test,y_train]
2021-11-11 22:57:58,397 - kedro.io.data_catalog - INFO - Saving data to `X_train` (MemoryDataSet)...
2021-11-11 22:57:58,398 - kedro.io.data_catalog - INFO - Saving data to `X_test` (MemoryDataSet)...
2021-11-11 22:57:58,399 - kedro.io.data_catalog - INFO - Saving data to `y_train` (MemoryDataSet)...
2021-11-11 22:57:58,400 - kedro.io.data_catalog - INFO - Saving data to `y_test` (MemoryDataSet)...
2021-11-11 22:57:58,401 - kedro.runner.sequential_runner - INFO - Completed 1 out of 3 tasks
2021-11-11 22:57:58,402 - kedro.io.data_catalog - INFO - Loading data from `X_train` (MemoryDataSet)...
2021-11-11 22:57:58,403 - ked

{}