In [1]:
from d3m_interface.data_converter import export_pipeline_code 

In [2]:
import json

# Load the pipeline description that the rest of the notebook converts.
# The encoding is given explicitly: JSON interchange files are UTF-8, while
# open() would otherwise fall back to the platform's locale encoding and could
# mis-decode (or fail on) non-ASCII characters on some systems.
with open('pipeline_example.json', encoding='utf-8') as f:
    pipeline_template = json.load(f)

In [3]:
# Pass the loaded JSON template to export_pipeline_code and keep its result.
# NOTE(review): the meaning of the second positional argument (True) is not
# visible from this notebook -- confirm against export_pipeline_code's
# signature and consider passing it by keyword so the call is self-explanatory.
code = export_pipeline_code(pipeline_template, True)

In [4]:

# Builds a 13-step pipeline (step_0 .. step_12) with the d3m_interface Pipeline
# API: each step is created with make_pipeline_module, optionally configured
# with set_hyperparams, and wired to earlier steps with connect.
# NOTE(review): this cell looks like the code emitted by the export call in the
# previous cell (origin='export') -- confirm before hand-editing it, since a
# re-export would presumably overwrite manual changes.
from d3m_interface.pipeline import Pipeline
pipeline = Pipeline(origin='export', dataset='dataset')
input_data = pipeline.make_data_module()

# Step 0: denormalize, fed from the data module's 'dataset' output.
step_0 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.denormalize.Common')

pipeline.connect(input_data, step_0, from_output='dataset')

# Step 1: dataset_to_dataframe. From here on, most steps take the previous
# step's 'produce' output as 'inputs' and its 'index' output as 'index'.
step_1 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.dataset_to_dataframe.Common')

pipeline.connect(step_0, step_1, from_output='produce', to_input='inputs')
pipeline.connect(step_0, step_1, from_output='index', to_input='index')

# Step 2: text_reader with return_result='replace'.
step_2 = pipeline.make_pipeline_module('d3m.primitives.data_preprocessing.text_reader.Common')
pipeline.set_hyperparams(step_2, return_result='replace')

pipeline.connect(step_1, step_2, from_output='produce', to_input='inputs')
pipeline.connect(step_1, step_2, from_output='index', to_input='index')

# Steps 3-6: four add_semantic_types steps in a row.
# Step 3: columns=[1], semantic type .../types/TrueTarget.
step_3 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.add_semantic_types.Common')
pipeline.set_hyperparams(step_3, columns=[1], semantic_types=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])

pipeline.connect(step_2, step_3, from_output='produce', to_input='inputs')
pipeline.connect(step_2, step_3, from_output='index', to_input='index')

# Step 4: columns=[1], semantic type http://schema.org/Boolean.
step_4 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.add_semantic_types.Common')
pipeline.set_hyperparams(step_4, columns=[1], semantic_types=['http://schema.org/Boolean'])

pipeline.connect(step_3, step_4, from_output='produce', to_input='inputs')
pipeline.connect(step_3, step_4, from_output='index', to_input='index')

# Step 5: columns=[2], semantic type http://schema.org/Text.
step_5 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.add_semantic_types.Common')
pipeline.set_hyperparams(step_5, columns=[2], semantic_types=['http://schema.org/Text'])

pipeline.connect(step_4, step_5, from_output='produce', to_input='inputs')
pipeline.connect(step_4, step_5, from_output='index', to_input='index')

# Step 6: columns=[2], semantic type .../types/Attribute. Its 'produce' output
# is reused later by step_7, step_10 ('inputs') and step_12 ('reference').
step_6 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.add_semantic_types.Common')
pipeline.set_hyperparams(step_6, columns=[2], semantic_types=['https://metadata.datadrivendiscovery.org/types/Attribute'])

pipeline.connect(step_5, step_6, from_output='produce', to_input='inputs')
pipeline.connect(step_5, step_6, from_output='index', to_input='index')

# Step 7: column_parser (no hyperparameters set here).
step_7 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.column_parser.Common')

pipeline.connect(step_6, step_7, from_output='produce', to_input='inputs')
pipeline.connect(step_6, step_7, from_output='index', to_input='index')

# Step 8: extract_columns_by_semantic_types restricted to .../types/Attribute,
# with an empty exclude_columns list.
step_8 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')
pipeline.set_hyperparams(step_8, semantic_types=['https://metadata.datadrivendiscovery.org/types/Attribute'], exclude_columns=[])

pipeline.connect(step_7, step_8, from_output='produce', to_input='inputs')
pipeline.connect(step_7, step_8, from_output='index', to_input='index')

# Step 9: tfidf_vectorizer (SKlearn) on the columns extracted by step 8.
step_9 = pipeline.make_pipeline_module('d3m.primitives.data_preprocessing.tfidf_vectorizer.SKlearn')
pipeline.set_hyperparams(step_9, use_semantic_types=True, return_result='replace', stop_words={'case': 'string', 'value': 'english'})

pipeline.connect(step_8, step_9, from_output='produce', to_input='inputs')
pipeline.connect(step_8, step_9, from_output='index', to_input='index')

# Step 10: extract_columns_by_semantic_types restricted to .../types/TrueTarget.
# Unlike the steps above, 'inputs' comes from step_6 rather than the previous
# step, while 'index' still comes from step_9.
# NOTE(review): confirm that taking 'index' from step_9 (not step_6) is intended.
step_10 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common')
pipeline.set_hyperparams(step_10, semantic_types=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])

pipeline.connect(step_6, step_10, from_output='produce', to_input='inputs')
pipeline.connect(step_9, step_10, from_output='index', to_input='index')

# Step 11: gradient_boosting classifier (SKlearn); 'inputs' come from step_9
# and 'outputs' from step_10.
step_11 = pipeline.make_pipeline_module('d3m.primitives.classification.gradient_boosting.SKlearn')

pipeline.connect(step_9, step_11, from_output='produce', to_input='inputs')
pipeline.connect(step_10, step_11, from_output='produce', to_input='outputs')
pipeline.connect(step_10, step_11, from_output='index', to_input='index')

# Step 12: construct_predictions; 'inputs' come from the classifier (step_11)
# and 'reference' from step_6.
step_12 = pipeline.make_pipeline_module('d3m.primitives.data_transformation.construct_predictions.Common')

pipeline.connect(step_11, step_12, from_output='produce', to_input='inputs')
pipeline.connect(step_6, step_12, from_output='produce', to_input='reference')
pipeline.connect(step_11, step_12, from_output='index', to_input='index')



In [5]:
from d3m_interface.data_converter import to_d3m_json

In [6]:
# Convert the Pipeline object built above with to_d3m_json and keep the result
# for inspection in the next cell.
d3m_json = to_d3m_json(pipeline)

In [7]:
# Show the converted pipeline description as the cell's output.
# NOTE(review): the rendered output contains UUID(...) objects for the
# primitive ids; json.dumps cannot serialise uuid.UUID values by default, so
# confirm they are converted to strings before this dict is written to disk.
d3m_json

{'id': '72507d9b-33f5-4c07-99e1-1f8ed9871dd6',
 'name': '72507d9b-33f5-4c07-99e1-1f8ed9871dd6',
 'description': 'export',
 'schema': 'https://metadata.datadrivendiscovery.org/schemas/v0/pipeline.json',
 'created': '2020-11-04T13:28:45.993727Z',
 'context': 'TESTING',
 'inputs': [{'name': 'input dataset'}],
 'outputs': [{'data': 'steps.12.produce', 'name': 'predictions'}],
 'steps': [{'type': 'PRIMITIVE',
   'primitive': {'id': UUID('0f151b51-98dd-4bcf-ba28-f2e345103072'),
    'version': '2019.10.10',
    'python_path': 'd3m',
    'name': 'd3m.primitives.data_transformation.denormalize.Common'},
   'arguments': {'inputs': {'type': 'CONTAINER', 'data': 'inputs.0'}},
   'outputs': [{'id': 'produce'}]},
  {'type': 'PRIMITIVE',
   'primitive': {'id': UUID('4ae8c47c-ce4d-493d-994d-5bf66e4ff869'),
    'version': '2019.10.10',
    'python_path': 'd3m',
    'name': 'd3m.primitives.data_transformation.dataset_to_dataframe.Common'},
   'arguments': {'inputs': {'type': 'CONTAINER', 'data': 'steps.