In [1]:
from new_module_sdk import Workspace, DatasetX, ModuleStepX, ExperimentX
from azureml.studio.core.utils.column_selection import ColumnSelectionBuilder

### Initialize workspace

In [2]:
# Workspace handle passed to the DatasetX / ModuleStepX / ExperimentX calls below.
# NOTE(review): presumably from_config() reads a local workspace config file, as
# azureml-core's Workspace does — confirm for new_module_sdk.
ws = Workspace.from_config()

### Get dataset

In [3]:
# Look up the pipeline's input dataset by its data-reference path in the workspace.
# NOTE(review): the path looks like the built-in automobile price sample —
# confirm it exists in the target workspace.
dataset = DatasetX.get_by_data_reference(ws, "GenericCSV/Automobile_price_data_(Raw)")

### Get and define modules

In [4]:
# Fetch the 'Select Columns in Dataset' module by name. The cell output lists the
# module's inputs, outputs, and params (with their defaults) for reference.
select_columns_in_dataset = ModuleStepX.get(ws, name='Select Columns in Dataset')

Initializing module: azureml://Select Columns in Dataset
Get inputs
inputs.dataset

Get outputs
outputs.results_dataset

Get params
params.select_columns = None, is_optional=False



In [5]:
# Feed the raw dataset into the column selector.
select_columns_in_dataset.inputs.dataset = dataset

# Keep every column except 'normalized-losses'.
select_columns_in_dataset.params.select_columns = (
    ColumnSelectionBuilder()
    .include_all()
    .exclude_col_names('normalized-losses')
)

In [6]:
# Fetch the 'Clean Missing Data' module by name. The cell output lists its two
# outputs (cleaned dataset, cleaning transformation) and its params with defaults.
clean_missing_data = ModuleStepX.get(ws, name='Clean Missing Data')

Initializing module: azureml://Clean Missing Data
Get inputs
inputs.dataset

Get outputs
outputs.cleaned_dataset
outputs.cleaning_transformation

Get params
params.columns_to_be_cleaned = 'All', is_optional=False
params.minimum_missing_value_ratio = '0.0', is_optional=False
params.maximum_missing_value_ratio = '1.0', is_optional=False
params.cleaning_mode = 'Custom substitution value', is_optional=False
params.replacement_value = None, is_optional=True
params.generate_missing_value_indicator_column = None, is_optional=True
params.cols_with_all_missing_values = None, is_optional=True



In [7]:
# Chain the cleaner onto the column selector's output.
clean_missing_data.inputs.dataset = select_columns_in_dataset.outputs.results_dataset
# Apply the cleaning to all columns.
clean_missing_data.params.columns_to_be_cleaned = ColumnSelectionBuilder().include_all()
# Same numeric values as the listed defaults ('0.0' / '1.0'), set explicitly as floats.
clean_missing_data.params.minimum_missing_value_ratio = 0.0
clean_missing_data.params.maximum_missing_value_ratio = 1.0
# Override the default 'Custom substitution value' mode.
# NOTE(review): presumably this drops any row containing a missing value — confirm.
clean_missing_data.params.cleaning_mode = 'Remove entire row'

In [8]:
# Fetch the 'Split Data' module by name. The cell output lists its two result
# datasets and its params with defaults.
split_data = ModuleStepX.get(ws, name='Split Data')

Initializing module: azureml://Split Data
Get inputs
inputs.dataset

Get outputs
outputs.results_dataset1
outputs.results_dataset2

Get params
params.splitting_mode = 'Split Rows', is_optional=False
params.fraction_of_rows_in_the_first_output_dataset = None, is_optional=True
params.randomized_split = None, is_optional=True
params.random_seed = None, is_optional=True
params.stratified_split = None, is_optional=True
params.stratification_key_column = None, is_optional=True
params.regular_expression = None, is_optional=True
params.relational_expression = None, is_optional=True



In [9]:
# Split the cleaned dataset into two outputs (used below for training and scoring).
split_data.inputs.dataset = clean_missing_data.outputs.cleaned_dataset
# 70% of the rows go to the first output (results_dataset1).
split_data.params.fraction_of_rows_in_the_first_output_dataset = 0.7
# Same value as the module's listed default; set explicitly for clarity.
split_data.params.splitting_mode = 'Split Rows'
# NOTE(review): randomized_split is enabled but random_seed is left unset —
# presumably the split is not pinned across runs; confirm and consider setting a seed.
split_data.params.randomized_split = True
split_data.params.stratified_split = False

In [10]:
# Fetch the 'Linear Regression' module by name. It takes no inputs and emits an
# untrained model; the cell output lists its params with defaults.
linear_regression = ModuleStepX.get(ws, name='Linear Regression')

Initializing module: azureml://Linear Regression
Get inputs

Get outputs
outputs.untrained_model

Get params
params.solution_method = 'Ordinary Least Squares', is_optional=False
params.create_trainer_mode = None, is_optional=True
params.learning_rate = None, is_optional=True
params.number_of_epochs_over_which_algorithm_iterates_through_examples = None, is_optional=True
params.l2_regularization_term_weight = None, is_optional=True
params.range_for_learning_rate = None, is_optional=True
params.range_for_number_of_epochs_over_which_algorithm_iterates_through_examples = None, is_optional=True
params.range_for_l2_regularization_term_weight = None, is_optional=True
params.should_input_instances_be_normalized = None, is_optional=True
params.decrease_learning_rate_as_iterations_progress = None, is_optional=True
params.l2_regularization_weight = None, is_optional=True
params.include_intercept_term = None, is_optional=True
params.random_number_seed = None, is_optional=True



In [11]:
# Configure the untrained Linear Regression learner.
linear_regression.params.solution_method = 'Ordinary Least Squares'
# With the Ordinary Least Squares solver the regularization parameter is
# `l2_regularization_weight`. `l2_regularization_term_weight` sits in the
# module's gradient-descent parameter group (learning rate, epochs, ...) and
# is not the OLS setting, so setting it here would leave OLS regularization unset.
linear_regression.params.l2_regularization_weight = 0.001
linear_regression.params.include_intercept_term = True
# Fixed seed so repeated runs configure the learner identically.
linear_regression.params.random_number_seed = 0

In [12]:
# Fetch the 'Train Model' module by name. It takes an untrained model and a
# dataset, and requires a label column (no default, not optional).
train_model = ModuleStepX.get(ws, name='Train Model')

Initializing module: azureml://Train Model
Get inputs
inputs.untrained_model
inputs.dataset

Get outputs
outputs.trained_model

Get params
params.label_column = None, is_optional=False



In [13]:
# Train the linear-regression learner on the first split output.
train_model.inputs.untrained_model = linear_regression.outputs.untrained_model
train_model.inputs.dataset = split_data.outputs.results_dataset1
# 'price' is the label column.
train_model.params.label_column = ColumnSelectionBuilder().include_col_names('price')

In [14]:
# Fetch the 'Score Model' module by name. It takes a trained model and a dataset
# and emits a scored dataset.
score_model = ModuleStepX.get(ws, name='Score Model')

Initializing module: azureml://Score Model
Get inputs
inputs.trained_model
inputs.dataset

Get outputs
outputs.scored_dataset

Get params
params.append_score_columns_to_output = 'True', is_optional=False



In [15]:
# Score the second split output with the trained model.
score_model.inputs.trained_model = train_model.outputs.trained_model
score_model.inputs.dataset = split_data.outputs.results_dataset2
# NOTE(review): the module lists this default as the string 'True'; a bool is
# assigned here — confirm the SDK accepts both forms.
score_model.params.append_score_columns_to_output = True

In [16]:
# Fetch the 'Evaluate Model' module by name. It accepts a scored dataset plus a
# second scored dataset to compare against, and has no params.
evaluate_model = ModuleStepX.get(ws, name='Evaluate Model')

Initializing module: azureml://Evaluate Model
Get inputs
inputs.scored_dataset
inputs.scored_dataset_to_compare

Get outputs
outputs.evaluation_results

Get params



In [17]:
# Evaluate the single scored dataset; inputs.scored_dataset_to_compare is left
# unset because only one model is trained in this notebook.
evaluate_model.inputs.scored_dataset = score_model.outputs.scored_dataset

### Submit and run experiment

In [18]:
# All configured module steps, listed in the order the data flows through them.
steps = [
    select_columns_in_dataset,
    clean_missing_data,
    split_data,
    linear_regression,
    train_model,
    score_model,
    evaluate_model,
]

# Submit the steps as one experiment run in the workspace.
ExperimentX.submit(ws, steps, 'test_experiment_ex')

ModuleStep azureml://Select Columns in Dataset
Inputs:  {'Dataset': $AZUREML_DATAREFERENCE_Dataset}
Outputs:  {'Results_dataset': $AZUREML_DATAREFERENCE_2e83ccba9063409c953e0a8f850dda07}
Parameters:  {'Select Columns': <azureml.studio.core.utils.column_selection.ColumnSelectionBuilder object at 0x000001F9C7536828>}


ModuleStep azureml://Clean Missing Data
Inputs:  {'Dataset': $AZUREML_DATAREFERENCE_2e83ccba9063409c953e0a8f850dda07}
Outputs:  {'Cleaned_dataset': $AZUREML_DATAREFERENCE_968541f2dcba479d9e555e8cb591d18c, 'Cleaning_transformation': $AZUREML_DATAREFERENCE_c169cb2678ab4e6282690972e844b04c}
Parameters:  {'Columns to be cleaned': <azureml.studio.core.utils.column_selection.ColumnSelectionBuilder object at 0x000001F9C758DC50>, 'Minimum missing value ratio': 0.0, 'Maximum missing value ratio': 1.0, 'Cleaning mode': 'Remove entire row', 'Replacement value': None, 'Generate missing value indicator column': None, 'Cols with all missing values': None}


ModuleStep azureml://Split Da