In [5]:
from new_module_sdk import Workspace, DatasetX, ModuleStepX, ExperimentX
from azureml.studio.core.utils.column_selection import ColumnSelectionBuilder

### Initialize workspace

In [2]:
# Workspace handle; it is passed as the first argument to every DatasetX,
# ModuleStepX and ExperimentX call in this notebook.
# NOTE(review): from_config() presumably reads the connection settings from a
# local configuration file - confirm where that file is expected to live.
ws = Workspace.from_config()

### Get dataset

In [3]:
# Data reference of the raw automobile price data, looked up through `ws`.
AUTOMOBILE_PRICE_DATA_REFERENCE = "GenericCSV/Automobile_price_data_(Raw)"
dataset = DatasetX.get_by_data_reference(ws, AUTOMOBILE_PRICE_DATA_REFERENCE)

### Get and define modules

In [6]:
# First pipeline step: feed the raw dataset through 'Select Columns in Dataset'.
select_columns_in_dataset = ModuleStepX.get(ws, name='Select Columns in Dataset')
select_columns_in_dataset.inputs.dataset = dataset

# Column selection: start from all columns, then exclude 'normalized-losses'.
all_but_normalized_losses = (
    ColumnSelectionBuilder()
    .include_all()
    .exclude_col_names('normalized-losses')
)
select_columns_in_dataset.params.select_columns = all_but_normalized_losses

Input: ['Dataset']
Mapping property to field names.
self.dataset => self['Dataset']

Output: ['Results_dataset']
Mapping property to field names.
self.results_dataset => self['Results_dataset']

Parameter: ['Select Columns']
Mapping property to field names.
self.select_columns => self['Select Columns']



In [8]:
# Second pipeline step: clean the output of the column-selection step.
clean_missing_data = ModuleStepX.get(ws, name='Clean Missing Data')
clean_missing_data.inputs.dataset = select_columns_in_dataset.outputs.results_dataset

# Apply the cleaning to every column of the incoming dataset.
every_column = ColumnSelectionBuilder().include_all()
clean_missing_data.params.columns_to_be_cleaned = every_column

# Missing-value ratio bounds span the full 0.0 - 1.0 range.
# NOTE(review): presumably this means no column is skipped because of how many
# of its values are missing - confirm against the module documentation.
lowest_missing_ratio, highest_missing_ratio = 0.0, 1.0
clean_missing_data.params.minimum_missing_value_ratio = lowest_missing_ratio
clean_missing_data.params.maximum_missing_value_ratio = highest_missing_ratio

# Cleaning strategy, selected by its display name.
clean_missing_data.params.cleaning_mode = 'Remove entire row'

Input: ['Dataset']
Mapping property to field names.
self.dataset => self['Dataset']

Output: ['Cleaned_dataset', 'Cleaning_transformation']
Mapping property to field names.
self.cleaned_dataset => self['Cleaned_dataset']
self.cleaning_transformation => self['Cleaning_transformation']

Parameter: ['Columns to be cleaned', 'Minimum missing value ratio', 'Maximum missing value ratio', 'Cleaning mode', 'Replacement value', 'Generate missing value indicator column', 'Cols with all missing values']
Mapping property to field names.
self.columns_to_be_cleaned => self['Columns to be cleaned']
self.minimum_missing_value_ratio => self['Minimum missing value ratio']
self.maximum_missing_value_ratio => self['Maximum missing value ratio']
self.cleaning_mode => self['Cleaning mode']
self.replacement_value => self['Replacement value']
self.generate_missing_value_indicator_column => self['Generate missing value indicator column']
self.cols_with_all_missing_values => self['Cols with all missing values']

In [9]:
# Third pipeline step: split the cleaned dataset into two outputs.
# results_dataset1 feeds Train Model and results_dataset2 feeds Score Model.
split_data = ModuleStepX.get(ws, name='Split Data')
split_data.inputs.dataset = clean_missing_data.outputs.cleaned_dataset

# 70% of the rows go to the first output.
split_data.params.fraction_of_rows_in_the_first_output_dataset = 0.7
split_data.params.splitting_mode = 'Split Rows'
split_data.params.randomized_split = True
# Pin the seed so the randomized split selects the same rows on every run.
# Without it the module falls back to its default seed, which the Split Data
# documentation describes as clock-derived, so train/score rows (and therefore
# the evaluation metrics) would differ between runs.
split_data.params.random_seed = 42
split_data.params.stratified_split = False

Input: ['Dataset']
Mapping property to field names.
self.dataset => self['Dataset']

Output: ['Results_dataset1', 'Results_dataset2']
Mapping property to field names.
self.results_dataset1 => self['Results_dataset1']
self.results_dataset2 => self['Results_dataset2']

Parameter: ['Splitting mode', 'Fraction of rows in the first output dataset', 'Randomized split', 'Random seed', 'Stratified split', 'Stratification key column', 'Regular expression', 'Relational expression']
Mapping property to field names.
self.splitting_mode => self['Splitting mode']
self.fraction_of_rows_in_the_first_output_dataset => self['Fraction of rows in the first output dataset']
self.randomized_split => self['Randomized split']
self.random_seed => self['Random seed']
self.stratified_split => self['Stratified split']
self.stratification_key_column => self['Stratification key column']
self.regular_expression => self['Regular expression']
self.relational_expression => self['Relational expression']



In [10]:
# Untrained linear regression learner; it has no inputs and its
# `untrained_model` output is consumed by the Train Model step.
linear_regression = ModuleStepX.get(ws, name='Linear Regression')
linear_regression.params.solution_method = 'Ordinary Least Squares'
# Ordinary Least Squares is regularised through 'L2 regularization weight'.
# The previously set 'L2 regularization term weight' sits with the learning-rate
# and epoch settings in the module's parameter list, i.e. with the
# gradient-descent solver, so it did not configure the solver chosen above.
# NOTE(review): the property name follows the lower_snake_case mapping printed
# by ModuleStepX.get(); the saved cell output is truncated before this entry,
# so confirm the exact spelling on a fresh run.
linear_regression.params.l2_regularization_weight = 0.001
linear_regression.params.include_intercept_term = True
linear_regression.params.random_number_seed = 0

Input: []
Mapping property to field names.

Output: ['Untrained_model']
Mapping property to field names.
self.untrained_model => self['Untrained_model']

Parameter: ['Solution method', 'Create trainer mode', 'Learning rate', 'Number of epochs over which algorithm iterates through examples', 'L2 regularization term weight', 'Range for learning rate', 'Range for number of epochs over which algorithm iterates through examples', 'Range for L2 regularization term weight', 'Should input instances be normalized', 'Decrease learning rate as iterations progress', 'L2 regularization weight', 'Include intercept term', 'Random number seed']
Mapping property to field names.
self.solution_method => self['Solution method']
self.create_trainer_mode => self['Create trainer mode']
self.learning_rate => self['Learning rate']
self.number_of_epochs_over_which_algorithm_iterates_through_examples => self['Number of epochs over which algorithm iterates through examples']
self.l2_regularization_term_weight => 

In [11]:
# Train the untrained linear regression learner on the first split output.
train_model = ModuleStepX.get(ws, name='Train Model')
train_model.inputs.untrained_model = linear_regression.outputs.untrained_model
train_model.inputs.dataset = split_data.outputs.results_dataset1

# The label column is selected by name: 'price'.
price_label = ColumnSelectionBuilder().include_col_names('price')
train_model.params.label_column = price_label

Input: ['Untrained_model', 'Dataset']
Mapping property to field names.
self.untrained_model => self['Untrained_model']
self.dataset => self['Dataset']

Output: ['Trained_model']
Mapping property to field names.
self.trained_model => self['Trained_model']

Parameter: ['Label column']
Mapping property to field names.
self.label_column => self['Label column']



In [12]:
# Score Model step: applies the trained model to the second split output.
score_model = ModuleStepX.get(ws, name='Score Model')
# Model produced by the Train Model step.
score_model.inputs.trained_model = train_model.outputs.trained_model
# Second output of Split Data; the first output was used for training.
score_model.inputs.dataset = split_data.outputs.results_dataset2
# NOTE(review): presumably keeps the input columns and appends the score
# columns to them - confirm against the module documentation.
score_model.params.append_score_columns_to_output = True

Input: ['Trained_model', 'Dataset']
Mapping property to field names.
self.trained_model => self['Trained_model']
self.dataset => self['Dataset']

Output: ['Scored_dataset']
Mapping property to field names.
self.scored_dataset => self['Scored_dataset']

Parameter: ['Append score columns to output']
Mapping property to field names.
self.append_score_columns_to_output => self['Append score columns to output']



In [13]:
# Evaluate Model step: final step of the pipeline, fed by Score Model.
evaluate_model = ModuleStepX.get(ws, name='Evaluate Model')
# Only the primary input is wired. The module's second input,
# `scored_dataset_to_compare`, is left unset, and the module exposes no
# parameters to configure.
evaluate_model.inputs.scored_dataset = score_model.outputs.scored_dataset

Input: ['Scored_dataset', 'Scored_dataset_to_compare']
Mapping property to field names.
self.scored_dataset => self['Scored_dataset']
self.scored_dataset_to_compare => self['Scored_dataset_to_compare']

Output: ['Evaluation_results']
Mapping property to field names.
self.evaluation_results => self['Evaluation_results']

Parameter: []
Mapping property to field names.



### Submit and run experiment

In [14]:
# Every module step defined above, listed in the order they were defined.
steps = [
    select_columns_in_dataset,
    clean_missing_data,
    split_data,
    linear_regression,
    train_model,
    score_model,
    evaluate_model,
]

# Submit all steps to the workspace as a single named experiment.
experiment_name = 'test_experiment_ex'
ExperimentX.submit(ws, steps, experiment_name)

ModuleStep azureml://Select Columns in Dataset
Inputs:  {'Dataset': $AZUREML_DATAREFERENCE_Dataset}
Outputs:  {'Results_dataset': $AZUREML_DATAREFERENCE_fca5e3179f8a48f6b1003d1b9e77918d}
Parameters:  {'Select Columns': <azureml.studio.core.utils.column_selection.ColumnSelectionBuilder object at 0x000002375FCF2438>}


ModuleStep azureml://Clean Missing Data
Inputs:  {'Dataset': $AZUREML_DATAREFERENCE_fca5e3179f8a48f6b1003d1b9e77918d}
Outputs:  {'Cleaned_dataset': $AZUREML_DATAREFERENCE_e158165b229643de99b7ab6f24eb9466, 'Cleaning_transformation': $AZUREML_DATAREFERENCE_fc51d14279ce406e9a6e3e8a002f1b6a}
Parameters:  {'Columns to be cleaned': <azureml.studio.core.utils.column_selection.ColumnSelectionBuilder object at 0x0000023770CE4978>, 'Minimum missing value ratio': 0.0, 'Maximum missing value ratio': 1.0, 'Cleaning mode': 'Remove entire row'}


ModuleStep azureml://Split Data
Inputs:  {'Dataset': $AZUREML_DATAREFERENCE_e158165b229643de99b7ab6f24eb9466}
Outputs:  {'Results_dataset2': $A