In [None]:
# ------------------------------------------------------------------------------
# This is generated from https://ml.azure.com/visualinterface/authoring/Normal/7ecd84fe-094b-4b45-866e-1b321be92f20?wsid=/subscriptions/edf507a2-6235-46c5-b560-fd463ba2e771/resourcegroups/devops-test-18681/workspaces/liu2s3v2uksioworkspace&tid=72f988bf-86f1-41af-91ab-2d7cd011db47
# To run this code, please install the SDK with this command:
# !pip install "azure-ml-component[notebooks]" --extra-index-url https://azuremlsdktestpypi.azureedge.net/modulesdkpreview --upgrade
# For a more detailed guide to setting up your environment, see: https://github.com/Azure/DesignerPrivatePreviewFeatures/blob/master/azure-ml-components/samples/setup-environment.ipynb
# ------------------------------------------------------------------------------

In [None]:
from azure.ml.component import Pipeline, Component, dsl
from azureml.core import Dataset
from azureml.core import Datastore
from azureml.core import Workspace

In [None]:
# Configure the AML workspace.
# `ws` is the workspace handle that later cells pass to Component.load,
# Datastore.get and dataset.register, and query through ws.datasets.
# NOTE(review): from_config() presumably reads a local workspace config file —
# confirm one is available before running.
ws = Workspace.from_config()

In [None]:
# get components
# All four components are loaded from the workspace at the same pinned version.
COMPONENT_VERSION = '1.1.1'


def _load_component_func(component_name):
    """Return the component function for `component_name` at COMPONENT_VERSION, loaded from `ws`."""
    return Component.load(ws, name=component_name, version=COMPONENT_VERSION)


# Loaded in the same order as before; the resulting names are used by the
# pipeline definition cell.
microsoft_com_cat_stratified_splitter_func = _load_component_func('microsoft.com.cat.stratified_splitter')
microsoft_com_cat_sar_training_func = _load_component_func('microsoft.com.cat.sar_training')
microsoft_com_cat_ndcg_func = _load_component_func('microsoft.com.cat.ndcg')
microsoft_com_cat_sar_scoring_func = _load_component_func('microsoft.com.cat.sar_scoring')

In [None]:
# get dataset
# Reuse the 'movie_ratings' dataset when the workspace already has it registered;
# otherwise create it from the 'azureml_globaldatasets' datastore and register it.
DATASET_NAME = 'movie_ratings'

# Query the workspace's registered datasets once and reuse the result for both
# the membership check and the lookup.
registered_datasets = ws.datasets
if DATASET_NAME in registered_datasets:
    movie_ratings = registered_datasets[DATASET_NAME]
else:
    datastore = Datastore.get(ws, 'azureml_globaldatasets')
    dataset = Dataset.File.from_files((datastore, 'GenericCSV/Movie_Ratings'))
    # register() returns the registered dataset object, so no second
    # workspace lookup is needed after registration.
    movie_ratings = dataset.register(workspace=ws, name=DATASET_NAME, description='Movie Ratings')

In [None]:
# define pipeline
def _use_single_node_docker(step):
    """Apply the run settings that every step of the generated pipeline shares.

    Sets node_count=1 on the step's resource layout and configures its Docker
    settings with use_docker=True, shared_volumes=True, shm_size='2g' and
    arguments='[]'.
    """
    step.runsettings.resource_layout.configure(node_count=1)
    step.runsettings.docker_configuration.configure(
        use_docker=True,
        shared_volumes=True,
        shm_size='2g',
        arguments='[]',
    )


@dsl.pipeline(
    name='Pipeline-Created-on-10-11-2021',
    description='Pipeline created on 20211011',
    default_datastore='workspaceblobstore',
)
def generated_pipeline():
    """Wire the four loaded components into one pipeline.

    The splitter consumes `movie_ratings`; its train output feeds SAR training,
    its test output feeds both SAR scoring and NDCG, the trained model feeds
    scoring, and the score result feeds NDCG. Nothing is returned.
    """
    # NOTE(review): the step variable names are kept exactly as generated; the
    # DSL may derive node names from them — confirm before renaming any.

    # Step 1: split the ratings into train and test sets.
    microsoft_com_cat_stratified_splitter_0 = microsoft_com_cat_stratified_splitter_func(
        input_path=movie_ratings,
        ratio=0.75,
        user_column='UserId',
        item_column='MovieId',
        seed=42,
    )
    _use_single_node_docker(microsoft_com_cat_stratified_splitter_0)

    # Step 2: train the SAR model on the train split.
    microsoft_com_cat_sar_training_0 = microsoft_com_cat_sar_training_func(
        input_path=microsoft_com_cat_stratified_splitter_0.outputs.output_train_data,
        user_column='UserId',
        item_column='MovieId',
        rating_column='Rating',
        timestamp_column='Timestamp',
        normalize=False,
        time_decay=False,
    )
    _use_single_node_docker(microsoft_com_cat_sar_training_0)

    # Step 3: score the test split with the trained model.
    microsoft_com_cat_sar_scoring_0 = microsoft_com_cat_sar_scoring_func(
        trained_model=microsoft_com_cat_sar_training_0.outputs.output_model,
        dataset_to_score=microsoft_com_cat_stratified_splitter_0.outputs.output_test_data,
        score_type='Item recommendation',
        items_to_predict='Items in score set',
        ranking_metric='Rating',
        remove_seen_items=False,
        top_k=10,
        sort_top_k=True,
        normalize=False,
    )
    _use_single_node_docker(microsoft_com_cat_sar_scoring_0)

    # Step 4: compute NDCG from the test split and the score result.
    microsoft_com_cat_ndcg_0 = microsoft_com_cat_ndcg_func(
        rating_true=microsoft_com_cat_stratified_splitter_0.outputs.output_test_data,
        rating_pred=microsoft_com_cat_sar_scoring_0.outputs.score_result,
        user_column='UserId',
        item_column='MovieId',
        rating_column='Rating',
        prediction_column='prediction',
        relevancy_method='top_k',
        top_k=10,
        threshold=10.0,
    )
    _use_single_node_docker(microsoft_com_cat_ndcg_0)

In [None]:
# Create a pipeline by calling the @dsl.pipeline-decorated generated_pipeline()
# defined above; the cells below validate it and (optionally) submit it.
pipeline = generated_pipeline()

In [None]:
# Validate the pipeline and visualize the graph.
# As the cell's last expression, the return value of validate() is shown as the
# cell output.
pipeline.validate()

In [None]:
# Submit a pipeline run (intentionally disabled: uncomment the line below to submit the pipeline and wait for completion)
# pipeline.submit(experiment_name='sample-experiment-name').wait_for_completion()