# **Construct (scheduled) retraining pipeline and register model**

## **Workspace, experiment and compute target**

In [None]:
# get the workspace, experiment

from azureml.core import Workspace, Experiment, Datastore
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Load the workspace via Workspace.from_config() and report which one was found.
ws = Workspace.from_config()
workspace_summary = "Found workspace {} at location {}".format(ws.name, ws.location)
print(workspace_summary)

# Experiment that the pipeline run is later submitted to; its name is also
# passed to the retraining schedule.
experiment_name = '<YOUR EXPERIMENT NAME HERE>'
exp = Experiment(ws, experiment_name)

#set up compute target
from azureml.core.compute import ComputeTarget, AmlCompute

compute_name = "<YOUR COMPUTE NAME HERE>"

# Read the workspace's compute targets once, and fail with an actionable
# message (instead of a bare KeyError) when the name is misspelled or the
# compute target does not exist in this workspace.
available_targets = ws.compute_targets
if compute_name not in available_targets:
    raise KeyError(
        "Compute target {!r} was not found in workspace {!r}; available targets: {}".format(
            compute_name, ws.name, sorted(available_targets)
        )
    )
compute_target = available_targets[compute_name]

## **Set up environment (with conda dependencies) OR Create and Register the training run's environment _(only execute once)_**
## **【Note】matching versions of torch and torchvision packages**

In [None]:
# Packages installed through conda.
conda_packages = ['pandas', 'scikit-learn']

# Packages installed through pip. torch and torchvision are pinned together
# because their versions have to match (see the note in the heading above).
pip_packages = [
    'azureml-sdk',
    'azureml-dataprep[fuse,pandas]',
    'pillow',
    'openpyxl',
    'xlrd==1.1.0',
    'tensorflow',
    'torch == 1.4.0',
    'torchvision == 0.5.0',
    'torchsummary',
    'transformers == 3.5.0',
    'opencensus-ext-azure>=1.0.1',
    'matplotlib',
]

cd = CondaDependencies.create(
    conda_packages=conda_packages,
    pip_packages=pip_packages,
    pin_sdk_version=False,
)

# Attach the dependencies to a named environment.
env = Environment(name="<YOUR_ENV_NAME>")
env.python.conda_dependencies = cd

# Register the environment in the workspace so it can be fetched by name later.
env.register(workspace=ws)

## **Configuring the training run (RunConfiguration) object**

In [4]:
from azureml.core.runconfig import RunConfiguration
# Fetch the environment by name from the workspace; the name must refer to an
# environment that has already been registered there.
env = Environment.get(workspace=ws, name="<YOUR_ENVIRONMENT_NAME>")

# Run configuration shared by every pipeline step: where to run and with which
# environment.
aml_run_config = RunConfiguration()
aml_run_config.target = compute_target
aml_run_config.environment = env

## **【Optional】: Add environment variable with Application Insights Connection String**

In [5]:
import os

_APPINSIGHTS_VAR = "APPLICATIONINSIGHTS_CONNECTION_STRING"

# Start from the variables already defined on the environment so that adding
# the Application Insights setting does not silently discard them.
env_vars = dict(aml_run_config.environment.environment_variables or {})

# Prefer a connection string supplied through the local process environment so
# the real key does not have to be saved in the notebook; otherwise fall back
# to the placeholder, which must be replaced with a real connection string.
env_vars[_APPINSIGHTS_VAR] = os.environ.get(
    _APPINSIGHTS_VAR, 'InstrumentationKey=xxxxxxxxxxxxxxxxxxxxx'
)

aml_run_config.environment.environment_variables = env_vars

## **Construct pipeline including three steps: training, evaluation and register**

In [9]:
from azureml.pipeline.steps import PythonScriptStep
from azureml.data import OutputFileDatasetConfig

source_dir = '<YOUR_SOURCE_DIR>'
training_entry_point = "training.py"
evaluation_entry_point = "evaluation.py"
register_entry_point = "register_model.py"

# Intermediate outputs written to the datastore and handed from step to step:
#   trained_data   : training   -> evaluation
#   evaluated_data : evaluation -> register
datastore = Datastore.get(ws, '<YOUR_DATASTORE_NAME>')
trained_data = OutputFileDatasetConfig(destination=(datastore, 'output/trained_model'))
evaluated_data = OutputFileDatasetConfig(destination=(datastore, 'output/evaluated_model'))

# set up steps of retraining pipeline

# Step 1: training. The step declares no inputs, so with reuse enabled the
# pipeline would treat it as unchanged on every scheduled run and serve the
# cached output instead of retraining. Reuse is therefore disabled so each
# run actually retrains.
training_step = PythonScriptStep(
    source_directory = source_dir,
    script_name = training_entry_point,
    arguments = ['--output_model_dir', trained_data],
    compute_target = compute_target,
    runconfig = aml_run_config,
    allow_reuse = False
)

# Step 2: evaluation. Consumes the training output and writes the evaluated
# model for the register step.
evaluation_step = PythonScriptStep(
    source_directory = source_dir,
    script_name = evaluation_entry_point,
    arguments = ['--model_for_evaluation', trained_data.as_input(),
                 '--model_for_predicting', evaluated_data],
    compute_target = compute_target,
    runconfig = aml_run_config,
    allow_reuse = False
)

# Step 3: register. Consumes the evaluation output.
register_step = PythonScriptStep(
    source_directory = source_dir,
    script_name = register_entry_point,
    arguments = ['--model', evaluated_data.as_input()],
    compute_target = compute_target,
    runconfig = aml_run_config,
    allow_reuse = True
)

# connect all steps in a list
analysis_step = [training_step, evaluation_step, register_step]

## **Build the pipeline**

In [None]:
from azureml.pipeline.core import Pipeline
# Assemble the steps into a pipeline bound to the workspace.
retraining_pipeline = Pipeline(workspace=ws, steps=analysis_step)

# Publish the pipeline; the published object is what gets submitted below.
published_retraining_pipeline = retraining_pipeline.publish(
    name="<YOUR_PIPELINE_NAME>",
    description="MLOps pipeline",
    version="1.0",
)

## **Submit the published pipeline (it can also be submitted from the AML portal)**

In [None]:
# Start a run of the published pipeline under the experiment.
pipeline_run = exp.submit(config=published_retraining_pipeline)
print("Pipeline submitted for execution.")

# Block this cell until the run has finished.
pipeline_run.wait_for_completion()

## **Monitor a run**

In [None]:
from azureml.widgets import RunDetails
# Build the run-details widget for the submitted pipeline run and display it.
run_details_widget = RunDetails(pipeline_run)
run_details_widget.show()

## **Set up retraining schedule based on time (or based on change)**

In [13]:
from azureml.pipeline.core.schedule import ScheduleRecurrence, Schedule
# Time-based trigger: every 6 hours.
recurrence = ScheduleRecurrence(frequency="Hour", interval=6)

# Attach the recurrence to a published pipeline (identified by its id) so the
# pipeline is run under the given experiment name.
recurring_schedule = Schedule.create(
    workspace=ws,
    name="MyRecurringSchedule",
    pipeline_id='<YOUR_PIPELINE_ID>',
    experiment_name=experiment_name,
    recurrence=recurrence,
    description="Based on time",
)

## _Reference methods_

### _Archive the experiment_
```
# Archive the experiment referenced by `exp`.
exp.archive()
```

### _View all schedules in the current workspace_
```
from azureml.pipeline.core.schedule import ScheduleRecurrence, Schedule
# Print every schedule that Schedule.list returns for the workspace.
schedules = Schedule.list(ws)
for schedule in schedules:
    print(schedule)
```

### _Disable (stop) a schedule by schedule id_
```
def stop_by_schedule_id(ws, schedule_id):
    """Disable the schedule whose id equals ``schedule_id`` and return it.

    Args:
        ws: Workspace whose schedules are searched via ``Schedule.list``.
        schedule_id: Id of the schedule to disable.

    Returns:
        The matching schedule object, after ``disable()`` has been called on it.

    Raises:
        ValueError: If no schedule returned by ``Schedule.list(ws)`` has that id.
    """
    match = next((s for s in Schedule.list(ws) if s.id == schedule_id), None)
    if match is None:
        # Without a default, next() would leak a bare StopIteration here,
        # which says nothing about which id was missing.
        raise ValueError(
            "No schedule with id {!r} was found in the workspace".format(schedule_id)
        )
    match.disable()
    return match
# Replace the placeholder with the id of the schedule to disable, then call the helper.
schedule_id = '<YOUR_SCHEDULE_ID>'
stop_by_schedule_id(ws, schedule_id)
```