# Support notebook - Training locally and remotely


### Version - Running code in docker image locally/remotely
- The Docker image is built and maintained by Azure ML for me, based on the Python version and libraries I request. As the user, I do not maintain it.
-- locally on my computer
-- remotely on a remote compute target

### Version - Running code in specific docker image locally/remotely
- A specific Docker image. In this example, it comes from the Azure ML image registry and was created for me by the Azure ML service.
-- locally on my computer
-- remotely on a remote compute target


## ML generic

In [None]:

import os
import azureml.core
from azureml.core import Workspace, Dataset, Datastore, ComputeTarget, RunConfiguration, Experiment
from azureml.core.runconfig import CondaDependencies
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.core import Environment
from sklearn.impute import SimpleImputer
from azureml.widgets import RunDetails

In [None]:
# Load the current workspace from its configuration file and echo it
# so the notebook shows which workspace is in use.
ws = Workspace.from_config()
print(ws)

In [None]:
# Attach the compute cluster used for remote runs, creating it if needed.
# Result: `compute_target` is set for the cells that follow.
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Cluster settings; each one can be overridden through an environment variable.
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "automl-compute")
# Environment variables are always strings, so the node counts are converted
# to int explicitly (the defaults are already ints).
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    # A target with this name is already attached to the workspace: reuse it.
    compute_target = ws.compute_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print('found compute target. just use it. ' + compute_name)
    else:
        # Previously this case was silent; the target is still used as-is,
        # but the notebook now says that it is not an AmlCompute cluster.
        print('compute target ' + compute_name
              + ' exists but is not an AmlCompute cluster; using it as is.')
else:
    print('creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size,
        min_nodes=compute_min_nodes,
        max_nodes=compute_max_nodes,
    )

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

In [None]:
# Fetch the two datasets that are already registered in the workspace,
# looking each one up by its registered name.
diabetes_data, diabetes_labels = (
    Dataset.get_by_name(workspace=ws, name=registered_name)
    for registered_name in ('diabetes_data', 'diabetes_labels')
)

# Preview the first rows of the feature dataset as a pandas DataFrame.
df = diabetes_data.to_pandas_dataframe()
df.head(10)

In [None]:
%%writefile ./helen/script/helen_train_simple3.py

# my training script - simple read and train

from azureml.core import Dataset, Run
import os
import numpy as np


output_dir='./helen/output'
os.makedirs ('./helen/output',exist_ok=True)

run = Run.get_context()
# get the input dataset by name
dataset = run.input_datasets['diabetes_data']
# load the TabularDataset to pandas DataFrame
df = dataset.to_pandas_dataframe()
x_array=df.to_numpy()

print ('helen is printing X dataframe cnt',df.count())
print ('helen is printing X numpy cnt',np.count_nonzero(x_array [:,0]))

dataset = run.input_datasets['diabetes_labels']
# load the TabularDataset to pandas DataFrame
df = dataset.to_pandas_dataframe()
y_array=df.to_numpy()


print ('helen is printing y dataframe cnt',df.count())
print ('helen is printing y numpy cnt',np.count_nonzero(y_array [:,0]))


run.log('data cnt',df.count())


# load diabetes dataset, a well-known small dataset that comes with scikit-learn
# REading dataset from file 
# Below writing to file
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib

#X, y = load_diabetes(return_X_y = True)
y=y_array
X=x_array
columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
data = {
   "train":{"X": X_train, "y": y_train},        
   "test":{"X": X_test, "y": y_test}
}
reg = Ridge(alpha = 0.03)
reg.fit(data['train']['X'], data['train']['y'])
preds = reg.predict(data['test']['X'])
print('Mean Squared Error is', mean_squared_error(preds, data['test']['y']))

 # Output the Mean Squared Error to the notebook and to the run
run.log('mse', mean_squared_error(preds, data['test']['y']))

 # Save the model to the outputs directory for capture
model_file = 'diabetes_helen.pkl'
model_file_name=os.path.join(output_dir, model_file)

joblib.dump(value = reg, filename = model_file_name);


print(run.get_file_names())

# upload the model file explicitly into artifacts 
run.upload_file(name = model_file_name, path_or_stream = model_file_name)

# register model
model = run.register_model(model_name='helen_test',model_path=model_file_name)
print(model.name, model.id, model.version, sep='\t')



for a in range (len(preds)):
    print (str (preds[a]) + '  actual:' + str (y_test[a]) + ' actual ',  X_test[a] )

# Training - local and remote

### Version - Running code in docker image locally/remotely
- The Docker image is built and maintained by Azure ML for me, based on the Python version and libraries I request. As the user, I do not maintain it.
-- locally on my computer
-- remotely on a remote compute target

### Version - Running code in specific docker image locally/remotely
- A specific Docker image. In this example, it comes from the Azure ML image registry and was created for me by the Azure ML service.
-- locally on my computer
-- remotely on a remote compute target


## Version - Running code in docker image locally/remotely

In [None]:
# Local run (worked as of Nov 2020).
# Azure ML creates the Docker image for this run from the packages listed below.
from azureml.train.estimator import Estimator
from azureml.train.sklearn import SKLearn
from azureml.widgets import RunDetails

data_dir = './helen/data'
script_dir = './helen/script'
experiment = Experiment(workspace=ws, name="local_python_run")

est = SKLearn(
    source_directory=script_dir,
    entry_script='helen_train_simple3.py',
    # Both registered datasets are passed to the script as named inputs;
    # the script looks them up under exactly these names.
    inputs=[
        diabetes_data.as_named_input('diabetes_data'),
        diabetes_labels.as_named_input('diabetes_labels'),
    ],
    # Package list that is known to work.
    # Alternative (disabled): conda_packages=['azureml-sdk','numpy','scikit-learn']
    pip_packages=['azureml-sdk', 'azureml-dataprep[fuse,pandas]'],
    # 'local' runs on this machine; compute_target=compute_target also works
    # and runs on the remote cluster instead.
    compute_target='local',
)

# Submit the estimator as a run of the experiment, show the run widget,
# then block until the run has finished while streaming its output.
experiment_run = experiment.submit(est)
RunDetails(experiment_run).show()
experiment_run.wait_for_completion(show_output=True)



In [None]:
# Remote run (worked as of Nov 2020).
# Azure ML creates the Docker image for this run from the packages listed below.
from azureml.train.estimator import Estimator
from azureml.train.sklearn import SKLearn
from azureml.widgets import RunDetails

data_dir = './helen/data'
script_dir = './helen/script'
# Give this cell its own experiment, like the other run cells do. Without it
# the run is submitted to whatever `experiment` the last executed cell left
# behind (e.g. "local_python_run"), or fails if none was defined yet.
experiment = Experiment(workspace=ws, name="remote_python_run")

est = SKLearn(
    source_directory=script_dir,
    entry_script='helen_train_simple3.py',
    # Both registered datasets are passed to the script as named inputs;
    # the script looks them up under exactly these names.
    inputs=[
        diabetes_data.as_named_input('diabetes_data'),
        diabetes_labels.as_named_input('diabetes_labels'),
    ],
    # Package list that is known to work.
    # Alternative (disabled): conda_packages=['azureml-sdk','numpy','scikit-learn']
    pip_packages=['azureml-sdk', 'azureml-dataprep[fuse,pandas]'],
    # Remote cluster prepared in the compute cell; compute_target='local'
    # also works and runs on this machine instead.
    compute_target=compute_target,
)

# Submit the estimator as a run of the experiment, show the run widget,
# then block until the run has finished while streaming its output.
experiment_run = experiment.submit(est)
RunDetails(experiment_run).show()
experiment_run.wait_for_completion(show_output=True)



## Version - Running code in specific docker image locally/remotely

In [None]:
# USING MY OWN DOCKER IMAGE - image registry maintained by the system in Azure ML.
# The image was created by Azure ML for me in a previous run of the Estimator
# step, when the libraries were provided.
import os

from azureml.core.container_registry import ContainerRegistry

# Show the workspace details (fetched once; the earlier duplicate call whose
# result was discarded has been dropped).
print(ws.get_details())

# you can also point to an image in a private ACR

# Image created by Azure ML in a previous Estimator step
custom_docker_image = 'azureml/azureml_eb5994d85083050810e56c9d1fa49cbd'

# Credentials for my own Azure ML container registry
image_registry_details = ContainerRegistry()
image_registry_details.address = 'helenmachine59dbf329.azurecr.io'
image_registry_details.username = 'helenmachine59dbf329'
# Prefer a password supplied through the environment so the real secret does
# not have to be stored in the notebook; the original literal remains the
# fallback, so behaviour is unchanged when the variable is not set.
image_registry_details.password = os.environ.get(
    "AML_CONTAINER_REGISTRY_PASSWORD", 'v')

# Print only the non-secret fields: the printed form of the whole object may
# include the password.
print(image_registry_details.address, image_registry_details.username)

In [None]:
# Run on my own Docker image (worked as of Nov 2020).
# The same setup works both locally and remotely; this cell runs remotely.
# The image was created by Azure ML for me in a previous run of the Estimator
# step, when the libraries were provided.

from azureml.train.estimator import Estimator
from azureml.train.sklearn import SKLearn
from azureml.core.container_registry import ContainerRegistry

data_dir = './helen/data'
script_dir = './helen/script'
experiment = Experiment(workspace=ws, name="remote_docker_run")

# True: don't let the system build a new conda environment
user_managed_dependencies = True

est = SKLearn(
    source_directory=script_dir,
    entry_script='helen_train_simple3.py',
    # Both registered datasets are passed to the script as named inputs
    inputs=[
        diabetes_data.as_named_input('diabetes_data'),
        diabetes_labels.as_named_input('diabetes_labels'),
    ],
    # No pip_packages / conda_packages here: the custom image is used as-is.
    # Remote cluster; compute_target='local' also works with the same image.
    compute_target=compute_target,
    # use a custom Docker image, pulled with the registry credentials
    custom_docker_image='azureml/azureml_eb5994d85083050810e56c9d1fa49cbd',
    user_managed=user_managed_dependencies,
    image_registry_details=image_registry_details,
)

# Submit the estimator as a run of the experiment, show the run widget,
# then block until the run has finished while streaming its output.
experiment_run = experiment.submit(est)
RunDetails(experiment_run).show()
experiment_run.wait_for_completion(show_output=True)


In [None]:
# Run on my own Docker image (worked as of Nov 2020).
# The same setup works both locally and remotely; this cell runs locally.
# The image was created by Azure ML for me in a previous run of the Estimator
# step, when the libraries were provided.

from azureml.train.estimator import Estimator
from azureml.train.sklearn import SKLearn
from azureml.widgets import RunDetails
from azureml.core.container_registry import ContainerRegistry

data_dir = './helen/data'
script_dir = './helen/script'
experiment = Experiment(workspace=ws, name="local_docker_run")

# True: don't let the system build a new conda environment
user_managed_dependencies = True

est = SKLearn(
    source_directory=script_dir,
    entry_script='helen_train_simple3.py',
    # Both registered datasets are passed to the script as named inputs
    inputs=[
        diabetes_data.as_named_input('diabetes_data'),
        diabetes_labels.as_named_input('diabetes_labels'),
    ],
    # No pip_packages / conda_packages here: the custom image is used as-is.
    # This machine; compute_target=compute_target also works with the same image.
    compute_target='local',
    # use a custom Docker image, pulled with the registry credentials
    custom_docker_image='azureml/azureml_eb5994d85083050810e56c9d1fa49cbd',
    user_managed=user_managed_dependencies,
    image_registry_details=image_registry_details,
)

# Submit the estimator as a run of the experiment, show the run widget,
# then block until the run has finished while streaming its output.
experiment_run = experiment.submit(est)
RunDetails(experiment_run).show()
experiment_run.wait_for_completion(show_output=True)



In [None]:
# To check a run: submits the current `est` to the current `experiment` again
# (this starts a new run), shows its widget and waits for it to finish.
from azureml.widgets import RunDetails

experiment_run = experiment.submit(est)
RunDetails(experiment_run).show()
experiment_run.wait_for_completion(show_output=True)

# extra code

In [None]:
#
# To activate this environment, use:
# > source activate /azureml-envs/azureml_b2ad30260b00c8bf1a18b629f070b89f
#
# To deactivate an active environment, use:
# > source deactivate
#

In [None]:
# NOT YET TESTED; BUT SHOULD WORK 
# Generic Estimator run on the remote compute target, taking its environment
# from `curated_env` instead of a package list.
# NOTE(review): `curated_env` is not defined anywhere in the visible notebook;
# it must be assigned before this cell runs - confirm the intended environment.
# NOTE(review): 'helen_train.py' is not the script written earlier in this
# notebook ('helen_train_simple3.py') - confirm it exists in script_dir.

from azureml.train.estimator import Estimator
data_dir='./helen/data'
script_dir='./helen/script'



est = Estimator(source_directory=script_dir,
                entry_script='helen_train.py',
                # pass the dataset object as an input named 'diabetes_data'
                inputs=[diabetes_data.as_named_input('diabetes_data')],
                #conda_packages=['azureml-sdk','numpy','scikit-learn'],
                compute_target=compute_target,
                environment_definition= curated_env
               )

# Submit the estimator as part of your experiment run
# (`experiment` is whichever experiment the last executed cell defined)
experiment_run = experiment.submit(est)
experiment_run.wait_for_completion(show_output=True)