In [28]:
# Connect to workspace
import azureml.core
from azureml.core import Workspace

In [29]:
# Load the workspace described by the local 'config.json' file.
# NOTE(review): `_file_name` is an underscore-prefixed argument; 'config.json' is
# presumably already the default name searched for — confirm against the SDK docs.
ws = Workspace.from_config(_file_name='config.json')

In [30]:
# Use the workspace's default datastore; the dataset file is uploaded to it below.
ds = ws.get_default_datastore()
# Bare last expression so the notebook displays the datastore details.
ds

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-096be808-712b-4684-aea5-cbd1045f0ddf",
  "account_name": "mlworkspace1398408280",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

In [31]:
# Upload the local test-scores CSV into the 'student_scores/' folder of the
# datastore, replacing any file already stored under the same path.
ds.upload_files(
    files=['./data_external/test_scores.csv'],
    target_path='student_scores/',
    overwrite=True,
    show_progress=True,
)

Uploading an estimated of 1 files
Uploading ./data_external/test_scores.csv
Uploaded ./data_external/test_scores.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_a60286a81ba247ef887a4b808d07efb6

In [32]:
# check out dataset
from azureml.core import Dataset

# Create a tabular dataset from every CSV file under 'student_scores/' on the
# datastore (the folder targeted by the upload cell above).
student_tab = Dataset.Tabular.from_delimited_files(path=(ds, 'student_scores/*.csv'))

In [33]:
# Register the tabular dataset in the workspace, requesting a new version
# (create_new_version=True) so re-running the cell does not fail on the name.
try:
    student_tab = student_tab.register(
        workspace=ws,
        name='Student Scores Dataset',
        description='Test scores for students',
        tags={'format': 'csv', 'purpose': 'dp-100'},
        create_new_version=True,
    )
except Exception as ex:
    # Best effort: report the failure and let the notebook continue.
    print(ex)
else:
    print('Dataset registered.')

Dataset registered.


## Train model

In [34]:
import os

# Create a folder for the experiment files.
# The training script, the conda environment file and (later) the scoring
# script are all written into this folder.
experiment_folder = 'experiment_student_scores'
# exist_ok=True keeps the cell re-runnable when the folder already exists.
os.makedirs(experiment_folder, exist_ok=True)
print(experiment_folder, 'folder created')

experiment_student_scores folder created


In [35]:
%%writefile $experiment_folder/student_scores_training.py

# Import libraries
import os
import argparse
from azureml.core import Run, Dataset
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Script arguments: the test split fraction and the training dataset.
# Note: '--input-data' is parsed into `training_dataset_id` but is not read
# below; the data is taken from the run's named input 'training_data' instead.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--testsplit',
    type=float,
    dest='test_split',
    default=0.3,
    help='test dataset split',
)
parser.add_argument(
    '--input-data',
    type=str,
    dest='training_dataset_id',
    help='training dataset',
)
args = parser.parse_args()

# Fraction of the rows held out for testing
test_split = args.test_split

# Experiment run context (used for logging and for the input dataset)
run = Run.get_context()

# Load the named input dataset into a pandas DataFrame
print('Loading Data...')
training_input = run.input_datasets['training_data']
student_df = training_input.to_pandas_dataframe()

# One-hot encode the categorical columns (one dummy frame per column).
cat_method, cat_gender, cat_lunch, cat_school = [
    pd.get_dummies(student_df[column])
    for column in ('teaching_method', 'gender', 'lunch', 'school_setting')
]

# Keep the label aside and assemble the frame with the label as last column:
# numeric features first, then the dummy columns, then 'posttest'.
posttest = student_df['posttest']
student_df = pd.concat(
    [student_df[['n_student', 'pretest']],
     cat_method, cat_gender, cat_lunch, cat_school,
     posttest],
    axis=1,
)

# Separate features (every column but the last) and labels (the last column)
feature_columns = student_df.columns[:-1]
label_column = student_df.columns[-1]
X = student_df[feature_columns].values
y = student_df[label_column].values

# Split data with input arg 'test_split'
print('Splitting data with train size', str(1-test_split),
      'and test size', str(test_split))
# Log with the built-in float: `np.float` was only an alias for it, deprecated
# in NumPy 1.20 and removed in NumPy 1.24 (where it raises AttributeError).
run.log('Test split', float(test_split))
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=test_split)

# Train the model in a pipeline

# Every feature column (addressed by position) is standardised
num_features = np.arange(X_train.shape[1])
num_transformer = Pipeline(steps=[('scaler', StandardScaler())])

# Assign the transformer to the preprocessor
preprocessor = ColumnTransformer(
    transformers=[('num', num_transformer, num_features)],
)

# Preprocessing followed by a random forest regressor
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor()),
])

# Fit on the training split, then predict the held-out split
model = pipeline.fit(X_train, y_train)
y_hat = model.predict(X_test)

# Generate evaluation metrics on the held-out split and log them to the run.
# Values are converted with the built-in float: `np.float` was only an alias
# for it, deprecated in NumPy 1.20 and removed in NumPy 1.24.
mse = mean_squared_error(y_test, y_hat)
print('MSE:', mse)
run.log('MSE', float(mse))

rmse = np.sqrt(mse)
print('RMSE:', rmse)
run.log('RMSE', float(rmse))

r2 = r2_score(y_test, y_hat)
print('R2 score:', r2)
run.log('R2 Score', float(r2))

# Export the model
# The fitted pipeline is written to 'outputs/student_scores_model.pkl'; the
# notebook later registers the model from this same path.
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/student_scores_model.pkl')

# Complete the run
run.complete()

Overwriting experiment_student_scores/student_scores_training.py


In [36]:
# Create compute

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "slowCompute"

# Reuse the compute cluster when it already exists; otherwise provision it.
try:
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # The cluster was not found: create it and wait until provisioning ends.
    try:
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_DS11_V2',
            max_nodes=2,
        )
        training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        # Best effort: report the provisioning failure and carry on.
        print(ex)
else:
    print('Found existing cluster, using it.')


Found existing cluster, using it.


### Create conda environment config

In [37]:
%%writefile $experiment_folder/student_scores_env.yml
# Conda environment specification for the student scores experiment.
# The notebook uses this file both for the training run and as the conda file
# of the deployed scoring service.
name: batch_environment
dependencies:
- python=3.6.2
- scikit-learn
# NOTE(review): the training script also imports pandas, numpy and joblib, which
# are not listed here — presumably installed transitively; confirm or list them.
- pip
- pip:
  - azureml-defaults


Overwriting experiment_student_scores/student_scores_env.yml


### Define and run experiment

In [38]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.widgets import RunDetails

# Build the Python environment for the experiment from the conda specification
student_env = Environment.from_conda_specification(
    name='experiment_env',
    file_path=experiment_folder + '/student_scores_env.yml',
)

# Look up the registered training dataset
student_ds = ws.datasets.get('Student Scores Dataset')

# Describe the script run: script, arguments, environment and compute target
script_config = ScriptRunConfig(
    source_directory=experiment_folder,
    script='student_scores_training.py',
    arguments=[
        '--testsplit', 0.3,  # test split size
        '--input-data', student_ds.as_named_input('training_data'),
    ],
    environment=student_env,
    compute_target=None,  # runs locally; alternatively use the cluster
)

# Submit the experiment, show the run widget and block until the run finishes
experiment = Experiment(workspace=ws, name='dp100-student-scores')
run = experiment.submit(config=script_config)
RunDetails(run).show()
run.wait_for_completion()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

{'runId': 'dp100-student-scores_1626329781_99f755c4',
 'target': 'local',
 'status': 'Finalizing',
 'startTimeUtc': '2021-07-15T06:16:25.221063Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '5bffa828-7c12-4686-8246-63e5a6919b11'},
 'inputDatasets': [{'dataset': {'id': '86278970-840d-4b41-be39-da9fb93f36d7'}, 'consumptionDetails': {'type': 'RunInput', 'inputName': 'training_data', 'mechanism': 'Direct'}}],
 'outputDatasets': [],
 'runDefinition': {'script': 'student_scores_training.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--testsplit',
   '0.3',
   '--input-data',
   'DatasetConsumptionConfig:training_data'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {'training_data': {'dataLocation': {'dataset': {'id': '86278970-840d-4b41-be39-da9fb93f36d7',
      'name': 'Student Scores Dataset',
      'version': '2'},
     'dataPath': None,
 

In [39]:
# Register the trained model

from azureml.core import Model

# Fetch the run's metrics once and reuse them for every property, instead of
# calling run.get_metrics() separately for each of the three values.
run_metrics = run.get_metrics()

# Register the pickled pipeline written by the training script, recording the
# evaluation metrics as model properties. A missing metric raises KeyError.
run.register_model(model_path='outputs/student_scores_model.pkl',
                   model_name='student_score_model',
                   tags={'purpose': 'dp-100'},
                   properties={metric_name: run_metrics[metric_name]
                               for metric_name in ('MSE', 'RMSE', 'R2 Score')})

Model(workspace=Workspace.create(name='ml-workspace', subscription_id='1b50f243-9e15-4373-91f7-59060f79af8a', resource_group='DP-100'), name=student_score_model, id=student_score_model:5, version=5, tags={'purpose': 'dp-100'}, properties={'MSE': '11.71771338260507', 'RMSE': '3.423114573397313', 'R2 Score': '0.939387881377058'})

### Deploy model

In [45]:
# Get the model we want to deploy: look up the registered model by name in the
# workspace and report which version is about to be deployed.
model = ws.models['student_score_model']
print('Ready to deploy', model.name, '--version', model.version)

Ready to deploy student_score_model --version 5


In [48]:
# Re-use the experiment folder for the service files.
# Only the paths are built here; the scoring script itself is written by the
# %%writefile cell further down, and the environment file already exists.

service_folder = './' + experiment_folder
script_file = os.path.join(service_folder, 'scoring_students.py') # scoring script
env_file = os.path.join(service_folder, 'student_scores_env.yml') # conda env definition
print('Script file:', script_file)
print('Env file:', env_file)


Script file: ./experiment_student_scores/scoring_students.py
Env file: ./experiment_student_scores/student_scores_env.yml


We have already defined the environment file above.  
We still need to create the scoring script file:

In [52]:
%%writefile $script_file
import json
import joblib
import numpy as np
from azureml.core.model import Model

def init():
    """Load the registered model into the module-level ``model`` global.

    Called when the service is loaded: resolves the path of the model named
    'student_score_model' and deserialises it with joblib.
    """
    global model
    model = joblib.load(Model.get_model_path('student_score_model'))

def run(raw_data):
    """Score one request and return the predictions as a JSON string.

    Run when a request is received.

    Args:
        raw_data: JSON text with a 'data' key holding the rows to score.

    Returns:
        A JSON-encoded list with one prediction per input row.
    """
    data = np.array(json.loads(raw_data)['data'])
    predictions = model.predict(data)
    # A NumPy array is not JSON serialisable (json.dumps raises TypeError),
    # so convert the predictions to a plain Python list first.
    return json.dumps(np.asarray(predictions).tolist())

Writing ./experiment_student_scores/scoring_students.py


In [53]:
# Deploy the service
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig

# Scoring environment: entry script plus the conda environment file
inference_config = InferenceConfig(
    runtime='python',
    entry_script=script_file,
    conda_file=env_file,
)

# Container instance sizing for the web service
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

# Deploy the model as a web service and wait until the deployment finishes
service_name = 'student-scores-service'
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)
service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-07-15 09:00:39+02:00 Creating Container Registry if not exists.
2021-07-15 09:00:42+02:00 Use the existing image.
2021-07-15 09:00:43+02:00 Generating deployment configuration.
2021-07-15 09:00:44+02:00 Submitting deployment to compute..
2021-07-15 09:00:51+02:00 Checking the status of deployment student-scores-service..
2021-07-15 09:02:56+02:00 Checking the status of inference endpoint student-scores-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [55]:
# View the active web services: print the name of every web service found in
# the workspace (iterating `ws.webservices` yields the service names).
for webservice_name in ws.webservices:
    print(webservice_name)

student-scores-service


## Consume web-service

In [None]:
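# The raw dataset cannot be sent to the service as-is: the model was trained on
# one-hot encoded features that the training script builds outside the pipeline.
# The encoding should probably move into the pipeline, together with all other
# preprocessing steps, so that the service can accept raw rows.

# Something for the next iteration!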

In [63]:
# Delete the web service
# NOTE(review): the execution counts show this cell (63) was run after the
# sampling cell below (62); on a top-to-bottom run the service is deleted
# before any request could be sent to it.
service.delete()

In [62]:
# Load the registered dataset and pick one random row as raw example input
x_data = Dataset.get_by_name(ws, name='Student Scores Dataset').to_pandas_dataframe()
x_data = x_data.sample(n=1).iloc[0].values
print(x_data)

['UUUQX' 'Suburban' 'Non-public' 'P8I' 'Standard' 20.0 '038FI' 'Male'
 'Does not qualify' 67.0 80.0]
