### Author: Kubam Ivo
### Purpose: Tutorials for AML data scientist Associate

## AML Tutorial


In [5]:
#Connect to my AML workspace
from azureml.core import Workspace
ws = Workspace.from_config()
print('My workspace name is '+ str(ws.name))

My workspace name is ml_practice


In [7]:
# Creating an experiment
from azureml.core import Experiment
experiment = Experiment(workspace=ws, name='diabetes-experiment')

In [17]:
# Loading and prepare data
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

x_df = Diabetes.get_tabular_dataset().to_pandas_dataframe().dropna()
y_df = x_df['Y']

X_train, X_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=123)

In [18]:
# Train a model
from sklearn.linear_model import Ridge # Model class
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib
import math

alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

for alpha in alphas:
    run = experiment.start_logging()
    run.log('alpha_value', alpha) #logging alpha values

    model = Ridge(alpha=alpha) # Instantiating model hyperparameters
    model.fit(X=X_train, y=y_train) # Model fitting
    y_pred = model.predict(X=X_test) # Model prediction
    rmse = math.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
    run.log('rmse', rmse) # logging rmse 

    model_name = "model_alpha_" + str(alpha) + ".pkl" #serialising each run
    filename = "outputs/" + model_name

    joblib.dump(value=model, filename=filename)
    run.upload_file(name=model_name, path_or_stream=filename) #Uploading model for each run
    run.complete()

In [19]:
experiment # fetching the link in AML studio

Name,Workspace,Report Page,Docs Page
diabetes-experiment,ml_practice,Link to Azure Machine Learning studio,Link to Documentation


In [20]:
# Getting the best run from the experiment
minimum_rmse_runid = None
minimum_rmse = None

for run in experiment.get_runs():
    run_metrics = run.get_metrics()
    run_details = run.get_details()
    # each logged metric becomes a key in this returned dict
    run_rmse = run_metrics["rmse"]
    run_id = run_details["runId"]

    if minimum_rmse is None:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id
    else:
        if run_rmse < minimum_rmse:
            minimum_rmse = run_rmse
            minimum_rmse_runid = run_id

print("Best run_id: " + minimum_rmse_runid)
print("Best run_id rmse: " + str(minimum_rmse))

KeyError: 'rmse'