## Workspace

In [5]:
from azureml.core import Workspace

In [7]:
# Load the workspace from a config.json file; download that file from the
# Azure Machine Learning portal first.
ws = Workspace.from_config()

## Dataset

In [None]:
from azureml.core import Dataset

In [None]:
# The data lake was configured as a dataset beforehand, so it can simply be
# looked up in the workspace by its name ('datalake').
datalake = Dataset.get_by_name(workspace=ws, name='datalake')

In [None]:
from azureml.core import Datastore

In [None]:
# Look up the datastore named 'data_lake_gen2' in the workspace.
# NOTE(review): the variable is called blob_store although the datastore name
# suggests a Data Lake Gen2 store - confirm the naming is intended.
blob_store = Datastore.get(ws, datastore_name='data_lake_gen2')

## Experiment & Run  
### Interactive inline method

In [None]:
from azureml.core import Experiment

In [None]:
# Open an interactive run under the experiment "experiment_01".
experiment = Experiment(workspace=ws, name="experiment_01")
run = experiment.start_logging()

# --- experiment code goes here ---
# Logging helpers available on the run:
#   log        record a single named value
#   log_list   record a named list of values
#   log_row    record a row with multiple columns
#   log_table  record a dictionary as a table
#   log_image  record an image file or a plot
for accuracy in (0.50, 0.55, 0.60, 0.65, 0.77):
    run.log('Accuracy', accuracy)

# Close the run.
run.complete()

# The logged values are read back from this specific run object.
run.get_metrics()

## View progress

In [None]:
from azureml.widgets import RunDetails

In [None]:
# Show the notebook widget that displays the progress of the run
# (e.g. while a model is training).
RunDetails(run).show()

## Experiment & Run  
### Script method

In [None]:
# creating a script "experiment.py"

In [None]:
%%writefile experiment.py
import os

import pandas as pd
from azureml.core import Run

# Handle to the run this script is executing under.
run = Run.get_context()

# Read the input table (data.csv, relative to the working directory).
data = pd.read_csv('data.csv')

# Log the number of rows that were read.
run.log('observations', len(data))

# Write the first two rows to outputs/sample.csv, creating the folder if needed.
os.makedirs('outputs', exist_ok=True)
sample = data.head(2)
sample.to_csv("outputs/sample.csv", index=False, header=True)

# Mark the run as completed.
run.complete()

In [None]:
# Create a small test data set and write it to data.csv, the file that the
# experiment scripts load with pd.read_csv('data.csv').
import pandas as pd

df = pd.DataFrame({
    "firstName": ["bart", "koen", "karel"],
    "lastName": ["Vermeers", "Aerts", "Venbelsteren"],
})
# index=False keeps the DataFrame's row index out of the file; without it the
# index is written as an unnamed first column, which read_csv later loads as an
# extra "Unnamed: 0" column that then ends up in outputs/sample.csv.
df.to_csv("data.csv", index=False)

In [1]:
# RunConfiguration = python environment setup
# ScriptRunConfig  = script + environment setup
from azureml.core import Experiment, RunConfiguration, ScriptRunConfig

In [None]:
# Create a RunConfiguration object with its default settings (no arguments given).
# A RunConfiguration represents the configuration for experiment runs targeting
# different compute targets in Azure Machine Learning.
experiment_run_config = RunConfiguration()

In [None]:
# Bundle the script with its run configuration: a ScriptRunConfig represents
# the configuration information for submitting a training run in Azure
# Machine Learning. The script is looked up in the current directory.
script_config = ScriptRunConfig(
    source_directory='.',
    script='experiment.py',
    run_config=experiment_run_config,
)

In [None]:
# Submit the script configuration as a run of the experiment "experiment_02",
# then block until the run has finished while showing its output.
experiment = Experiment(workspace=ws, name='experiment_02')
run = experiment.submit(config=script_config)
run.wait_for_completion(show_output=True)

## Experiment & Run
### Estimator

In [2]:
from azureml.train.estimator import Estimator
from azureml.core import Experiment

In [3]:
# Build an estimator that runs experiment.py from the current directory on the
# local compute target, with scikit-learn requested as a conda package.
# NOTE(review): the Azure ML SDK reference marks the Estimator class as
# deprecated in favour of ScriptRunConfig with an Environment - confirm against
# the installed SDK version before reusing this pattern.
estimator = Estimator(
    source_directory='.',
    entry_script='experiment.py',
    compute_target='local',
    conda_packages=['scikit-learn'],
)

In [8]:
# Create the experiment "experiment_03" and submit the estimator to it;
# the returned run object is kept in `run`.
experiment = Experiment(workspace=ws, name='experiment_03')
run = experiment.submit(config=estimator)

In [11]:
#run.wait_for_completion(show_output=True)

In [12]:
# An Estimator encapsulates a 'Run Configuration' and a 'Script Run Configuration' in a single object.

## Experiment & Run
### Passing arguments

In [16]:
%%writefile experiment_argparse.py
import argparse
import os

import joblib
import numpy as np
import pandas as pd
from azureml.core import Run
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Handle to the run this script is executing under.
run = Run.get_context()

# The regularization rate is taken from the command line
# (--reg_rate, parsed as a float, 0.01 when the argument is omitted).
parser = argparse.ArgumentParser()
parser.add_argument('--reg_rate', type=float, dest='reg', default=0.01)
reg = parser.parse_args().reg

# Read the input table (data.csv, relative to the working directory).
data = pd.read_csv('data.csv')

# Disabled training scaffold, kept for reference (data.csv has no
# Feature/Label columns in this demo):
#   X, y = data[['Feature1','Feature2','Feature3']].values, data['Label'].values
#   X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)
#   model = LogisticRegression(C=1/reg, solver="liblinear").fit(X_train, y_train)

# Log the number of rows together with the argument value that was received.
run.log('observations', len(data))
run.log('reg_rate', reg)  # <-- the value passed in via --reg_rate

# Disabled accuracy calculation (plain float() is used here because the
# np.float alias no longer exists in current NumPy releases):
#   y_hat = model.predict(X_test)
#   acc = np.average(y_hat == y_test)
#   run.log('Accuracy', float(acc))

# Write the first two rows to outputs/sample.csv, creating the folder if needed.
os.makedirs('outputs', exist_ok=True)
sample = data.head(2)
sample.to_csv("outputs/sample.csv", index=False, header=True)

# Disabled model export:
#   os.makedirs('outputs', exist_ok=True)
#   joblib.dump(value=model, filename='outputs/model.pkl')

# Mark the run as completed.
run.complete()

Writing experiment_argparse.py


### use script_params = {'--reg_rate': 0.1}

In [19]:
from azureml.train.estimator import Estimator
from azureml.core import Experiment

In [20]:
# Build an estimator for experiment_argparse.py; script_params carries the
# command-line argument for the script (here --reg_rate with the value 0.1).
# NOTE(review): the Azure ML SDK reference marks the Estimator class as
# deprecated in favour of ScriptRunConfig with an Environment - confirm against
# the installed SDK version before reusing this pattern.
estimator = Estimator(
    source_directory='.',
    entry_script='experiment_argparse.py',
    script_params={'--reg_rate': 0.1},  # <-- parsed by the script's argparse setup
    compute_target='local',
    conda_packages=['scikit-learn', 'joblib'],
)

In [21]:
# Create the experiment "experiment_04" and submit the estimator to it;
# the returned run object is kept in `run`.
experiment = Experiment(workspace=ws, name='experiment_04')
run = experiment.submit(config=estimator)

In [23]:
#run.wait_for_completion(show_output=True)