## Experiments: Premier Analysis on Azure Machine Learning
### Baseline models

In [1]:
import argparse
import os
import sklearn
import pandas as pd 
import numpy as np
from sklearn.metrics import f1_score,accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from azureml.core import Run, Dataset, Environment,Experiment,ScriptRunConfig
from sklearn.preprocessing import LabelEncoder
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
import mlflow

In [2]:
from azureml.core import  Workspace
from azureml.core.authentication import InteractiveLoginAuthentication


# Connect to the Azure ML workspace resolved by Workspace.from_config().
# No explicit authentication object is passed, so the SDK's default auth flow applies.
ws = Workspace.from_config()

In [3]:
# Show which workspace this notebook is connected to, one "label value" pair per line.
# NOTE(review): the rendered output includes the subscription id — consider clearing
# it before sharing the notebook.
workspace_summary = [
    ('Workspace name: ', ws.name),
    ('Azure region: ', ws.location),
    ('Subscription id: ', ws.subscription_id),
    ('Resource group: ', ws.resource_group),
]
print('\n'.join(label + value for label, value in workspace_summary))

Workspace name: cdh-azml-dev-mlw
Azure region: eastus
Subscription id: 320d8d57-c87c-4434-827f-59ee7d86687a
Resource group: CSELS-CDH-DEV


In [4]:
# Resolve the notebook's working directory and the directory one level above it.
path = os.getcwd()
parent = os.path.join(path, os.pardir)   # un-normalised "<cwd>/<pardir>" form
premier_path = os.path.abspath(parent)   # normalised absolute path of the parent

# Report both locations; the leading "\n" keeps a blank line between the two reports.
print("Current Directory:", path)
print("\nParent Directory:", premier_path)

Current Directory: c:\Users\wsn8\Code\premier_analysis\azure_ml

Parent Directory: c:\Users\wsn8\Code\premier_analysis


### Create Compute

In [5]:
# Attach to the named compute cluster, provisioning it first when the workspace
# does not have it yet.
clustername = 'StandardD13v2'
is_new_cluster = False

try:
    # Raises ComputeTargetException when the lookup by name fails.
    aml_compute_cpu = ComputeTarget(workspace=ws, name=clustername)
except ComputeTargetException:
    print("Cluster not find - Creating cluster.....")
    is_new_cluster = True
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS13_V2',
        max_nodes=2,
    )
    aml_compute_cpu = ComputeTarget.create(
        workspace=ws,
        name=clustername,
        provisioning_configuration=compute_config,
    )
else:
    print("Find the existing cluster")

# Block until the cluster reports completion, for both the reused and the new case.
aml_compute_cpu.wait_for_completion(show_output=True)

Find the existing cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [6]:
# Build the training environment from the baseline conda specification file.
premier_train_baseline_env = Environment.from_conda_specification(
    name='premier_train_baseline_env',
    file_path='./environments/conda_dependencies_baseline.yml',
)

# Use the SDK's default CPU base image and turn Docker on for this environment.
# NOTE(review): assigning `docker.enabled` triggers a deprecation warning that points to
# azureml.core.runconfig.DockerConfiguration(use_docker=True) — consider migrating.
premier_train_baseline_env.docker.base_image = DEFAULT_CPU_IMAGE
premier_train_baseline_env.docker.enabled = True

# Register the environment in the workspace. Kept as the last expression so the
# notebook displays the value returned by register().
premier_train_baseline_env.register(workspace=ws)

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


{
    "assetId": "azureml://locations/eastus/workspaces/d5539876-73f2-429b-9d16-cd4969e1602d/environments/premier_train_baseline_env/versions/5",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20220915.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": true,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "premier_trai

In [7]:
# Submit one Azure ML training job per (model, outcome) combination.

# Model identifiers forwarded to train_baseline.py through --model.
mod_names = ['lgr', 'rf', 'gbc']
# Outcomes forwarded to train_baseline.py through --outcome.
# Alternatives previously listed in this cell: outcomes 'icu', 'misa_pt', 'death'
# and model 'svm'.
outcomes = ['misa_pt']

# Handle of every submitted run, keyed by experiment name. Without this only the
# last submission stays reachable, because `run` is rebound on each iteration.
submitted_runs = {}

for mod in mod_names:
    for outcome in outcomes:
        # Script configuration: run training/train_baseline.py on the CPU cluster
        # inside the registered baseline environment.
        estimator = ScriptRunConfig(source_directory='./training',
                                    script='train_baseline.py',
                                    compute_target=aml_compute_cpu,
                                    arguments=['--day_one', '--outcome', outcome, '--model', mod],
                                    environment=premier_train_baseline_env)

        # One experiment per combination; the name is built once and reused for
        # both the Experiment object and the log line so the two cannot drift apart.
        exp_name = f"Job-train-baseline-d1-{outcome}-{mod}"
        experiment = Experiment(workspace=ws, name=exp_name)

        print("Submit Experiment:",exp_name)
        run = experiment.submit(estimator)
        submitted_runs[exp_name] = run


Submit Experiment: Job-train-baseline-d1-misa_pt-lgr
Submit Experiment: Job-train-baseline-d1-misa_pt-rf
Submit Experiment: Job-train-baseline-d1-misa_pt-gbc
