# Deep Models Experiments Premier Analysis on Azure Machine Learning
### LSTM and DAN 

In [1]:
import argparse
import os
import sklearn
import pandas as pd 
import numpy as np
from sklearn.metrics import f1_score,accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from azureml.core import Run, Dataset, Environment,Experiment,ScriptRunConfig
from sklearn.preprocessing import LabelEncoder
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

In [2]:
from azureml.core import  Workspace
from azureml.core.authentication import InteractiveLoginAuthentication
interactive_auth = InteractiveLoginAuthentication(tenant_id="9ce70869-60db-44fd-abe8-d2767077fc8f")

ws = Workspace.from_config()

In [3]:
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

Workspace name: cdh-azml-dev-mlw
Azure region: eastus
Subscription id: 320d8d57-c87c-4434-827f-59ee7d86687a
Resource group: csels-cdh-dev


In [4]:
# current working directory
path = os.getcwd()
print("Current Directory:", path)
  
# parent directory
parent = os.path.join(path, os.pardir)
  
# prints parent directory
print("\nParent Directory:", os.path.abspath(parent))

premier_path = os.path.abspath(parent)

Current Directory: /mnt/batch/tasks/shared/LS_root/mounts/clusters/wsn8-su2/code/Users/WSN8-SU/premier_analysis/azure_ml

Parent Directory: /mnt/batch/tasks/shared/LS_root/mounts/clusters/wsn8-su2/code/Users/WSN8-SU/premier_analysis


### Create Compute

In [5]:
clustername = 'StandardNC6'
is_new_cluster = False
try:
    aml_cluster = ComputeTarget(workspace = ws,name= clustername)
    print("Find the existing cluster")
except ComputeTargetException:
    print("Cluster not find - Creating cluster.....")
    is_new_cluster = True
    compute_config = AmlCompute.provisioning_configuration(vm_size='StandardNC6',
                                                           max_nodes=2)
    aml_cluster = ComputeTarget.create(ws, clustername, compute_config)

aml_cluster.wait_for_completion(show_output=True)

Find the existing cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [6]:
features_path = os.path.join(premier_path,'output/parquet')
features_pkl = os.path.join(premier_path,'output/pkl')


data_store = ws.get_default_datastore()
data_store.upload(src_dir=features_path,target_path='parquet',overwrite=True,show_progress=True)
data_store.upload(src_dir=features_pkl,target_path='pkl',overwrite=True,show_progress=True)

"Datastore.upload" is deprecated after version 1.0.69. Please use "Dataset.File.upload_directory" to upload your files             from a local directory and create FileDataset in single method call. See Dataset API change notice at https://aka.ms/dataset-deprecation.


Uploading an estimated of 3 files
Uploading /mnt/batch/tasks/shared/LS_root/mounts/clusters/wsn8-su2/code/Users/WSN8-SU/premier_analysis/output/parquet/.amlignore
Uploaded /mnt/batch/tasks/shared/LS_root/mounts/clusters/wsn8-su2/code/Users/WSN8-SU/premier_analysis/output/parquet/.amlignore, 1 files out of an estimated total of 3
Uploading /mnt/batch/tasks/shared/LS_root/mounts/clusters/wsn8-su2/code/Users/WSN8-SU/premier_analysis/output/parquet/.amlignore.amltmp
Uploaded /mnt/batch/tasks/shared/LS_root/mounts/clusters/wsn8-su2/code/Users/WSN8-SU/premier_analysis/output/parquet/.amlignore.amltmp, 2 files out of an estimated total of 3
Uploading /mnt/batch/tasks/shared/LS_root/mounts/clusters/wsn8-su2/code/Users/WSN8-SU/premier_analysis/output/parquet/flat_features.parquet
Uploaded /mnt/batch/tasks/shared/LS_root/mounts/clusters/wsn8-su2/code/Users/WSN8-SU/premier_analysis/output/parquet/flat_features.parquet, 3 files out of an estimated total of 3
Uploaded 3 files
Uploading an estimated

In [None]:
cohort_path = os.path.join(premier_path,'output/cohort')
data_store.upload(src_dir=cohort_path,target_path='cohort',overwrite=True,show_progress=True)

In [59]:
%%writefile conda_dependencies_model.yml

channels:
- anaconda
- default
dependencies:
- python=3.8
- pip:
  - azureml-defaults
  - matplotlib
  - pandas
  - argparse
  - joblib
  - scikit-learn
  - azureml-sdk
  - openpyxl
  - tensorflow
  - keras-tuner

Overwriting conda_dependencies_model.yml


In [None]:
premier_train_model_env = Environment.from_conda_specification(name='premier_train_model_env', file_path='conda_dependencies_model.yml')
# Specify a GPU base image
premier_train_model_env.docker.enabled = True
premier_train_model_env.docker.base_image = DEFAULT_CPU_IMAGE
premier_train_model_env.register(workspace=ws)

In [None]:

mod_names = ['dan', 'lstm']
#outcomes = ['icu','death','misa_pt']
outcomes = ['icu']
EPOCHS = 10
for outcome in outcomes:
    # Create experiment
    experiment = Experiment(workspace=ws, name=f"Premier-outcome-{outcome}")
    for mod in mod_names:

        estimator = ScriptRunConfig(source_directory='./training',
                            script='train_model.py',
                            compute_target=aml_cluster,
                            arguments=['--all_days','--outcome', outcome,'--model', mod,'--epochs',EPOCHS],
                            environment=premier_train_model_env)

        
        print("Submit Experiment")
        run = experiment.submit(estimator)