# Fashion MNIST Image Classification - Azure ML SDK Training

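This notebook trains a Keras convolutional neural network on the Fashion MNIST dataset using the Azure Machine Learning SDK. It connects to an Azure ML workspace, provisions a Batch AI compute cluster, uploads data to the workspace's default datastore, submits `train.py` as an experiment run, inspects the logged metrics and output files, and finally registers the trained model.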

In [1]:
%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import azureml
from azureml.core import Workspace, Run

# Print the core SDK version so the environment that produced the outputs below is on record.
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  0.1.65


Load your Azure ML workspace from the config file you set up. The config file will look like the one below:

![Config File](./images/configfile.JPG "Config file")

In [2]:
# Load the workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
# Summarise the workspace on one tab-separated line: name, region, resource group.
# (ws.location used to be listed twice; the duplicate has been dropped.)
print(ws.name, ws.location, ws.resource_group, sep='\t')

Found the config file in: C:\Users\amynic\Documents\CloudDeveloperAdvocate\Deeplearning\ai-world-summit\config.json
amyboyd_ws	westeurope	deeplearning	westeurope


In [25]:
from azureml.core import Experiment

# Name of the experiment that the run submitted later in this notebook belongs to.
experiment_name = 'deeplearning_fashion'

exp = Experiment(
    name=experiment_name,
    workspace=ws,
)
print(exp)

Experiment(Name: deeplearning_fashion,
Workspace: amyboyd_ws)


## Create Cloud Compute Target 

In [8]:
from azureml.core.compute import ComputeTarget, BatchAiCompute
from azureml.core.compute_target import ComputeTargetException

# choose a name for your cluster
batchai_cluster_name = "traincluster"

try:
    # look for the existing cluster by name
    compute_target = ComputeTarget(workspace=ws, name=batchai_cluster_name)
    # isinstance is the idiomatic type check and also accepts subclasses of BatchAiCompute
    if isinstance(compute_target, BatchAiCompute):
        print('found compute target {}, just use it.'.format(batchai_cluster_name))
    else:
        print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(batchai_cluster_name))
except ComputeTargetException:
    # the lookup above failed, so provision a new autoscaling cluster under that name
    print('creating a new compute target...')
    compute_config = BatchAiCompute.provisioning_configuration(vm_size="STANDARD_NC6", # NC-series GPU VM (not a CPU-only size)
                                                                #vm_priority='lowpriority', # optional
                                                                autoscale_enabled=True,
                                                                cluster_min_nodes=1, 
                                                                cluster_max_nodes=5)

    # create the cluster
    compute_target = ComputeTarget.create(ws, batchai_cluster_name, compute_config)

    # can poll for a minimum number of nodes and for a specific timeout. 
    # if no min node count is provided it uses the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # Use the 'status' property to get a detailed status for the current cluster. 
    print(compute_target.status.serialize())

creating a new compute target...
Creating
succeeded.............
BatchAI wait for completion finished
Minimum number of nodes requested have been provisioned
{'allocationState': 'resizing', 'allocationStateTransitionTime': '2018-10-08T16:56:45.067000+00:00', 'creationTime': '2018-10-08T16:56:45.067000+00:00', 'currentNodeCount': 1, 'errors': None, 'nodeStateCounts': {'idleNodeCount': 0, 'leavingNodeCount': 0, 'preparingNodeCount': 1, 'runningNodeCount': 0, 'unusableNodeCount': 0}, 'provisioningState': 'succeeded', 'provisioningStateTransitionTime': '2018-10-08T16:57:05.081000+00:00', 'scaleSettings': {'manual': None, 'autoScale': {'maximumNodeCount': 5, 'minimumNodeCount': 1, 'initialNodeCount': 1}}, 'vmPriority': 'dedicated', 'vmSize': 'STANDARD_NC6'}


In [9]:
import os

# Local folder that receives train.py and is later passed to the Estimator
# as its source_directory.
script_folder = './keras-fashion'
os.makedirs(name=script_folder, exist_ok=True)

In [26]:
# Fetch the workspace's default datastore and show which storage backs it.
ds = ws.get_default_datastore()
print(ds.datastore_type, ds.account_name, ds.container_name)

# Copy the local ./data directory to the 'fashiondata' path on the datastore.
# Kept as the cell's last expression so the returned data reference is displayed.
ds.upload(
    src_dir='./data',
    target_path='fashiondata',
    overwrite=True,
    show_progress=True,
)

AzureFile amyboydws4528596861 azureml-filestore-848d7cfa-9e06-44f5-9ed0-5f6d3e4d8c28


$AZUREML_DATAREFERENCE_6ead2fc076dc47578af36b8dcce207d6

## Write train.py file and save model.h5

In [10]:
%%writefile $script_folder/train.py

import tensorflow as tf
import os
import time
os.environ["TF_CPP_MIN_LOG_LEVEL"]= "2"
print("tensorflow Version is: " + str(tf.__version__))

import numpy as np
os.environ['KERAS_BACKEND'] = 'tensorflow'
from keras import backend as K
print(os.environ['KERAS_BACKEND'])

#Fashion MNIST Dataset CNN model development: https://github.com/zalandoresearch/fashion-mnist
from keras.datasets import fashion_mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import utils, losses, optimizers
from sklearn.externals import joblib

import azureml
from azureml.core import Workspace, Run

# let user feed in 2 parameters, the location of the data files (from datastore), and the regularization rate of the logistic regression model
parser = argparse.ArgumentParser()
parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')
args = parser.parse_args()

data_folder = os.path.join(args.data_folder, 'keras-fashion')
print('Data folder:', data_folder)

#variables
num_classes = 10
batch_size = 128
epochs = 24
img_rows,img_cols = 28,28

#data for train and testing
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Define the text labels
fashion_mnist_labels = ["Top",          # index 0
                        "Trouser",      # index 1
                        "Jumper",       # index 2 
                        "Dress",        # index 3 
                        "Coat",         # index 4
                        "Sandal",       # index 5
                        "Shirt",        # index 6 
                        "Trainer",      # index 7 
                        "Bag",          # index 8 
                        "Ankle boot"]   # index 9


print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep = '\n')

#data pre-processing
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test,  num_classes)

#formatting issues for depth of image (greyscale = 1) with different kernels (tensorflow, cntk, etc)
if K.image_data_format()== 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0],1,img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols,1)
    x_test = x_test.reshape(x_test.shape[0],img_rows, img_cols,1)
    input_shape = (img_rows, img_cols,1)
    
#Define the CNN model
model = Sequential()

model.add(Conv2D(64, kernel_size=(3,3), padding = 'same', activation = 'relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))

model.add(Conv2D(32, kernel_size=(3,3), padding = 'same', activation = 'relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten())
model.add(Dense(128, activation='relu'))

model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

model.summary()

# get hold of the current run
run = Run.get_submitted_run()

print('Train a deep learning model')
model.compile(loss=losses.categorical_crossentropy, optimizer=optimizers.Adam(), metrics=['accuracy'])
hist = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test))

#evaluate the model on the test data
print('Predict the test set')
score = model.evaluate(x_test, y_test, verbose=0)
print('Test Loss: ', score[0])
print('Test Accuracy: ', score[1])

# calculate accuracy on the prediction
print('Accuracy is', score[1])

run.log('accuracy', np.float(score[1]))

os.makedirs('outputs', exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
model.save('outputs/model.h5')


Overwriting ./keras-fashion/train.py


## Run the model in the cloud for training

In [11]:
import shutil

# Stage the helper module alongside train.py in the script folder.
# Kept as the cell's last expression so the destination path is displayed.
shutil.copy(src='utils.py', dst=script_folder)

'./keras-fashion\\utils.py'

In [24]:
from azureml.train.estimator import Estimator

# Command-line arguments handed to train.py: the datastore, mounted on the compute target.
script_params = {'--data-folder': ds.as_mount()}

# Describe the training job: which folder to send, which script to run in it,
# where to run it, and which conda packages the environment needs.
est = Estimator(
    source_directory=script_folder,
    entry_script='train.py',
    script_params=script_params,
    compute_target=compute_target,
    conda_packages=['keras', 'scikit-learn'],
)

In [19]:
# Submit the estimator configuration to the experiment and keep the returned run handle.
run = exp.submit(config=est)
# Bare last expression so the notebook renders the run's summary.
run

Experiment,Id,Type,Status,Details Page,Docs Page
deeplearning_fashion,deeplearning_fashion_1539020034713,azureml.scriptrun,Running,Link to Azure Portal,Link to Documentation


In [20]:
from azureml.train.widgets import RunDetails

# Show the run-monitoring widget for the submitted run.
# The widget is built once: a second, bare RunDetails(run) that was constructed
# and immediately discarded has been removed.
RunDetails(run).show()

_UserRun()

In [None]:
# Wait here for the submitted run to complete before inspecting its results.
run.wait_for_completion(show_output=True) # specify True for a verbose log

## Evaluate the model output

In [21]:
# Show the metrics recorded for the run (train.py logs 'accuracy').
print(run.get_metrics())

{'accuracy': 0.9223}


In [22]:
# List the files stored with the run; train.py saves the model as outputs/model.h5.
print(run.get_file_names())

['azureml-logs/60_control_log.txt', 'azureml-logs/80_driver_log.txt', 'outputs/model.h5', 'driver_log', 'azureml-logs/azureml.log', 'azureml-logs/55_batchai_execution.txt']


## Happy with the model? Register it in Azure Machine Learning to manage it

In [None]:
# Register the contents of the run's outputs/ folder as a model in the workspace,
# then report its name, id and version on one tab-separated line.
model = run.register_model(
    model_path='outputs/',
    model_name='keras_dl_fashion',
)
print(model.name, model.id, model.version, sep='\t')

## Tidy up Compute Resource when not in use

In [None]:
# Optionally delete the compute cluster used for training once it is no longer needed.
# It was configured with cluster_min_nodes=1, so it does not scale down to zero nodes by itself.
compute_target.delete()