In [105]:
import azureml
import json

from azureml.core.model import Model, InferenceConfig
from azureml.core import Workspace, Datastore, Experiment
from azureml.core.dataset import Dataset

import tensorflow as tf
from IPython.display import Image, display

import matplotlib.pyplot as plt
import tensorflow_hub as hub

import numpy as np

import warnings
# Silence every warning raised from this point on: no category or module
# filter is given, so the 'ignore' action applies to all of them.
warnings.filterwarnings('ignore')

In [106]:
import os

# Notebook configuration. Every setting is looked up in the process
# environment first; the literal is the fallback used when the variable is unset.

# Azure subscription / workspace coordinates
subscription_id = os.environ.get("SUBSCRIPTION_ID", "d6233897-5c9f-47f9-8507-6d4ada2d5183")
resource_group = os.environ.get("RESOURCE_GROUP", "RG_Jaganlal")
workspace_name = os.environ.get("WORKSPACE_NAME", "beanleaf_classifier")
workspace_region = os.environ.get("WORKSPACE_REGION", "centralus")

# Compute and experiment names
cluster_name = os.environ.get("CLUSTER_NAME", "beanleaf-classifier-cluster")
experiment_name = os.environ.get("EXPERIMENT_NAME", "BeanClassifierExperiment")

# Data locations (local folder, datastore path, blob container)
data_local_directory = os.environ.get("DATA_LOCAL_DIRECTORY", "./beans/")
dataset_path_in_datastore = os.environ.get("BLOB_STORAGE_PATH", "beanleaf")
blob_container = os.environ.get("BLOB_CONTAINER", "beanleaf_dataset")

In [107]:
# Load the Azure ML workspace from a saved configuration (no explicit path
# or file name is passed, so the SDK's default lookup is used).
ws = Workspace.from_config()
# Bare expression so the notebook shows the workspace repr as the cell output.
ws

Workspace.create(name='beanleaf_classifier', subscription_id='d6233897-5c9f-47f9-8507-6d4ada2d5183', resource_group='RG_Jaganlal')

In [109]:
from azureml.core import ComputeTarget

# Look up the compute cluster named by `cluster_name` in the workspace and
# echo it as the cell output.
target = ComputeTarget(workspace=ws, name=cluster_name)
target

AmlCompute(workspace=Workspace.create(name='beanleaf_classifier', subscription_id='d6233897-5c9f-47f9-8507-6d4ada2d5183', resource_group='RG_Jaganlal'), name=beanleaf-classifier-cluster, id=/subscriptions/d6233897-5c9f-47f9-8507-6d4ada2d5183/resourceGroups/RG_Jaganlal/providers/Microsoft.MachineLearningServices/workspaces/beanleaf_classifier/computes/beanleaf-classifier-cluster, type=AmlCompute, provisioning_state=Succeeded, location=centralus, tags={})

# Connect to the Compute Target, Datastore and Experiment

In [110]:
# Compute cluster the training job will run on
compute_target = ComputeTarget(workspace=ws, name=cluster_name)
print(f"Compute Target: {compute_target.name}")

# Default datastore of the workspace (holds the training images)
ds = ws.get_default_datastore()
print(f"Datastore: {ds.name}")

# Experiment under which runs are recorded
exp = Experiment(workspace=ws, name=experiment_name)
print(f"Experiment: {exp.name}")

Compute Target: beanleaf-classifier-cluster
Datastore: workspaceblobstore
Experiment: BeanClassifierExperiment


In [None]:
from azureml.data.datapath import DataPath

# Upload data to AzureML Datastore.
# `ds` must stay bound to the datastore: later cells build paths with
# `(ds, ...)` and `ds.path(...)`, so the uploaded and registered datasets get
# their own names instead of overwriting `ds`.
ds = ws.get_default_datastore()
uploaded_files = Dataset.File.upload_directory(src_dir=data_local_directory,
            target=DataPath(ds, 'beanleaf_dataset'),
            show_progress=True, overwrite=True)

# Register file dataset with AzureML
beanleaf_dataset = uploaded_files.register(workspace=ws, name="beanleaf", description="Bean leaves with train, test and validation folders", create_new_version=True)
print(f'Dataset {beanleaf_dataset.name} registered.')

In [112]:
# Point a file dataset at a single sample image inside the datastore.
# The path is written with '/' directly: os.path.join would insert the local
# OS separator (a backslash on Windows), which is not what the recorded
# datastore path ('beanleaf_dataset/train/healthy/...') looks like.
datastore_paths = [(ds, 'beanleaf_dataset/train/healthy/healthy_train.0.jpg')]
data_ds = Dataset.File.from_files(path=datastore_paths)
data_ds

{
  "source": [
    "('workspaceblobstore', 'beanleaf_dataset/train/healthy/healthy_train.0.jpg')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ]
}

In [146]:
# Build the run input: the 'beanleaf_dataset' folder of the datastore,
# exposed to the script under the name 'input' and delivered as a mount.
input_data = (
    Dataset.File
    .from_files(ds.path('beanleaf_dataset'))
    .as_named_input('input')
    .as_mount()
)

In [148]:
# Inspect the compute-side path of the named input before any delivery mode
# (mount/download) is chosen; the recorded output for this cell is None.
print(
    Dataset.File
    .from_files(ds.path('beanleaf_dataset'))
    .as_named_input('input')
    .path_on_compute
)

None


In [80]:
%%writefile ./dummy_train.py

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""Smoke-test script for a mounted dataset.

Usage: dummy_train.py <mounted_input_path>

Checks that a known sample image and the 'train' folder exist under the
given path, then builds a Keras image dataset from the 'train' folder.
Progress is reported with print statements only; nothing is trained or saved.
"""
import sys
import os
from os.path import exists
import tensorflow as tf

print("*********************************************************")
print("Hello Azure ML!")

# Exit with a usage message instead of a bare IndexError when the dataset
# path argument was not supplied.
if len(sys.argv) < 2:
    sys.exit("Usage: dummy_train.py <mounted_input_path>")

mounted_input_path = sys.argv[1]
# mounted_output_path = sys.argv[2]

print("Argument 1: %s" % mounted_input_path)

# Probe one known file to confirm the dataset is visible at the given path.
path_to_file = os.path.join(mounted_input_path, 'train/healthy/healthy_train.0.jpg')
print("Path to file:", path_to_file)
file_exists = exists(path_to_file)
print("File healthy_train.0.jpg - ", file_exists)

# Image loading parameters passed to image_dataset_from_directory below.
batch_size = 128
img_height = 224
img_width = 224

training_path = os.path.join(mounted_input_path, 'train')
training_path_exists = exists(training_path)
print("training_path_exists - ", training_path_exists)

# Build the dataset from the 'train' folder; the fixed seed is passed through
# to Keras unchanged.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(training_path,
                                                        seed=111,
                                                        image_size=(img_height, img_width),
                                                        batch_size=batch_size)

print('Training DS')

Overwriting ./dummy_train.py


In [61]:
# dataset = Dataset.get_by_name(ws, name='beanleaf', version='latest')
# dataset_folder = dataset.as_mount()

In [142]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# pip packages to install into the training environment
pip_requirements = ['azureml-sdk>=1.12.0', 'tensorflow', 'tensorflow_hub', 'matplotlib', 'numpy']

myenv = Environment(name="myenv")
# NOTE(review): the recorded output of this cell warns that 'enabled' is
# deprecated in favour of azureml.core.runconfig.DockerConfiguration(use_docker=...).
myenv.docker.enabled = True
myenv.python.conda_dependencies = CondaDependencies.create(pip_packages=pip_requirements)

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [143]:
from azureml.core import ScriptRunConfig
from azureml.core.runconfig import DockerConfiguration

# Describe the training job: which script to run, its command-line arguments,
# where it runs and in which environment.
# Docker use is requested through DockerConfiguration, the replacement named
# by the SDK's deprecation warning for Environment.docker.enabled.
src = ScriptRunConfig(source_directory = './',
                      script = 'train.py',
                      arguments=[
                            # the dataset input object is passed as the value of --container_name
                            '--container_name', input_data,
                            '--dataset_name', 'beanleaf',
                            '--dataset_desc', '',
                            '--model_path', './models/',
                            '--artifact_loc', './outputs/models/'
                        ],
                      compute_target = compute_target,
                      environment = myenv,
                      docker_runtime_config = DockerConfiguration(use_docker=True))
src


<azureml.core.script_run_config.ScriptRunConfig at 0x7fb0d899db50>

In [144]:
# Submit the configured job to the experiment; `run` is the handle used by
# the following cell to wait for it.
run = exp.submit(src)

In [145]:
# Block until the submitted run finishes; show_output=True streams the run's
# log into the cell output (see the recorded log below).
run.wait_for_completion(show_output=True)

RunId: BeanClassifierExperiment_1663035315_4e402d7b
Web View: https://ml.azure.com/runs/BeanClassifierExperiment_1663035315_4e402d7b?wsid=/subscriptions/d6233897-5c9f-47f9-8507-6d4ada2d5183/resourcegroups/RG_Jaganlal/workspaces/beanleaf_classifier&tid=95917aa5-d840-4443-a19e-aa3ad09d4bb5

Streaming user_logs/std_log.txt

2022-09-13 02:15:35.875124: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-13 02:15:35.994386: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /azureml-envs/azureml_e66e5708c61c76070b5659316b08495b/lib:
2022-09-13 02:15:35.994417:

{'runId': 'BeanClassifierExperiment_1663035315_4e402d7b',
 'target': 'beanleaf-classifier-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-09-13T02:15:28.930408Z',
 'endTimeUtc': '2022-09-13T02:16:24.736661Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlctrain',
  'ContentSnapshotId': '0815570c-58b6-46c4-8d4e-fce5508e0c4f',
  'azureml.git.repository_uri': 'git@github.com-jaganlal:jaganlal/beanleaf-disease-classifier.git',
  'mlflow.source.git.repoURL': 'git@github.com-jaganlal:jaganlal/beanleaf-disease-classifier.git',
  'azureml.git.branch': 'main',
  'mlflow.source.git.branch': 'main',
  'azureml.git.commit': '2197d6a4d10b35e7a9dc816302c8e8d107897811',
  'mlflow.source.git.commit': '2197d6a4d10b35e7a9dc816302c8e8d107897811',
  'azureml.git.dirty': 'True',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [{'dataset': {'id': 'de256d06-8a1a-491e-910c-e63da3735e95'}, 'consum