In [33]:
import os
import sys, inspect
import pandas as pd
import time
import glob
import shutil
import tempfile
from azureml.core import Workspace, Experiment, Run

#from auth import get_auth
from src.config import EVAL_CONFIG, DATA_CONFIG, RESULT_CONFIG

In [34]:
# Collect every Python source file that is shipped with the evaluation run.
# glob.glob() returns its matches in arbitrary, filesystem-dependent order, so
# the result is sorted to keep the file list (and this cell's output) stable
# across machines and re-runs.
code_dir = "src"
scrfiles = sorted(glob.glob(os.path.join(code_dir, "*.py")))
scrfiles

['src/config.py',
 'src/constants.py',
 'src/evaluatepose.py',
 'src/posepoints.py',
 'src/utils.py']

In [35]:
# Relative path of the pose model file, assembled from the
# root / pose-type / model-file components stored in DATA_CONFIG.
pose_model_dir = os.path.join(DATA_CONFIG.POSEROOT_PATH, DATA_CONFIG.POSETYPE_PATH)
modelpath = os.path.join(pose_model_dir, DATA_CONFIG.MODELTYPE_PATH)

print('modelpath ', modelpath)


modelpath  pose/coco/pose_iter_440000.caffemodel


In [36]:
# Relative path of the network definition (prototxt), assembled from the
# root / pose-type / prototxt-file components stored in DATA_CONFIG.
pose_proto_dir = os.path.join(DATA_CONFIG.POSEROOT_PATH, DATA_CONFIG.POSETYPE_PATH)
protopath = os.path.join(pose_proto_dir, DATA_CONFIG.PROTOTXT_PATH)
print('protopath ', protopath)


protopath  pose/coco/deploy_coco.prototxt


In [37]:
# Directory one level above the notebook's working directory.
cwd = os.getcwd()
parent = os.path.split(cwd)[0]

# Name of the folder that holds the pose models, as configured in DATA_CONFIG.
posepath = DATA_CONFIG.POSEROOT_PATH

In [38]:
def list_files(startdir, dirname, subdir=None):
    """Return the files found in ``<dirname>/<subdir>`` folders below ``startdir``.

    The tree rooted at ``startdir`` is walked top-down. Whenever a directory
    named ``dirname`` is found, the regular files stored in its ``subdir``
    sub-folder are collected.

    Args:
        startdir: Directory at which the search starts.
        dirname: Name (single path component) of the directory to look for.
        subdir: Sub-folder of ``dirname`` whose files are listed. Defaults to
            ``DATA_CONFIG.POSETYPE_PATH`` when omitted.

    Returns:
        A list of file paths. Entries of one folder are in sorted order;
        the list is empty when nothing matches.
    """
    if subdir is None:
        subdir = DATA_CONFIG.POSETYPE_PATH

    filepaths = []
    for root, dirs, _files in os.walk(startdir, topdown=True):
        if dirname not in dirs:
            continue

        # Build the path from `root` (where the match was actually found),
        # not from `startdir`: the walk also reports matches in nested
        # folders, and anchoring on `startdir` would then point at a folder
        # that does not exist or re-list the same folder once per match.
        target_dir = os.path.join(root, dirname, subdir)
        if not os.path.isdir(target_dir):
            # A directory with the right name but without the sub-folder.
            continue

        # os.listdir() order is arbitrary; sort for reproducible results.
        for filename in sorted(os.listdir(target_dir)):
            filepath = os.path.join(target_dir, filename)
            # Only regular files are returned; nested directories are skipped.
            if os.path.isfile(filepath):
                filepaths.append(filepath)

    return filepaths



In [39]:
# Search below the parent directory for the files of the configured pose model folder.
modelfiles = list_files(startdir=parent, dirname=posepath)
modelfiles


['/mnt/resource/batch/tasks/shared/LS_root/mounts/clusters/pankaja-compins/code/Users/pankaja_us/Pose-Estimation/pose/coco/deploy_coco.prototxt',
 '/mnt/resource/batch/tasks/shared/LS_root/mounts/clusters/pankaja-compins/code/Users/pankaja_us/Pose-Estimation/pose/coco/pose_iter_440000.caffemodel']

In [40]:
# Everything to be staged for the run: source files first, then model files.
paths = [*scrfiles, *modelfiles]
paths


['src/config.py',
 'src/constants.py',
 'src/evaluatepose.py',
 'src/posepoints.py',
 'src/utils.py',
 '/mnt/resource/batch/tasks/shared/LS_root/mounts/clusters/pankaja-compins/code/Users/pankaja_us/Pose-Estimation/pose/coco/deploy_coco.prototxt',
 '/mnt/resource/batch/tasks/shared/LS_root/mounts/clusters/pankaja-compins/code/Users/pankaja_us/Pose-Estimation/pose/coco/pose_iter_440000.caffemodel']

In [41]:
# Create a temp folder and copy the code and model files into it (the dataset is not copied; it is mounted when the run is submitted).

In [42]:
# Stage the collected files in a fresh folder named by temp_path.
print("Creating temp folder...")
temp_path = "tmp_eval"

# Start from a clean slate: discard whatever an earlier execution left behind.
if os.path.exists(temp_path):
    shutil.rmtree(temp_path)
os.mkdir(temp_path)

# Copy each collected file into the staging folder.
for source_file in paths:
    shutil.copy(source_file, temp_path)

print("Done.")

Creating temp folder...
Done.


In [43]:
# Manual (interactive) execution: obtain the workspace via Workspace.from_config().
workspace = Workspace.from_config()
# Pipeline execution: pass explicit authentication instead. This also needs the
# `from auth import get_auth` import, which is commented out in the import cell.
#workspace = Workspace.from_config(auth = get_auth())

In [44]:
# The experiment is created/looked up under the name stored in EVAL_CONFIG.NAME.
# NOTE(review): the config also carries EXPERIMENT_NAME (the value shown in the
# commented-out line below) - confirm that NAME is the intended experiment name.
#experimentname = 'anonrgbtrain_poseestimation_ps'
experimentname = EVAL_CONFIG.NAME
print('experimentname ', experimentname)
experiment = Experiment(workspace = workspace, name = experimentname)


experimentname  rgbtrain-poseest-95k-run_1


In [45]:
#Find/create a compute target.

In [46]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Use ONE cluster name for both the lookup and the fallback creation.
# Previously the lookup used EVAL_CONFIG.CLUSTER_NAME while the creation used a
# hard-coded "gpu-cluster", so a differently configured name would have led to
# creating (or colliding with) the wrong cluster.
cluster_name = EVAL_CONFIG.CLUSTER_NAME

try:
    # Compute cluster exists: just connect to it.
    compute_target = ComputeTarget(workspace = workspace, name = cluster_name)
    print("Found existing compute target.")
except ComputeTargetException:
    # Compute cluster does not exist: create one and wait until provisioning
    # has finished (at most 20 minutes).
    print("Creating a new compute target...")
    compute_config = AmlCompute.provisioning_configuration(
        vm_size = 'Standard_NC6',
        max_nodes = 4
    )
    compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)
    compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)

compute_target

Found existing compute target.


AmlCompute(workspace=Workspace.create(name='cgm-ml-prod-we-azml', subscription_id='9b5bbfae-d5d1-4aae-a2ca-75159c0c887d', resource_group='cgm-ml-prod-we-rg'), name=gpu-cluster, id=/subscriptions/9b5bbfae-d5d1-4aae-a2ca-75159c0c887d/resourceGroups/cgm-ml-prod-we-rg/providers/Microsoft.MachineLearningServices/workspaces/cgm-ml-prod-we-azml/computes/gpu-cluster, type=AmlCompute, provisioning_state=Succeeded, location=westeurope, tags=None)

In [47]:
# Look up the workspace dataset whose name is configured in DATA_CONFIG.NAME.
dataset = workspace.datasets[DATA_CONFIG.NAME]
dataset

{
  "source": [
    "('omdena_datasets', 'rgb_training/**')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "286a707f-3920-4abb-8294-6e73edf3801d",
    "name": "anon_rgb_training",
    "version": 1,
    "description": "An RGB dataset for training the models.",
    "workspace": "Workspace.create(name='cgm-ml-prod-we-azml', subscription_id='9b5bbfae-d5d1-4aae-a2ca-75159c0c887d', resource_group='cgm-ml-prod-we-rg')"
  }
}

In [48]:
# NOTE(review): MpiConfiguration is not referenced in any visible cell of this notebook.
from azureml.core.runconfig import MpiConfiguration
from azureml.train.dnn import TensorFlow
# Show the framework versions the TensorFlow estimator offers; the estimator
# created further down requests framework_version "2.2".
TensorFlow.get_supported_versions()

['1.10', '1.12', '1.13', '2.0', '2.1', '2.2']

In [49]:
#parameters used in the evaluation
#script_params = {f"--MODEL_{k}": v for k, v in MODEL_CONFIG.items()}
script_params = {f"--EVAL_{k}": v for k, v in EVAL_CONFIG.items()}
script_params.update({f"--DATA_{k}": v for k, v in DATA_CONFIG.items()})
script_params.update({f"--RESULT_{k}": v for k, v in RESULT_CONFIG.items()})
script_params

{'--EVAL_NAME': 'rgbtrain-poseest-95k-run_1',
 '--EVAL_EXPERIMENT_NAME': 'anonrgbtrain_poseestimation_ps',
 '--EVAL_CLUSTER_NAME': 'gpu-cluster',
 '--EVAL_DEBUG_RUN': False,
 '--EVAL_DEBUG_NUMBER_OF_SCAN': 50,
 '--EVAL_SPLIT_SEED': 0,
 '--DATA_NAME': 'anon_rgb_training',
 '--DATA_IMAGE_TARGET_HEIGHT': 240,
 '--DATA_IMAGE_TARGET_WIDTH': 180,
 '--DATA_BATCH_SIZE': 256,
 '--DATA_NORMALIZATION_VALUE': 7.5,
 '--DATA_TARGET_INDEXES': [0],
 '--DATA_NUM_SCANFILES': 0,
 '--DATA_CODE_TO_SCANTYPE': {'100': '_front',
  '101': '_360',
  '102': '_back',
  '200': '_lyingfront',
  '201': '_lyingrot',
  '202': '_lyingback'},
 '--DATA_POSEROOT_PATH': 'pose',
 '--DATA_POSETYPE_PATH': 'coco',
 '--DATA_PROTOTXT_PATH': 'deploy_coco.prototxt',
 '--DATA_MODELTYPE_PATH': 'pose_iter_440000.caffemodel',
 '--DATA_DATASETTYPE_PATH': 'COCO',
 '--RESULT_COLUMNS': ['artifact'],
 '--RESULT_SAVE_PATH': 'outputs/'}

In [50]:
# Reference timestamp for the "Total time for evaluation experiment" report printed later.
start = time.time()

In [51]:
# Pip packages requested for the run, passed to the estimator below.
pip_packages = [
    "azureml-dataprep[fuse,pandas]",
    "glob2",
    "opencv-python==4.1.2.30",
    "matplotlib",
]

# Estimator settings, gathered in one place before the estimator is built.
estimator_settings = dict(
    source_directory=temp_path,
    compute_target=compute_target,
    entry_script="evaluatepose.py",
    use_gpu=True,
    framework_version="2.2",
    inputs=[dataset.as_named_input("dataset").as_mount()],
    pip_packages=pip_packages,
    script_params=script_params,
)

# Create the estimator.
estimator = TensorFlow(**estimator_settings)

# Set compute target.
estimator.run_config.target = compute_target

# Run the experiment (no tags are attached to the run).
tags = {}
run = experiment.submit(estimator, tags=tags)

# Show run.
run

Submitting /mnt/resource/batch/tasks/shared/LS_root/mounts/clusters/pankaja-compins/code/Users/pankaja_us/Pose-Estimation/QA/tmp_eval directory for run. The size of the directory >= 25 MB, so it can take a few minutes.


Experiment,Id,Type,Status,Details Page,Docs Page
rgbtrain-poseest-95k-run_1,rgbtrain-poseest-95k-run_1_1605890365_2b6053c0,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [None]:
# Wait for the submitted run to complete; show_output=True shows its logs in the cell output meanwhile.
run.wait_for_completion(show_output=True)

In [None]:
# Print the status of the run; "Completed" is expected once the run has finished successfully.
print(run.get_status())

In [None]:
# Report the wall-clock time elapsed since `start` was recorded.
end = time.time()
elapsed_message = "Total time for evaluation experiment: {} sec".format(end - start)
print(elapsed_message)

In [None]:
# Download the evaluation results of the model.
# Run.download_file() fetches ONE artifact identified by its full name, so the
# file name must be appended to the output folder; passing only the folder
# ('outputs/') does not name a downloadable file. The artifact is stored as
# ./result.csv, the path the following cell reads.
GET_CSV_FROM_EXPERIMENT_PATH = '.'
RESULT_FILE_NAME = 'result.csv'
# NOTE(review): assumes the entry script writes its table to
# <RESULT_CONFIG.SAVE_PATH>/result.csv - confirm against evaluatepose.py.
result_artifact = RESULT_CONFIG.SAVE_PATH.rstrip('/') + '/' + RESULT_FILE_NAME
run.download_file(
    name=result_artifact,
    output_file_path=os.path.join(GET_CSV_FROM_EXPERIMENT_PATH, RESULT_FILE_NAME),
)
print("Downloaded the result.csv")

In [None]:
# Display the evaluation results as a rendered HTML table.
from IPython.display import display, HTML
import pandas as pd

result = pd.read_csv('result.csv')
result_table = HTML(result.to_html())
display(result_table)

In [None]:
# Delete the temp folder (temp_path) and everything in it.
shutil.rmtree(temp_path)