In [1]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.28.0 to work with mlops


In [2]:
import os
from pathlib import Path

# Get experiment folder
experiment_folder = Path(os.getcwd()).parent
print(experiment_folder)

/mnt/batch/tasks/shared/LS_root/mounts/clusters/gpu-mlops/code/Users/s147056/image-restoration


In [3]:
model = ws.models['image_resto']
print(model.name, 'version', model.version)

image_resto version 2


## Create yaml for env

In [4]:
from azureml.core.conda_dependencies import CondaDependencies 

# Ensure the required packages are installed (we need pip, scikit-learn and Azure ML defaults)
packages = CondaDependencies()

# Add pip packages from requirements.txt
with open(os.path.join(experiment_folder, "requirements.txt"), "r") as f:
    for line in f:
        if line[0].isalpha():
            packages.add_pip_package(line.strip())
            
# Save the environment config as a .yml file
env_file = os.path.join(experiment_folder, "src", "deploy", "image_resto_env.yml")
with open(env_file,"w") as f:
    f.write(packages.serialize_to_string())
print("Saved dependency info in", env_file)

# Print the .yml file
with open(env_file,"r") as f:
    print(f.read())           

Saved dependency info in /mnt/batch/tasks/shared/LS_root/mounts/clusters/gpu-mlops/code/Users/s147056/image-restoration/src/deploy/image_resto_env.yml
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
    # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults

  - click
  - Sphinx
  - coverage
  - awscli
  - flake8
  - python-dotenv>=0.5.1
  - wandb
  - pathlib2
  - argparse
  - torch
  - torchvision
  - opencv-python
  - joblib
channels:
- anaconda
- conda-forge



## Deploy

In [5]:
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core import Model

# Set path for scoring script
script_file = os.path.join(experiment_folder, "src", "deploy", "deploy_model.py")

# Configure the scoring environment
inference_config = InferenceConfig(runtime= "python",
                                   entry_script=script_file,
                                   conda_file=env_file)

deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)

service_name = "image-reconstruction-service"

service = Model.deploy(ws, service_name, [model], inference_config, deployment_config)

service.wait_for_deployment(True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-06-15 20:10:02+00:00 Creating Container Registry if not exists.
2021-06-15 20:10:02+00:00 Registering the environment.
2021-06-15 20:10:04+00:00 Use the existing image.
2021-06-15 20:10:04+00:00 Generating deployment configuration.
2021-06-15 20:10:05+00:00 Submitting deployment to compute..
2021-06-15 20:10:11+00:00 Checking the status of deployment image-reconstruction-service..
2021-06-15 20:13:50+00:00 Checking the status of inference endpoint image-reconstruction-service.
Failed


Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 7fbc26ad-d0a9-4b8a-8d11-74bcee4d7e19
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "statusCode": 400,
  "message": "Aci Deployment failed with exception: Error in entry script, ModuleNotFoundError: No module named 'src', please run print(service.get_logs()) to get details.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Error in entry script, ModuleNotFoundError: No module named 'src', please run print(service.get_logs()) to get details."
    }
  ]
}



WebserviceException: WebserviceException:
	Message: Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 7fbc26ad-d0a9-4b8a-8d11-74bcee4d7e19
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "statusCode": 400,
  "message": "Aci Deployment failed with exception: Error in entry script, ModuleNotFoundError: No module named 'src', please run print(service.get_logs()) to get details.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Error in entry script, ModuleNotFoundError: No module named 'src', please run print(service.get_logs()) to get details."
    }
  ]
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service deployment polling reached non-successful terminal state, current service state: Failed\nOperation ID: 7fbc26ad-d0a9-4b8a-8d11-74bcee4d7e19\nMore information can be found using '.get_logs()'\nError:\n{\n  \"code\": \"AciDeploymentFailed\",\n  \"statusCode\": 400,\n  \"message\": \"Aci Deployment failed with exception: Error in entry script, ModuleNotFoundError: No module named 'src', please run print(service.get_logs()) to get details.\",\n  \"details\": [\n    {\n      \"code\": \"CrashLoopBackOff\",\n      \"message\": \"Error in entry script, ModuleNotFoundError: No module named 'src', please run print(service.get_logs()) to get details.\"\n    }\n  ]\n}"
    }
}

In [6]:
print(service.get_logs())

2021-06-15T20:14:21,381043400+00:00 - rsyslog/run 
2021-06-15T20:14:21,379546000+00:00 - iot-server/run 
2021-06-15T20:14:21,379547900+00:00 - gunicorn/run 
2021-06-15T20:14:21,400435700+00:00 - nginx/run 
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-06-15T20:14:21,684239100+00:00 - iot-server/finish 1 0
2021-06-15T20:14:21,686507000+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (158)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 187
SPARK_HOME not set. Skipping PySpark Initialization.
Initializing logger
2021-06-15 20:14:25,935 | root | INFO | Starting up app insights client
2021-06-15 20:14:25,935 | root | INFO | Starting up request id generator
2021-06-15 20:14:25,936 | root | INFO | Starting up app insight hooks
2021-06-15 20:14:25,936 | root | INFO | Invoking user's init function
2021-06-15 20:14:25,942 | root | ERROR | User's init function failed

In [49]:
for webservice_name in ws.webservices:
    print(webservice_name)

In [None]:
import json
import torch
import matplotlib.pyplot as plt

x_new = torch.rand(1, 224, 224)*255

# Convert the array to a serializable list in a JSON document
input_json = json.dumps({"data": x_new})

# Call the web service, passing the input data (the web service will also accept the data in binary format)
reconstruction = service.run(input_data = input_json)

# Get the predicted class - it'll be the first (and only) one.
reconstruction = json.loads(reconstruction)

%matplotlib inline

type(reconstruction)