#Part 1 - Train a classification model with Azure Machine Learning.

This code trains a simple logistic regression model using the credit card dataset with Azure Machine Learning. We train the model on remote compute resources.

##Import packages

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import azureml.core
from azureml.core import Workspace

# Print the installed Azure ML SDK version so the saved output records
# which SDK release this notebook was run against.
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.30.0


##Connect to a workspace

We create a workspace object from our existing workspace in Azure ML. Workspace.from_config() reads the file config.json and loads the details into an object named ws.

In [None]:
# load workspace configuration from the config.json file in the current folder.
# `ws` is reused by the experiment, compute, environment and deployment cells below.
ws = Workspace.from_config()
# Print name, location and resource group on separate lines as a quick
# check that the expected workspace was loaded.
print(ws.name, ws.location, ws.resource_group, sep='\n')

qmul-abhi-ws
uksouth
qmul-abhi-msc


##Create an experiment.

Create an experiment to track the runs in your workspace. A workspace can have multiple experiments.

In [None]:
# Name under which all training runs of this notebook are grouped.
experiment_name = 'Classification-creditcard-logistic'

from azureml.core import Experiment
# Bind the experiment to the workspace loaded above; `exp` is used later to submit the run.
exp = Experiment(workspace=ws, name=experiment_name)

# Show the experiment/workspace pair as a sanity check.
print(exp)

Experiment(Name: Classification-creditcard-logistic,
Workspace: qmul-abhi-ws)


##Create or Attach existing compute resource.

We will use our compute cluster (already set up) to train the model. The code below looks up the compute cluster by name and, if it is not found, creates a new cluster for us.


In [None]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# choose a name for your cluster
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpu-cluster-agx")
# os.environ values are always strings, so convert explicitly: the node
# counts must be integers whether they come from the environment or from
# the defaults below.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    # Re-use the existing target registered under this name.
    compute_target = ws.compute_targets[compute_name]
    if compute_target and type(compute_target) is AmlCompute:
        print("Found compute target: " + compute_name)
    else:
        # Previously this case was silent; the target is still used, but make
        # it visible that it is not the kind of cluster this notebook expects.
        print("Warning: compute target " + compute_name + " exists but is not an AmlCompute cluster")
else:
    print("Creating new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

Found compute target: cpu-cluster-agx


##Train on remote cluster.

We need to submit a job to train our model. A job submission consists of - creating a directory, creating a training script, creating a script run configuration and submitting a job.

###Create a directory.

Create a directory to deliver the necessary code from your computer to the remote resource.

In [None]:
import os
# Folder (under the current working directory) that holds everything sent to
# the remote compute: the training script is written into it below and it is
# passed as `source_directory` to ScriptRunConfig.
script_folder = os.path.join(os.getcwd(), "creditcard-logistic")
# exist_ok=True makes the cell safe to re-run.
os.makedirs(script_folder, exist_ok=True)

###Create a training script.

To submit the job to the cluster, first create a training script. Run the following code to create the training script called `train.py` in the directory you just created.

In [None]:
%%writefile $script_folder/train.py

import argparse
import os
import numpy as np
import pandas as pd
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from azureml.core import Run

# Command-line interface: the only parameter is the regularization rate of
# the logistic regression model. The data file is read from the current
# working directory rather than from a mounted data folder.
parser = argparse.ArgumentParser()
parser.add_argument('--regularization', type=float, dest='reg', default=0.01, help='regularization rate')
args = parser.parse_args()

# Load the full dataset; creditcard.csv must be in the current working directory.
df = pd.read_csv('creditcard.csv')
print(df.shape)

# Columns 1..29 are the features, column 30 onwards is the target
# (column 0 is skipped). Selection is positional, so it relies on the CSV
# keeping this column order.
feature_names = df.iloc[:, 1:30].columns
target = df.iloc[:1, 30:].columns

data_features = df[feature_names]
data_target = df[target]

print(feature_names)
print(target)

np.random.seed(123)
# df[target] is a one-column DataFrame, so to_numpy() has shape (n, 1).
# Flatten it to (n,): scikit-learn expects 1-D labels, and comparing the 1-D
# predictions against an (n, 1) array would broadcast to an (n, n) matrix and
# produce a meaningless accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    data_features.to_numpy(),
    data_target.to_numpy().ravel(),
    train_size=0.70,
    test_size=0.30,
    random_state=1,
)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

# get hold of the current run
run = Run.get_context()

print('Train a logistic regression model with regularization rate of', args.reg)
# C is the inverse of the regularization strength.
clf = LogisticRegression(C=1.0/args.reg, solver="liblinear", multi_class="auto", random_state=42)
clf.fit(X_train, y_train)

print('Predict the test set')
y_hat = clf.predict(X_test)

# calculate accuracy on the prediction (element-wise, both arrays are 1-D)
acc = np.average(y_hat == y_test)
print('Accuracy is', acc)

# Use the builtin float: the np.float alias was removed from NumPy.
run.log('regularization rate', float(args.reg))
run.log('accuracy', float(acc))

os.makedirs('outputs', exist_ok=True)
# note file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=clf, filename='outputs/creditcard_logistic_model.pkl')

Overwriting /mnt/batch/tasks/shared/LS_root/mounts/clusters/compute-abhi/code/Users/ha20269/creditcardfraud/creditcard-logistic/train.py


###Configure the training job.

Create a ScriptRunConfig object to specify the configuration details of your training job, including your training script, environment to use, and the compute target to run on. Configure the ScriptRunConfig by specifying:

* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. 
* The compute target.
* The training script name, train.py
* An environment that contains the libraries needed to run the script
* Arguments required from the training script. 

All files in the script folder are uploaded into the cluster nodes for execution.

First, create the environment that contains: the scikit-learn library, azureml-dataset-runtime (required for accessing the dataset), and azureml-defaults, which contains the dependencies for logging metrics. azureml-defaults also contains the dependencies required for deploying the model as a web service later in Part 2.

Once the environment is defined, register it with the Workspace to re-use it in Part 2.

In [None]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Define the environment used for the remote training run.
# pip packages: azureml-dataset-runtime (with the pandas and fuse extras) and
# azureml-defaults; conda packages: scikit-learn pinned to 0.22.1.
env = Environment('classification-creditcard-env')
cd = CondaDependencies.create(pip_packages=['azureml-dataset-runtime[pandas,fuse]', 'azureml-defaults'], conda_packages = ['scikit-learn==0.22.1'])

env.python.conda_dependencies = cd

# Register the environment in the workspace; the Part 2 deployment cell
# retrieves it again by the name 'classification-creditcard-env'.
# Being the last expression, the call's return value is shown as the cell output.
env.register(workspace = ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210513.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "classification-creditcard-env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
              

In [None]:
from azureml.core import ScriptRunConfig

#args = ['--data-folder', 'data', '--regularization', 0.5]
# Command-line arguments passed to train.py; only --regularization is
# defined by the script's argument parser.
args = ['--regularization', 0.5]

# Bundle everything needed for the remote job: the folder to upload, the
# entry script inside it, its arguments, the compute target and the
# environment defined in the cells above.
src = ScriptRunConfig(source_directory=script_folder,
                      script='train.py', 
                      arguments=args,
                      compute_target=compute_target,
                      environment=env)

###Submit the job to the cluster.

Run the experiment by submitting the ScriptRunConfig object. And you can navigate to Azure portal to monitor the run.

In [None]:
# Submit the configured job to the experiment. `run` is the handle used by
# the following cells to monitor the job, read metrics and register the model.
run = exp.submit(config=src)
# Bare expression: display the run summary as the cell output.
run

Submitting /mnt/batch/tasks/shared/LS_root/mounts/clusters/compute-abhi/code/Users/ha20269/creditcardfraud/creditcard-logistic directory for run. The size of the directory >= 25 MB, so it can take a few minutes.


Experiment,Id,Type,Status,Details Page,Docs Page
Classification-creditcard-logistic,Classification-creditcard-logistic_1624574401_928cd5e9,azureml.scriptrun,Preparing,Link to Azure Machine Learning studio,Link to Documentation


###Monitor a remote run

In [None]:
from azureml.widgets import RunDetails
# Render the run-details widget for the submitted run inside the notebook.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

###Get logs upon completion.

Model training happens in the background. You can use wait_for_completion to block and wait until the model has completed training before running more code.

In [None]:
# Block until the remote run has finished so that the cells below (metrics,
# file listing, model registration) operate on a completed run.
# specify show_output to True for a verbose log
run.wait_for_completion(show_output=True)

RunId: Classification-creditcard-logistic_1624574401_928cd5e9
Web View: https://ml.azure.com/runs/Classification-creditcard-logistic_1624574401_928cd5e9?wsid=/subscriptions/a10a02c4-6bf9-4524-b8d7-7e5a2c1e7f57/resourcegroups/qmul-abhi-msc/workspaces/qmul-abhi-ws&tid=569df091-b013-40e3-86ee-bd9cb9e25814

Execution Summary
RunId: Classification-creditcard-logistic_1624574401_928cd5e9
Web View: https://ml.azure.com/runs/Classification-creditcard-logistic_1624574401_928cd5e9?wsid=/subscriptions/a10a02c4-6bf9-4524-b8d7-7e5a2c1e7f57/resourcegroups/qmul-abhi-msc/workspaces/qmul-abhi-ws&tid=569df091-b013-40e3-86ee-bd9cb9e25814



{'runId': 'Classification-creditcard-logistic_1624574401_928cd5e9',
 'target': 'cpu-cluster-agx',
 'status': 'Completed',
 'startTimeUtc': '2021-06-24T22:40:19.995626Z',
 'endTimeUtc': '2021-06-24T22:41:01.610915Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '180601a3-d106-454a-8e35-bca8a7b40938',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'train.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--regularization', '0.5'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'cpu-cluster-agx',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'priority': None,
  'credentialPassthrough': False,
  'identity': None,
  'environment': {'name': 'c

###Display run results

In [None]:
# Show the metrics recorded for the run; train.py logs
# 'regularization rate' and 'accuracy' via run.log().
print(run.get_metrics())

{'regularization rate': 0.5, 'accuracy': 0.9973233271319127}


##Register model

In [None]:
# List the files attached to the run. The model path passed to
# register_model() in the next cell ('outputs/creditcard_logistic_model.pkl')
# should appear in this list.
print(run.get_file_names())

['azureml-logs/55_azureml-execution-tvmps_5ce4c180bb919288f3100cea27499bd4a8abc33b3bd043d456b8fe07174b6aae_p.txt', 'azureml-logs/65_job_prep-tvmps_5ce4c180bb919288f3100cea27499bd4a8abc33b3bd043d456b8fe07174b6aae_p.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_5ce4c180bb919288f3100cea27499bd4a8abc33b3bd043d456b8fe07174b6aae_p.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/95_azureml.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/creditcard_logistic_model.pkl']


In [None]:
# Register the pickled classifier written by train.py (model_path is the
# file's path inside the run) under the name 'creditcard_logistic'; the
# deployment cells in Part 2 and Part 3 look the model up by this name.
model = run.register_model(model_name='creditcard_logistic', model_path='outputs/creditcard_logistic_model.pkl')
print(model.name, model.id, model.version, sep='\n')

creditcard_logistic
creditcard_logistic:1
1


#Part 2 - Deploy the model in Azure Container Instance (ACI)

In Part-1, we trained a classification model and registered it in our workspace in Azure cloud.

Now, we will deploy the model as a web service in Azure Container Instances (ACI). A web service here is a Docker image that encapsulates the scoring logic and the model itself.

Please note that it is advised to use Azure Kubernetes services for production deployments.


##Import the packages

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
 
import azureml.core

# Display the installed Azure ML SDK version so the saved output records
# which SDK release the deployment steps were run against.
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.30.0


##Create a scoring script.

A scoring script is used by the web service call to show how a model is used.


In [None]:
%%writefile score_unencrypted.py
import json
import numpy as np
import os
import pickle
import joblib

def init():
    """Load the trained classifier into the module-level ``model`` global.

    The pickle file 'creditcard_logistic_model.pkl' is looked up inside the
    directory named by the AZUREML_MODEL_DIR environment variable and
    deserialized with joblib.
    """
    global model
    # AZUREML_MODEL_DIR is set during deployment and points at the model
    # folder (./azureml-models/$MODEL_NAME/$VERSION); with several deployed
    # models it points at the folder containing all of them (./azureml-models).
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    model = joblib.load(os.path.join(model_dir, 'creditcard_logistic_model.pkl'))

def run(raw_data):
    """Score one request with the model loaded by ``init()``.

    Args:
        raw_data: JSON text whose 'data' entry holds the rows to score.

    Returns:
        The model's predictions converted to a plain list, so that the
        result is JSON-serializable.
    """
    payload = json.loads(raw_data)
    rows = np.array(payload['data'])
    # make prediction
    predictions = model.predict(rows)
    return predictions.tolist()

Writing score_unencrypted.py


##Create deployment configuration file and specify the number of CPUs and gigabyte of RAM needed for our ACI container.

In [None]:
from azureml.core.webservice import AciWebservice

# Resource request and metadata for the ACI container: 1 CPU core and 1 GB
# of RAM, plus tags and a description attached to the web service.
# `aciconfig` is passed as deployment_config to Model.deploy() below.
aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, 
                                               memory_gb=1, 
                                               tags={"data": "creditcard",  "method" : "sklearn logistic regression"}, 
                                               description='Predict credit card fraud with sklearn')

##Deploy in ACI.

Configure the image and deploy. The following code goes through these steps:

Retrieve the environment object containing the dependencies needed by the model (the `classification-creditcard-env` environment registered in Part 1).
Create the inference configuration necessary to deploy the model as a web service using:
The scoring file (`score_unencrypted.py`)
The environment object retrieved in the previous step
Deploy the model to the ACI container.
Get the web service HTTP endpoint.

In [None]:
%%time
import uuid
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model

# Reload the workspace and look up the model registered in Part 1 by name.
ws = Workspace.from_config()
model = Model(ws, 'creditcard_logistic')


# Fetch the environment registered in Part 1.
# NOTE(review): this is pinned to version "1", whereas the Part 3 deployment
# cell calls Environment.get() without a version. If the environment has been
# re-registered with different dependencies, confirm that version 1 is still
# the one intended here.
myenv = Environment.get(workspace=ws, name="classification-creditcard-env", version="1")
inference_config = InferenceConfig(entry_script="score_unencrypted.py", environment=myenv)

# Append the first 4 characters of a random UUID so that each deployment
# gets its own service name.
service_name = 'sklearn-creditcard-svc-' + str(uuid.uuid4())[:4]
service = Model.deploy(workspace=ws, 
                       name=service_name, 
                       models=[model], 
                       inference_config=inference_config, 
                       deployment_config=aciconfig)

# Block until the deployment has finished, streaming progress to the output.
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-06-24 22:51:12+00:00 Creating Container Registry if not exists.
2021-06-24 22:51:12+00:00 Registering the environment.
2021-06-24 22:51:13+00:00 Use the existing image.
2021-06-24 22:51:13+00:00 Generating deployment configuration.
2021-06-24 22:51:14+00:00 Submitting deployment to compute..
2021-06-24 22:51:20+00:00 Checking the status of deployment sklearn-creditcard-svc-1ad1..
2021-06-24 22:54:56+00:00 Checking the status of inference endpoint sklearn-creditcard-svc-1ad1.
Succeeded
ACI service creation operation finished, operation "Succeeded"
CPU times: user 4.02 s, sys: 251 ms, total: 4.27 s
Wall time: 3min 51s


Get the scoring web service's HTTP endpoint, which accepts REST client calls. This endpoint can be shared with anyone who wants to test the web service or integrate it into an application.

In [None]:
# URL of the deployed service's scoring endpoint; the raw HTTP test further
# below POSTs to this address.
print(service.scoring_uri)

http://28e1bd93-34db-48ff-bbad-e7566b64643b.uksouth.azurecontainer.io/score


##Test the model

In [None]:
import os
from azureml.core import Dataset
import pandas as pd

#data_folder = os.path.join(os.getcwd(), 'data')
#os.makedirs(data_folder, exist_ok=True)
#data_folder = os.path.join(os.getcwd(), 'data')

# Load the small hold-out file used to exercise the deployed service.
# It is read from the current working directory.
df_test = pd.read_csv('creditcard_test.csv')
print(df_test.shape)

# Same positional column split as in train.py: columns 1..29 are the
# features, column 30 onwards is the target (column 0 is skipped).
feature_names = df_test.iloc[:, 1:30].columns
target = df_test.iloc[:1, 30:].columns

print(feature_names)
print(target)

data_features = df_test[feature_names]
data_target = df_test[target]

X_test = data_features.to_numpy()
# reshape(-1) flattens the one-column target to 1-D so it can be compared
# element-wise with the predictions later on.
y_test = data_target.to_numpy().reshape(-1)

print(X_test)
print(y_test)

(18, 31)
Index(['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11',
       'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21',
       'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount'],
      dtype='object')
Index(['Class'], dtype='object')
[[-9.16978982e+00  7.09219680e+00 -1.23540369e+01  4.24306897e+00
  -7.17643776e+00 -3.38661805e+00 -8.05801195e+00  6.44290866e+00
  -2.41298660e+00 -6.13490689e+00  2.82667123e+00 -6.30984278e+00
  -6.23002386e-01 -7.27986922e+00  9.24233226e-01 -4.21553713e+00
  -7.17167241e+00 -2.55033668e+00  5.96364321e-01  8.16651662e-01
   9.26156961e-01 -8.17706132e-01 -1.50434416e-01 -3.93830600e-02
   4.85639754e-01 -2.64324609e-01  1.15969046e+00  2.32758088e-01
   9.99900000e+01]
 [-9.84877583e+00  7.36554649e+00 -1.28985382e+01  4.27332307e+00
  -7.61199101e+00 -3.42704524e+00 -8.35080815e+00  6.86360392e+00
  -2.38756733e+00 -6.06578236e+00  2.70785640e+00 -6.13603447e+00
  -5.45976096e-01 -7.02898027e+00  9.1

###Predict test data

The following code goes through these steps:

Send the data as a JSON array to the web service hosted in ACI.

Use the SDK's run API to invoke the service. You can also make raw calls using any HTTP tool such as curl.


In [None]:
import json
# Build the request body in the shape the scoring script expects: a JSON
# object whose "data" entry holds the rows to score.
test = json.dumps({"data": X_test.tolist()})
test = bytes(test, encoding='utf8')
# Call the deployed service through the SDK; y_hat holds the predictions
# returned by the scoring script for every row of X_test.
y_hat = service.run(input_data=test)

print(y_hat)

[1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


###Examine the confusion matrix.

Generate a confusion matrix to see how many samples from the test set are classified correctly. Notice the mis-classified value for the incorrect predictions.

In [None]:
from sklearn.metrics import confusion_matrix

# confusion_matrix(y_true, y_pred): by scikit-learn's convention rows are the
# true labels and columns are the predicted labels.
conf_mx = confusion_matrix(y_test, y_hat)
print(conf_mx)
# y_test is a 1-D NumPy array, so the comparison with y_hat is element-wise
# and its mean is the fraction of correctly classified samples.
print('Overall accuracy:', np.average(y_hat == y_test))

[[9 0]
 [4 5]]
Overall accuracy: 0.7777777777777778


Sending raw HTTP requests to test the web service.


In [None]:
import json

import requests

# send a random row from the test set to score
# np.random.randint excludes the upper bound, so pass len(X_test) itself;
# with len(X_test)-1 the last row could never be selected.
random_index = np.random.randint(0, len(X_test))
print(random_index)
# Serialize with json.dumps on plain Python floats (via tolist()) instead of
# str(list(...)): the string form of NumPy scalars is not guaranteed to be
# valid JSON.
input_data = json.dumps({"data": [X_test[random_index].tolist()]})

headers = {'Content-Type':'application/json'}

# for AKS deployment you'd need to send the service key in the header as well
# api_key = service.get_key()
# headers = {'Content-Type':'application/json',  'Authorization':('Bearer '+ api_key)} 

resp = requests.post(service.scoring_uri, input_data, headers=headers)

print("POST to url", service.scoring_uri)
#print("input data:", input_data)
print("label:", y_test[random_index])
print("prediction:", resp.text)

3
POST to url http://28e1bd93-34db-48ff-bbad-e7566b64643b.uksouth.azurecontainer.io/score
label: 1
prediction: [1]


#Part 3 - Deploy a fraud classification model for encrypted inferencing in Azure.

In this part, we deploy the model as an encrypted inferencing web service in Azure Container Instances (ACI).

Add encrypted-inference package as a conda dependency

In [None]:
from azureml.core.environment import Environment
from azureml.core.workspace import Workspace
from azureml.core.conda_dependencies import CondaDependencies

# load workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
print(ws.name, ws.location, ws.resource_group, sep='\n')

# Environment for the encrypted-inference service. On top of the packages
# used by the Part 1 environment it adds azure-storage-blob and
# encrypted-inference (pinned to 0.9), both of which score_encrypted.py imports.
env = Environment('encrypted-creditcard-env')
cd = CondaDependencies.create(pip_packages=['azureml-dataset-runtime[pandas,fuse]', 'azureml-defaults', 'azure-storage-blob', 'encrypted-inference==0.9'], conda_packages = ['scikit-learn==0.22.1'])

env.python.conda_dependencies = cd

# Register the environment; the Part 3 deployment cell retrieves it by the
# name 'encrypted-creditcard-env'.
env.register(workspace = ws)

qmul-abhi-ws
uksouth
qmul-abhi-msc


{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210513.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "encrypted-creditcard-env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "co

##Import packages

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
 
import azureml.core

# Display the installed Azure ML SDK version so the saved output records
# which SDK release the encrypted-inference steps were run against.
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.30.0


##Install Homomorphic Encryption based library for Secure Inferencing

Our library is based on [Microsoft SEAL](https://github.com/Microsoft/SEAL) and published to [PyPi.org](https://pypi.org/project/encrypted-inference) as an easy-to-use package.

In [None]:
!pip install encrypted-inference==0.9



##Deploy as web service
Deploy the model as a web service hosted in ACI.

To build the correct environment for ACI, provide the following:

A scoring script to show how to use the model
A configuration file to build the ACI
The model you trained before

###Create scoring script
Create the scoring script, called `score_encrypted.py`, used by the web service call to show how to use the model.

You must include two required functions into the scoring script:

The init() function, which typically loads the model into a global object. This function is run only once when the Docker container is started.

The run(input_data) function uses the model to predict a value based on the input data. Inputs and outputs to the run typically use JSON for serialization and de-serialization, but other formats are supported. The function fetches homomorphic encryption based public keys that are uploaded by the service caller.

In [None]:
%%writefile score_encrypted.py
import json
import os
import pickle
import joblib
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, PublicAccess
from encrypted.inference.eiserver import EIServer

def init():
    """Load the classifier and build the encrypted-inference server.

    Sets two module-level globals: ``model`` (the classifier deserialized
    from 'creditcard_logistic_model.pkl' under AZUREML_MODEL_DIR) and
    ``server`` (an EIServer created from the model's coefficients and
    intercept).
    """
    global model, server
    # AZUREML_MODEL_DIR is set during deployment and points at the model
    # folder (./azureml-models/$MODEL_NAME/$VERSION); with several deployed
    # models it points at the folder containing all of them (./azureml-models).
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    model = joblib.load(os.path.join(model_dir, 'creditcard_logistic_model.pkl'))

    server = EIServer(model.coef_, model.intercept_, verbose=True)

def run(raw_data):
    """Run one encrypted prediction request.

    Args:
        raw_data: JSON text with the entries 'key_id' (blob name of the
            caller's public keys), 'conn_str' (storage connection string),
            'container' (blob container name) and 'data' (the encrypted input).

    Returns:
        Whatever ``server.predict`` returns for the data and the downloaded
        public keys; it must be JSON-serializable.
    """
    request = json.loads(raw_data)
    key_id, conn_str = request['key_id'], request['conn_str']
    container, data = request['container'], request['data']

    # Download the caller's public keys from blob storage.
    # TODO optimize by caching the keys locally
    storage = BlobServiceClient.from_connection_string(conn_str=conn_str)
    key_blob = storage.get_blob_client(container=container, blob=key_id)
    public_keys = key_blob.download_blob().readall()

    # make prediction
    return server.predict(data, public_keys)

Writing score_encrypted.py


###Create configuration file.

In [None]:
from azureml.core.webservice import AciWebservice

# Resource request and metadata for the ACI container of the encrypted
# service: 1 CPU core and 1 GB of RAM, plus tags and a description.
# `aciconfig` is passed as deployment_config to Model.deploy() below.
aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, 
                                               memory_gb=1, 
                                               tags={"data": "creditcard",  "method" : "logistic"}, 
                                               description='Encrypted Predict credit card fraud with sklearn + SEAL')

###Deploy in ACI.

In [None]:
%%time
import uuid
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model

# Reload the workspace and look up the model registered in Part 1 by name.
ws = Workspace.from_config()
model = Model(ws, 'creditcard_logistic')

# Fetch the encrypted-inference environment by name (no version given) and
# pair it with the encrypted scoring script.
myenv = Environment.get(workspace=ws, name="encrypted-creditcard-env")
inference_config = InferenceConfig(entry_script="score_encrypted.py", environment=myenv)

# Append the first 4 characters of a random UUID so that each deployment
# gets its own service name. `service_name` is read again by the cell that
# sends the encrypted request.
service_name = 'encrypted-creditcard-svc-' + str(uuid.uuid4())[:4]
service = Model.deploy(workspace=ws, 
                       name=service_name, 
                       models=[model], 
                       inference_config=inference_config, 
                       deployment_config=aciconfig)

# Block until the deployment has finished, streaming progress to the output.
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-06-25 10:49:29+00:00 Creating Container Registry if not exists.
2021-06-25 10:49:29+00:00 Registering the environment.
2021-06-25 10:49:30+00:00 Building image..
2021-06-25 10:56:01+00:00 Generating deployment configuration.
2021-06-25 10:56:02+00:00 Submitting deployment to compute..
2021-06-25 10:56:08+00:00 Checking the status of deployment encrypted-creditcard-svc-57eb..
2021-06-25 11:14:55+00:00 Checking the status of inference endpoint encrypted-creditcard-svc-57eb.
Succeeded
ACI service creation operation finished, operation "Succeeded"
CPU times: user 3.01 s, sys: 238 ms, total: 3.25 s
Wall time: 25min 35s


In [None]:
# URL of the encrypted service's scoring endpoint.
print(service.scoring_uri)

http://d28ee9c5-833a-476b-8061-dd09858ceaaa.uksouth.azurecontainer.io/score


##Test the model for encrypted inferencing

In [None]:
import os
import pandas as pd
from azureml.core import Dataset

#data_folder = os.path.join(os.getcwd(), 'data')
#os.makedirs(data_folder, exist_ok=True)
#data_folder = os.path.join(os.getcwd(), 'data')

# Load the small hold-out file used to exercise the encrypted service.
# It is read from the current working directory.
df_test = pd.read_csv('creditcard_test.csv')
print(df_test.shape)

# Same positional column split as in train.py: columns 1..29 are the
# features, column 30 onwards is the target (column 0 is skipped).
feature_names = df_test.iloc[:, 1:30].columns
target = df_test.iloc[:1, 30:].columns

print(feature_names)
print(target)

data_features = df_test[feature_names]
data_target = df_test[target]

X_test = data_features.to_numpy()
# reshape(-1) flattens the one-column target to 1-D so single labels can be
# looked up by sample index later on.
y_test = data_target.to_numpy().reshape(-1)

print(X_test)
print(y_test)

(18, 31)
Index(['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11',
       'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21',
       'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount'],
      dtype='object')
Index(['Class'], dtype='object')
[[-9.16978982e+00  7.09219680e+00 -1.23540369e+01  4.24306897e+00
  -7.17643776e+00 -3.38661805e+00 -8.05801195e+00  6.44290866e+00
  -2.41298660e+00 -6.13490689e+00  2.82667123e+00 -6.30984278e+00
  -6.23002386e-01 -7.27986922e+00  9.24233226e-01 -4.21553713e+00
  -7.17167241e+00 -2.55033668e+00  5.96364321e-01  8.16651662e-01
   9.26156961e-01 -8.17706132e-01 -1.50434416e-01 -3.93830600e-02
   4.85639754e-01 -2.64324609e-01  1.15969046e+00  2.32758088e-01
   9.99900000e+01]
 [-9.84877583e+00  7.36554649e+00 -1.28985382e+01  4.27332307e+00
  -7.61199101e+00 -3.42704524e+00 -8.35080815e+00  6.86360392e+00
  -2.38756733e+00 -6.06578236e+00  2.70785640e+00 -6.13603447e+00
  -5.45976096e-01 -7.02898027e+00  9.1

###Predict test data

Feed the test dataset to the model to get predictions.


The following code goes through these steps:

1. Create our Homomorphic Encryption based client 

1. Upload HE generated public keys 

1. Encrypt the data

1. Send the data as JSON to the web service hosted in ACI. 

1. Use the SDK's `run` API to invoke the service. You can also make raw calls using any HTTP tool such as curl.

#### Create our Homomorphic Encryption based client 

Create a new `EILinearRegressionClient` and generate the public keys.

In [None]:
from encrypted.inference.eiclient import EILinearRegressionClient

# Create a new Encrypted inference client and a new secret key.
# NOTE(review): the recorded output of this cell contains a "Secret key:"
# line, presumably because of verbose=True - confirm, and clear the cell
# output before sharing or committing the notebook.
edp = EILinearRegressionClient(verbose=True)

# get_public_keys() returns a pair: the first element is used below as the
# local file name / blob name of the keys, the second is the key bytes
# written to that file.
public_keys_blob, public_keys_data = edp.get_public_keys()


Session ID: gv5nwyc0pb7wecxaj0o781hqhed0wy8l
Secret key: 5w6tdcr4UQswoO8XiaIdxGo2EcfEi+IE/pkzT6OeJG6FL9ZX2WRS5Mjq8jfOxzp2UjVFDO6Mgjt1ZcunWXYLJg==
Public keys ID: ufy7sa6pvjrm6kgfu7asvmyfbfmaznsb
Generating public keys
Public keys generated: 8.563 MB


#### Upload HE generated public keys

Upload the public keys to the workspace default blob store. This will allow us to share the keys with the inference server

In [None]:
import azureml.core
from azureml.core import Workspace, Datastore
import os

ws = Workspace.from_config()

# The workspace's default datastore is used to hand the public keys to the
# inference server; its container name is sent along with the request later.
datastore = ws.get_default_datastore()
container_name = datastore.container_name

try:
    # Create a local file and write the keys to it. The context manager
    # guarantees the handle is flushed and closed before the upload starts,
    # even if the write fails.
    with open(public_keys_blob, "wb") as public_keys:
        public_keys.write(public_keys_data)

    # Upload the file to blob store
    datastore.upload_files([public_keys_blob])
finally:
    # Delete the local file, also when the write or the upload failed, so no
    # key material is left behind in the working directory.
    if os.path.exists(public_keys_blob):
        os.remove(public_keys_blob)

Uploading an estimated of 1 files
Uploading ufy7sa6pvjrm6kgfu7asvmyfbfmaznsb
Uploaded ufy7sa6pvjrm6kgfu7asvmyfbfmaznsb, 1 files out of an estimated total of 1
Uploaded 1 files


### Encrypt the data

In [None]:
#choose any one sample from the test data 
# sample_index is reused in the decrypt cell to print the matching true label.
sample_index = 3

print(X_test[sample_index])
#encrypt the data
# raw_data is what gets sent under the "data" key of the request to the service.
# Note that printing it dumps the complete ciphertext into the cell output.
raw_data = edp.encrypt(X_test[sample_index])
print(raw_data)


[-1.12054614e+01  7.91463349e+00 -1.39877516e+01  4.33334118e+00
 -8.48496952e+00 -3.50656117e+00 -8.93524303e+00  7.70444915e+00
 -2.33658418e+00 -5.92735917e+00  2.47040102e+00 -5.78851703e+00
 -3.91939257e-01 -6.52746224e+00  8.99859362e-01 -3.84729284e+00
 -6.70063745e+00 -2.49261611e+00  4.69554314e-01  8.60911742e-01
  9.42593331e-01 -9.87848115e-01 -2.79446305e-01 -2.72992250e-02
  6.44344196e-01 -2.63077923e-01  1.08402253e+00  2.11933357e-01
  9.99900000e+01]
Encrypting input data
Input data encrypted: ... QFlEZCRFq6GxFp6VQQkA6lJCQUKeEHkiKd0iAlpSAlIUi9/5nnfWae/bI7c328Znd2oZiWAEVs7pXbN3rG9setrQgKcVP9d9+lGi ...
Created 1 ciphertext(s) with a size of 214.734 KB
['XqEQAwUBAAAzhAIAAAAAAHicTLdlUFaPGzR87nN3HpAWpJGQEqQFlEZCRFq6GxFp6VQQkA6lJCQUKeEHkiKd0iAlpSAlIUi9/5nnfWae/bI7c328Znd2oZiWAEVs7pXbN3rG9setrQgKcVP9d9+lGiRjaB/b6V6BgcD/D+b/Q3Dg/8VjOdM8Mjjyf4r2/17M/w/9vXIXVbjHhmYzjot5TDCBKdB9Hb/i/AWmx7PGkzRfT/ie1lv3gTWA8GTGEvfFjA+tzhZ09dnzaDBTQNXI+dsycucFt0rRlQ6kvkanffagG8aZY6GfS6MALL1f+uh9Ww

#### Send the test data to the webservice hosted in ACI

Feed the test dataset to the model to get predictions. We will need to send the connection string to the blob storage where the public keys were uploaded 


In [None]:
import json
from azureml.core import Webservice

print(service_name)
service = Webservice(ws, service_name)

# Build the connection string for blob storage; it gives the server access
# to the uploaded public keys.
# Do NOT print conn_str (or the request body that contains it): it embeds
# the storage account key, which would then be saved in the notebook output.
# NOTE(review): the account key is still sent to the service inside the
# request body - consider a short-lived, read-only SAS token instead.
conn_str_template = 'DefaultEndpointsProtocol={};AccountName={};AccountKey={};EndpointSuffix=core.windows.net'
conn_str = conn_str_template.format(datastore.protocol, datastore.account_name, datastore.account_key)

# Build the JSON request with the four entries read by score_encrypted.py:
# data, key_id, conn_str and container.
data = json.dumps({"data": raw_data, "key_id" : public_keys_blob, "conn_str" : conn_str, "container" : container_name })
data = bytes(data, encoding='ASCII')
# Report only the size: the body holds the secret connection string and a
# very large ciphertext.
print('Request payload size: {} bytes'.format(len(data)))

print ('Making an encrypted inference web service call ')
eresult = service.run(input_data=data)

print ('Received encrypted inference results')
print(eresult)

encrypted-creditcard-svc-57eb
DefaultEndpointsProtocol=https;AccountName=qmulabhiws5499744846;AccountKey=0SN/NkusgBAMgpVoIOVFpkeMkP0IxiUTCrA2fCxfBZFiEVgjGEZaAkv4ajU5EDBQ3rmgebC2h3gQhmMKpf94Cw==;EndpointSuffix=core.windows.net
b'{"data": ["XqEQAwUBAAAzhAIAAAAAAHicTLdlUFaPGzR87nN3HpAWpJGQEqQFlEZCRFq6GxFp6VQQkA6lJCQUKeEHkiKd0iAlpSAlIUi9/5nnfWae/bI7c328Znd2oZiWAEVs7pXbN3rG9setrQgKcVP9d9+lGiRjaB/b6V6BgcD/D+b/Q3Dg/8VjOdM8Mjjyf4r2/17M/w/9vXIXVbjHhmYzjot5TDCBKdB9Hb/i/AWmx7PGkzRfT/ie1lv3gTWA8GTGEvfFjA+tzhZ09dnzaDBTQNXI+dsycucFt0rRlQ6kvkanffagG8aZY6GfS6MALL1f+uh9Ww+uC9Z0TOeyBjtlVOnSiSkkvVDk3RBr6If4IstdvpcZgw4h34+4Xpwh34pKipC3dcOWcGXl11uCkHssialKYR6gDfn22m84G8njjTM7ox09pKU9as72QAKWIcThECDtTRjr9fXWRr/CP5CWd/eilIMVDgQdqsoX4lmbOTurXhpijWJkIoLPJNDIL8+6GGKscCXoUZqVxasE6DZLYiLPDtJu81rTeMhLyJYPsnD7MAJc4E6sR3n1sHyRzkPwib8Efl7Ol21hZAjzO0WM/9Ea4f8wLEId7DBgF1OgjicuoM0axf0ivzOiWQqwamJDLqjDeMlf1Sb5pFTG6Q+mNU1Y69igxcBYI+TTlcQblgrc+IjIHvKHt/NJbIurp+yEXRLOa2hm/HMykVx8uKLWACS8dYrRdx1PxZg8nZ/1JVdATTcvk6/fjAYefEuRN

#### Decrypt the data

Use the client to decrypt the results

In [None]:
import numpy as np 

# Decrypt the service response with the client that holds the secret key.
results = edp.decrypt(eresult)

print ('Decrypted the results ', results)

# Take the first element of the decrypted result as a plain Python scalar.
# No argmax or thresholding is applied, so what is printed below is a
# continuous score, not a 0/1 class label.
# NOTE(review): presumably this is the raw linear score, since the server is
# built only from model.coef_ and model.intercept_; if so, a positive score
# would correspond to class 1 - confirm against the encrypted-inference
# documentation before comparing it with the label.
prediction = results.item(0)

print ( ' Prediction : ', prediction)
print ( ' Actual Label : ', y_test[sample_index])

Received encrypted response of size 1
Encrypted inference request completed
Decrypted the results  [0.34409818]
 Prediction :  0.3440981845138846
 Actual Label :  1
