In [3]:
import azureml.core
from azureml.core import Workspace, Experiment, Run

In [4]:
# Show which Azure ML SDK version this notebook was executed with.
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 0.1.68


# Initialize existing workspace

In [5]:
# Coordinates of the existing Azure ML workspace (replace the placeholders with real values).
subscription_id = "xxxxxxxxx"
resource_group = "xxxxxxx"
workspace_name = "xxxxxx"
workspace_region = "xxxxxxx"  # not passed to Workspace() below; kept for reference

# Attach to the workspace identified by the three values above.
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

# Save the workspace details to a local config.json file.
ws.write_config()

Wrote the config file config.json to: C:\Users\schapira.d\aml_config\config.json


# Create (or attach to) the experiment in the workspace and open an interactive run on it.
experiment_name = 'train-in-notebook'
experiment = Experiment(
    name=experiment_name,
    workspace=ws,
)
run = experiment.start_logging()

# Training Model

In [6]:
import numpy
import pandas as pd

#sklearn models
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.externals import joblib
from sklearn.metrics import classification_report,accuracy_score


#Load data from local folder
# NOTE(review): absolute, machine-specific path -- this cell only runs where the file exists.
path = 'C:\\Users\\schapira.d\\Desktop\\GCR Analytics\\AE Classifier\\amazon_book_reviews.csv'
df = pd.read_csv(path,encoding='UTF-8')
df = df[['Rating','Text']]

#Prepare data
# A review without a rating cannot serve as a training example. Drop those rows
# (instead of filling the label with '') so the labels keep a single type and
# X and y stay aligned row by row.
df = df.dropna(subset=['Rating'])
# Missing review texts become empty documents; every document is forced to str
# because the vectorizer expects text.
X = [str(text) for text in df['Text'].fillna('').tolist()]
y = df['Rating'].tolist()

RANDOM_STATE = 42

#Split train & test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=RANDOM_STATE)

#Initialize models
tfidf = TfidfVectorizer(ngram_range=(1, 2),stop_words='english')
clf = LinearSVC(random_state=RANDOM_STATE)

#Fit
# The vectorizer is fitted on the training texts only; the test texts are only transformed below.
clf.fit(tfidf.fit_transform(X_train),y_train)

#Predict
y_pred = clf.predict(tfidf.transform(X_test))

#Measure model accuracy
report = classification_report(y_test, y_pred)
# Store the score under its own name: assigning it to `accuracy_score` would overwrite the
# imported sklearn function and make this cell fail ("'float' object is not callable")
# the next time it is run.
accuracy = accuracy_score(y_test,y_pred)
print(report)
#run.log('accuracy score',accuracy)

#Serialize models
# joblib is already imported at the top of this cell.
joblib.dump(value=tfidf, filename='tfidf_books.pkl')
joblib.dump(value=clf, filename='LinearSVC_books.pkl')

#run.upload_file(name='outputs/tfidf_books.pkl', path_or_stream='./tfidf_books.pkl')
#run.upload_file(name='outputs/LinearSVC_books.pkl', path_or_stream='./LinearSVC_books.pkl')

#run.complete()
print("Models saved to pickle")

  interactivity=interactivity, compiler=compiler, result=result)
  if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float):


             precision    recall  f1-score   support

          1       0.66      0.45      0.53        56
          2       0.58      0.23      0.33        48
          4       0.59      0.33      0.42        97
          5       0.73      0.96      0.83       291

avg / total       0.68      0.71      0.67       492

Models saved to pickle


In [7]:
# Display the run object; the notebook renders its summary (experiment, id, status, links) below.
run

Experiment,Id,Type,Status,Details Page,Docs Page
train-in-notebook,d6f06d1a-4670-48fc-8052-9e23713bc318,,Completed,Link to Azure Portal,Link to Documentation


# Register Model

In [9]:
from azureml.core.model import Model

# Register both pickled artifacts in the workspace, vectorizer first and classifier second.
# Each model is registered under its file name, so model_path and model_name are identical.
model_1, model_2 = (
    Model.register(model_path=artifact, model_name=artifact, workspace=ws)
    for artifact in ("tfidf_books.pkl", "LinearSVC_books.pkl")
)

Registering model tfidf_books.pkl
Registering model LinearSVC_books.pkl


# Scoring script

In [7]:
%%writefile score.py
import pickle
import json
import numpy as np
from sklearn.externals import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from azureml.core.model import Model

def init():
    """Load the two registered models into the module-level globals used by run().

    model_1 is loaded from the model registered as "tfidf_books.pkl" and
    model_2 from the model registered as "LinearSVC_books.pkl".
    """
    global model_1, model_2
    # Resolve where each registered model file is located.
    tfidf_path = Model.get_model_path(model_name="tfidf_books.pkl")
    svc_path = Model.get_model_path(model_name="LinearSVC_books.pkl")
    # Deserialize the files back into sklearn objects.
    model_1 = joblib.load(tfidf_path)
    model_2 = joblib.load(svc_path)


# note you can pass in multiple rows for scoring
def run(raw_data):
    """Score one or more review texts with the models loaded by init().

    Args:
        raw_data: JSON string of the form {"data": ...}, where "data" is either a
            list of review texts or a single review text.

    Returns:
        JSON string {"prediction": [...]} holding the output of model_2.predict
        for the documents, in input order.

    Raises:
        ValueError: if raw_data is not valid JSON.
        KeyError: if the payload has no "data" entry.
    """
    data = json.loads(raw_data)['data']
    # A bare string is one document, not an iterable of documents: wrap it so the
    # vectorizer scores it as a single review instead of rejecting the request.
    if isinstance(data, str):
        data = [data]
    # make prediction: vectorize with model_1, classify with model_2
    y_pred = model_2.predict(model_1.transform(data)).tolist()
    return json.dumps({"prediction": y_pred})

Writing score.py


# Create Web Service

In [10]:
%%time
from azureml.core.conda_dependencies import CondaDependencies 
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.image import ContainerImage

#Create environment
# Conda environment for the scoring container; scikit-learn is the only package added here.
# NOTE(review): the package is unpinned -- consider pinning it to the version used for
# training, since pickled estimators may not load under a different scikit-learn version.
myenv = CondaDependencies()
myenv.add_conda_package("scikit-learn")

#define here file name for the environment
with open("envtest.yml", "w") as f:
    f.write(myenv.serialize_to_string())

#Create container image
# The image runs score.py inside the conda environment written above.
image_config = ContainerImage.image_configuration(
    execution_script="score.py",
    runtime="python",
    conda_file="envtest.yml",
)

#Create ACI configuration file
#Specify number of CPUs and gigabyte of RAM needed for your ACI container
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    description='Predict book rating from review',
)

#Create Web Service from model
service_name = 'my-aci-svc-2'
service = Webservice.deploy_from_model(
    workspace=ws,
    name=service_name,
    models=[model_1,model_2],
    deployment_config=aciconfig,
    image_config=image_config,
)

# Wait until the deployment has finished, printing progress while waiting.
service.wait_for_deployment(show_output=True)

Creating image
Image creation operation finished for image my-aci-svc-2:1, operation "Succeeded"
Creating service
Running........................................
SucceededACI service creation operation finished, operation "Succeeded"
Wall time: 6min 39s


# Web service HTTP endpoint
Get the scoring web service's HTTP endpoint, which accepts REST client calls. This endpoint can be shared with anyone who wants to test the web service or integrate it into an application.

In [11]:
# Show the URL that REST clients call to score data.
scoring_uri = service.scoring_uri
print('Web service HTTP endpoint: ', scoring_uri)

Web service HTTP endpoint:  http://13.73.178.223:80/score


# Test Service

In [18]:
import json

# Sample reviews to send to the service.
text = [
    'best book',
    'worst book',
    'i would recommend this book',
]

# The scoring script reads the documents from the "data" entry of the JSON payload.
payload = {'data': text}
input_data = json.dumps(payload)
input_data

'{"data": ["best book", "worst book", "i would recommend this book"]}'

In [19]:
%%time
# Send the JSON payload to the deployed web service and show the raw response.
result = service.run(input_data)
print(result)

{"prediction": [5, 1, 5]}
Wall time: 223 ms
