# Create initial model

In [None]:
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.dataset import Dataset
import pandas as pd
import numpy as np
import pickle
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, recall_score, precision_score

def printScores(y_pred, y_true):
    """Print a confusion matrix plus precision and recall for predictions.

    Note the parameter order: predictions come first and ground-truth labels
    second, which is the reverse of the (y_true, y_pred) order the
    scikit-learn metric functions take. The metrics are therefore called by
    keyword below so the two cannot be mixed up.

    Args:
        y_pred: Labels predicted by the model.
        y_true: Ground-truth labels, aligned element-wise with ``y_pred``.

    Returns:
        None. Everything is written to stdout.
    """
    # Blank line to visually separate this report from preceding output.
    print()

    # The matrix is printed before the scores are computed, so it is still
    # shown even if one of the score functions raises.
    matrix = confusion_matrix(y_true=y_true, y_pred=y_pred)
    print(matrix)

    # Both scores use scikit-learn's default settings (no explicit
    # `average` / `pos_label` are passed).
    prec = precision_score(y_true=y_true, y_pred=y_pred)
    rec = recall_score(y_true=y_true, y_pred=y_pred)
    print('precision = ', prec, ', recall = ', rec)

# Create the train / test datasets using all the features

In [None]:
# Load the registered 'qualitydataset' from the workspace described by the
# local Azure ML config and materialize it as a pandas DataFrame.
ws = Workspace.from_config()
allData = ws.datasets['qualitydataset'].to_pandas_dataframe()

# Column 0 is the label ('Quality'); the rest are sensor columns S1..S41.
# S4 is deliberately skipped here because the original column list omits it
# (NOTE(review): reason not visible in this notebook - confirm).
allfeatures = ['Quality'] + ['S' + str(i) for i in range(1, 42) if i != 4]

# Keep only the selected columns and drop any row with a missing value,
# printing the shape before and after so the number of dropped rows is visible.
trainData = allData[allfeatures]
print(trainData.shape)
trainData = trainData.dropna()
print(trainData.shape)
# NOTE(review): with default notebook settings only a cell's last expression
# is rendered, so this preview is presumably not displayed - confirm.
trainData.head(50)

# Split into the feature matrix (every column except the first) and the
# label vector (the first column), both converted to float arrays.
X = trainData[allfeatures[1:]].astype(float).values
y = trainData[allfeatures[0]].astype(float).values
print(X)
print(y)

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

# Train the model

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Gradient-boosted classifier with library-default hyperparameters;
# verbose=1 turns on the estimator's progress output during fitting and the
# fixed seed makes training repeatable.
model_GBM = GradientBoostingClassifier(verbose=1, random_state=42)
model_GBM.fit(X_train, y_train)

# Report scores on the training split first, then on the held-out split.
# printScores takes predictions first, so the arguments are passed by keyword.
printScores(y_pred=model_GBM.predict(X_train), y_true=y_train)
printScores(y_pred=model_GBM.predict(X_test), y_true=y_test)

# Serialize the model

In [None]:
# File the trained model is pickled to; the registration cell below reuses
# this name both as the upload path and as the registered model name.
modelFileName = 'quality-prediction-gbm.pkl'

# Binary mode is required for pickle output; the context manager closes the
# file even if dumping fails.
with open(modelFileName, mode='wb') as f:
    pickle.dump(model_GBM, file=f)

# Register the model with Azure ML Workspace

In [None]:
from azureml.core import Workspace
from azureml.core.model import Model

# Re-create the workspace handle from the local config so this cell can be
# run on its own, then echo its identifying details one per line.
ws = Workspace.from_config()
print('\n'.join(
    str(detail)
    for detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id)
))

# Upload the pickled model file and register it in the workspace. The model
# is registered under the same string as the file name (including '.pkl').
model = Model.register(
    workspace=ws,
    model_path=modelFileName,
    model_name=modelFileName,
    description="Quality prediction model",
    tags=dict(area="QualityPrediction", type="GBM"),
)