In [1]:
# Ensure the AI Platform API is enabled and that the installed scikit-learn version is 0.20.2.
# !sudo pip uninstall -y scikit-learn
# ! pip install scikit-learn==0.20.2

In [2]:
# Print the scikit-learn version the kernel actually imports, to compare it with the 0.20.2 pin above.
import sklearn
print(sklearn.__version__)

0.20.2


In [3]:
#Import the required libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import witwidget
import os
import pickle

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

from sklearn.utils import shuffle
from sklearn.linear_model import LinearRegression
from witwidget.notebook.visualization import WitWidget, WitConfigBuilder

print(tf.__version__)

1.15.2


In [4]:
!wget 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'

--2020-02-15 06:17:48--  http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 264426 (258K) [application/x-httpd-php]
Saving to: ‘winequality-white.csv.1’


2020-02-15 06:17:49 (1.99 MB/s) - ‘winequality-white.csv.1’ saved [264426/264426]



In [5]:
# Create Dataframes
# Read the semicolon-separated wine CSV and shuffle its rows with a fixed seed,
# so the positional train/test split further down is repeatable.
data = shuffle(
  pd.read_csv('winequality-white.csv', sep=';', index_col=False),
  random_state=4,
)

data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
3545,7.3,0.2,0.29,19.5,0.039,69.0,237.0,1.00037,3.1,0.48,9.2,6
2460,6.6,0.33,0.24,16.05,0.045,31.0,147.0,0.99822,3.08,0.52,9.2,5
3855,5.9,0.18,0.29,4.6,0.032,68.0,137.0,0.99159,3.21,0.38,11.3,6
3427,5.8,0.24,0.26,10.05,0.039,63.0,162.0,0.99375,3.33,0.5,11.2,6
2033,7.2,0.23,0.33,12.7,0.049,50.0,183.0,0.9987,3.41,0.4,9.8,5


In [6]:
# The 'quality' column is the regression target; show how many wines carry each score.
labels = data['quality']
print(labels.value_counts())

6    2198
5    1457
7     880
8     175
4     163
3      20
9       5
Name: quality, dtype: int64


In [7]:
# Train Test Split
# Remove the target column from the features. errors='ignore' keeps this cell re-runnable:
# on a second execution 'quality' is already gone from `data`, and a plain drop() would
# raise KeyError.
data = data.drop(columns=['quality'], errors='ignore')

# `data` was shuffled when it was loaded and `labels` was taken from it afterwards, so both
# share the same row order and a positional 80/20 cut keeps features and labels aligned.
# .iloc makes the positional intent explicit (the index holds the shuffled original row ids).
train_size = int(len(data) * 0.8)
train_data = data.iloc[:train_size]
train_labels = labels.iloc[:train_size]

test_data = data.iloc[train_size:]
test_labels = labels.iloc[train_size:]

In [8]:
# Build TF Model
# A small fully connected network with a single linear output unit (regression).
input_size = train_data.shape[1]  # number of feature columns
print(input_size)

model = Sequential()
model.add(Dense(200, input_shape=(input_size,), activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(25, activation='relu'))
model.add(Dense(1))

# The model predicts a continuous score and is trained with MSE, so report mean absolute
# error. 'accuracy' is a classification metric and is not informative here.
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

11
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [9]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 200)               2400      
_________________________________________________________________
dense_1 (Dense)              (None, 50)                10050     
_________________________________________________________________
dense_2 (Dense)              (None, 25)                1275      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 26        
Total params: 13,751
Trainable params: 13,751
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Train the Model
# Four epochs in mini-batches of 32; 10% of the rows passed in are held out
# for validation via validation_split.
model.fit(
  x=train_data.values,
  y=train_labels.values,
  batch_size=32,
  epochs=4,
  validation_split=0.1,
)

Train on 3526 samples, validate on 392 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7faf41f4e0b8>

In [11]:
# Evaluate the model
# Score the trained network on the held-out test rows.
model.evaluate(x=test_data.values, y=test_labels.values, batch_size=32)

# Spot check: compare the prediction for the first test row with its true label.
test_prediction = model.predict(test_data.values[:1])
print('Predicted wine score:', test_prediction[0, 0])
print('Actual wine score:', test_labels.values[0])

Predicted wine score: 6.255574
Actual wine score: 6


In [14]:
# Deploy Model to AI Platform
# Settings shared by the deployment cells below; change them to match your own GCP setup.

# GCP project that gcloud is pointed at and that the What-If Tool queries.
GCP_PROJECT = 'western-emitter-267510'
# GCS bucket that receives the exported Keras SavedModel.
KERAS_MODEL_BUCKET = 'gs://1077410235121keras'
# Version name under which the Keras model is deployed and queried.
KERAS_VERSION_NAME = 'v1'
# One-time bucket creation, left disabled; run it once if the bucket does not exist yet.
#!gsutil mb $KERAS_MODEL_BUCKET

In [16]:
# Add the serving input layer below in order to serve our model on AI Platform
class ServingInput(tf.keras.layers.Layer):
  """Layer that only forwards a name, dtype and batch input shape to the Keras base Layer.

  It is always constructed with trainable=False, which is the important detail of
  this boilerplate.
  """

  def __init__(self, name, dtype, batch_input_shape=None):
    super().__init__(
      name=name,
      dtype=dtype,
      batch_input_shape=batch_input_shape,
      trainable=False,
    )

  def get_config(self):
    """Return the constructor arguments (batch input shape, dtype, name) as a dict."""
    config = {}
    config['batch_input_shape'] = self._batch_input_shape
    config['dtype'] = self.dtype
    config['name'] = self.name
    return config

restored_model = model

# Wrap the trained network behind the named serving input layer.
serving_model = tf.keras.Sequential()
serving_model.add(ServingInput('serving', tf.float32, (None, input_size)))
serving_model.add(restored_model)

# Build the GCS export URI once and reuse it for both the export and the later deployment.
# GCS object paths always use '/', so plain concatenation is used rather than os.path.join,
# whose separator depends on the host operating system.
export_path = KERAS_MODEL_BUCKET + '/keras_export'
tf.contrib.saved_model.save_keras_model(serving_model, export_path)  # export the model to your GCS bucket

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['serving_default']
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: gs://1077410235121keras/keras_export/saved_model.pb


In [17]:
# Point the gcloud CLI at the project in GCP_PROJECT so the ai-platform commands below run against it.
!gcloud config set project $GCP_PROJECT

Updated property [core/project].


In [18]:
!gcloud ai-platform models create keras_wine

!gcloud beta ai-platform versions create $KERAS_VERSION_NAME --model keras_wine \
--origin=$export_path \
--python-version=3.5 \
--runtime-version=1.14 \
--framework='TENSORFLOW'

[1;31mERROR:[0m (gcloud.ai-platform.models.create) Resource in project [western-emitter-267510] is the subject of a conflict: Field: model.name Error: A model with the same name already exists.
- '@type': type.googleapis.com/google.rpc.BadRequest
  fieldViolations:
  - description: A model with the same name already exists.
    field: model.name
[1;31mERROR:[0m (gcloud.beta.ai-platform.versions.create) ALREADY_EXISTS: Field: version.name Error: A version with the same name already exists.
- '@type': type.googleapis.com/google.rpc.BadRequest
  fieldViolations:
  - description: A version with the same name already exists.
    field: version.name


In [19]:
%%writefile predictions.json
[7.8, 0.21, 0.49, 1.2, 0.036, 20.0, 99.0, 0.99, 3.05, 0.28, 12.1]

Overwriting predictions.json


In [20]:
# Query the deployed Keras model and capture the gcloud output as a list of lines.
# Line 0 is the output-tensor header, so line 1 holds the prediction for the single instance.
prediction = !gcloud ai-platform predict --model=keras_wine --json-instances=predictions.json --version=$KERAS_VERSION_NAME
print(prediction[1])

[6.229109287261963]


In [21]:
# Same request as the previous cell, this time showing the raw gcloud output (header line plus prediction).
!gcloud ai-platform predict --model=keras_wine --json-instances=predictions.json --version=$KERAS_VERSION_NAME

SEQUENTIAL
[6.229109287261963]


In [22]:
# Start the Scikit Learn Model work

SKLEARN_VERSION_NAME = 'v1'
SKLEARN_MODEL_BUCKET = 'gs://1077410235121scikit'
!gsutil mb $SKLEARN_MODEL_BUCKET

scikit_model = LinearRegression().fit(
  train_data.values, 
  train_labels.values
)

pickle.dump(scikit_model, open('model.pkl', 'wb'))

Creating gs://1077410235121scikit/...
ServiceException: 409 Bucket 1077410235121scikit already exists.


In [23]:
# Model deployment and testing
!gsutil cp ./model.pkl $SKLEARN_MODEL_BUCKET/model.pkl

#!gcloud ai-platform models create sklearn

Copying file://./model.pkl [Content-Type=application/octet-stream]...
/ [1 files][  674.0 B/  674.0 B]                                                
Operation completed over 1 objects/674.0 B.                                      


In [24]:
# !gcloud beta ai-platform versions create $SKLEARN_VERSION_NAME --model=sklearn \
# --origin=$SKLEARN_MODEL_BUCKET \
# --runtime-version=1.14 \
# --python-version=3.5 \
# --framework='SCIKIT_LEARN'

[1;31mERROR:[0m (gcloud.beta.ai-platform.versions.create) ALREADY_EXISTS: Field: version.name Error: A version with the same name already exists.
- '@type': type.googleapis.com/google.rpc.BadRequest
  fieldViolations:
  - description: A version with the same name already exists.
    field: version.name


In [25]:
# Send the same predictions.json instance to the deployed scikit-learn model.
!gcloud ai-platform predict --model=sklearn --json-instances=predictions.json --version=$SKLEARN_VERSION_NAME

[6.319000502718211]


In [26]:
# First 200 test rows with their quality label appended as the last column;
# these rows are what the What-If Tool receives as examples.
test_examples = np.column_stack((test_data[:200].values, test_labels[:200].values))

In [28]:
# Configure the What-If Tool: the deployed Keras model is the primary model and the
# deployed scikit-learn model is the comparison model; 'quality' is the regression target.
config_builder = WitConfigBuilder(test_examples.tolist(), data.columns.tolist() + ['quality'])
config_builder = config_builder.set_ai_platform_model(GCP_PROJECT, 'keras_wine', KERAS_VERSION_NAME)
config_builder = config_builder.set_predict_output_tensor('sequential')
config_builder = config_builder.set_uses_predict_api(True)
config_builder = config_builder.set_target_feature('quality')
config_builder = config_builder.set_model_type('regression')
config_builder = config_builder.set_compare_ai_platform_model(GCP_PROJECT, 'sklearn', SKLEARN_VERSION_NAME)

WitWidget(config_builder, height=800)

WitWidget(config={'aip_batch_size': 500, 'compare_get_explanations': True, 'aip_service_name': 'ml', 'model_na…