# Train and deploy with Cloud AI Platform

In [49]:
import json
import os
import pandas as pd
import time
import googleapiclient.discovery

## Create a training application

In [3]:
# Local folder for the `trainer` package; it is later passed to gcloud as --package-path.
TRAINING_APP_FOLDER = '../training_app/trainer'
os.makedirs(TRAINING_APP_FOLDER, exist_ok=True)
# Create an (empty) __init__.py so the folder is a Python package (module `trainer.train`).
!touch $TRAINING_APP_FOLDER/__init__.py

### Create a training script

In [4]:
%%writefile $TRAINING_APP_FOLDER/train.py

import logging
import os
import subprocess
import sys

import fire
import numpy as np
import pandas as pd

from sklearn.externals import joblib
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import Ridge
from sklearn.manifold import TSNE 
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


def train(job_dir, data_path, n_features_options, l2_reg_options):
  """Tunes a scale -> PCA -> Ridge pipeline and uploads the best model.

  Runs a 10-fold cross-validated grid search over the number of PCA
  components and the Ridge regularization strength, scored by negative mean
  squared error. The winning pipeline is written to `model.joblib` in the
  current working directory and then copied with `gsutil` to
  `<job_dir>/trained_model/model.joblib`.

  Args:
    job_dir: Base output location; the model is copied to
      `<job_dir>/trained_model/`.
    data_path: Path of the training CSV, read with `pd.read_csv`. It must
      contain an `octane` column (the target); every other column is used
      as a feature.
    n_features_options: Candidate values for PCA `n_components`.
    l2_reg_options: Candidate values for Ridge `alpha`.

  Raises:
    subprocess.CalledProcessError: If the `gsutil cp` upload fails.
  """

  # Load the training data and split it into target and features
  df_train = pd.read_csv(data_path)
  y = df_train.octane
  X = df_train.drop('octane', axis=1)

  pipeline = Pipeline([
    ('scale', StandardScaler()),
    ('reduce_dim', PCA()),
    ('regress', Ridge())
  ])

  param_grid = [
    {
      'reduce_dim__n_components': n_features_options,
      'regress__alpha': l2_reg_options
    }
  ]

  grid = GridSearchCV(
    pipeline,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=10,
    n_jobs=None,
    iid=False,
    refit=True
  )
  grid.fit(X, y)

  logging.info("Best estimator: {}".format(grid.best_params_))
  logging.info("Best score: {}".format(grid.best_score_))

  # With refit=True the search has already refit the best pipeline on the
  # full (X, y), so a second explicit fit would only repeat that work.
  trained_pipeline = grid.best_estimator_

  # Save the model locally, then copy it to the job directory
  model_filename = 'model.joblib'
  joblib.dump(value=trained_pipeline, filename=model_filename)
  gcs_model_path = "{}/trained_model/{}".format(job_dir, model_filename)
  subprocess.check_call(['gsutil', 'cp', model_filename, gcs_model_path], stderr=sys.stdout)
  logging.info("Saved model in: {}".format(gcs_model_path))
    
if __name__ == "__main__":
  # Show INFO-level messages so the logging.info calls in train() are emitted.
  logging.basicConfig(level=logging.INFO)
  # Let Python Fire map command-line flags (e.g. --data_path) onto train()'s parameters.
  fire.Fire(train)

Writing ../training_app/trainer/train.py


### Configure dependencies

In [5]:
%%writefile $TRAINING_APP_FOLDER/../setup.py

from setuptools import find_packages
from setuptools import setup

# Extra packages to install alongside the trainer. `fire` is imported by
# trainer/train.py; `gcsfs` is presumably what lets pandas read gs:// paths
# -- TODO confirm.
REQUIRED_PACKAGES = ['fire', 'gcsfs']

# Package every Python package found next to this file (i.e. `trainer`).
setup(
    name='trainer',
    version='0.1',
    install_requires=REQUIRED_PACKAGES,
    packages=find_packages(),
    include_package_data=True,
    description='My training application package.'
)


Writing ../training_app/trainer/../setup.py


## Submit a training job

In [6]:
JOB_NAME = "JOB_{}".format(time.strftime("%Y%m%d_%H%M%S"))
REGION = "us-west1"
SCALE_TIER = "BASIC"
JOB_DIR = "gs://jk-demo-jobdir/{}".format(JOB_NAME)
MODULE_NAME = "trainer.train"
RUNTIME_VERSION = "1.14"
PYTHON_VERSION = "3.5"
DATA_PATH="gs://jk-demo-datasets/gasdata/training.csv"
N_FEATURES_OPTIONS="[2,4,6]"
L2_REG_OPTIONS="[0.1,0.2,0.3,0.5]"

!gcloud ai-platform jobs submit training $JOB_NAME \
--region $REGION \
--job-dir $JOB_DIR \
--package-path $TRAINING_APP_FOLDER \
--module-name $MODULE_NAME \
--scale-tier $SCALE_TIER \
--python-version $PYTHON_VERSION \
--runtime-version $RUNTIME_VERSION \
-- \
--data_path $DATA_PATH \
--n_features_options $N_FEATURES_OPTIONS \
--l2_reg_options $L2_REG_OPTIONS

Job [JOB_20190820_152033] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe JOB_20190820_152033

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs JOB_20190820_152033
jobId: JOB_20190820_152033
state: QUEUED


In [7]:
# Print the submitted job's current metadata and state.
!gcloud ai-platform jobs describe $JOB_NAME
# Stream the job's log entries into the notebook output.
!gcloud ai-platform jobs stream-logs $JOB_NAME

createTime: '2019-08-20T15:20:36Z'
etag: SiYiU1YtWDs=
jobId: JOB_20190820_152033
state: PREPARING
trainingInput:
  args:
  - --data_path
  - gs://jk-demo-datasets/gasdata/training.csv
  - --n_features_options
  - '[2,4,6]'
  - --l2_reg_options
  - '[0.1,0.2,0.3,0.5]'
  jobDir: gs://jk-demo-jobdir/JOB_20190820_152033
  packageUris:
  - gs://jk-demo-jobdir/JOB_20190820_152033/packages/98df965aec4082fb008fade81c9e857fc875bb85cf9a4bfd4d200c99fd9d0e4a/trainer-0.1.tar.gz
  pythonModule: trainer.train
  pythonVersion: '3.5'
  region: us-west1
  runtimeVersion: '1.14'
trainingOutput: {}

View job in the Cloud Console at:
https://console.cloud.google.com/mlengine/jobs/JOB_20190820_152033?project=jk-demo1

View logs at:
https://console.cloud.google.com/logs?resource=ml.googleapis.com%2Fjob_id%2FJOB_20190820_152033&project=jk-demo1
INFO	2019-08-20 15:20:35 +0000	service		Validating job requirements...
INFO	2019-08-20 15:20:36 +0000	service		Job creation request has been successfully validated.
IN

## Deploy using AI Platform Prediction

### Create a model resource

In [13]:
# Settings for the AI Platform Prediction model and its first version.
MODEL_NAME = "gas_octane_regression"
# NOTE: this rebinds REGION, which the training-job cell set to "us-west1".
REGION = "us-central1"
LABELS = "task=regression,domain=chemometrics"
# Folder under the job directory where train.py uploads model.joblib.
MODEL_PATH = "{}/trained_model/".format(JOB_DIR)
VERSION_NAME = "v01"

In [9]:
# Create the model resource that the versions created below are attached to.
!gcloud ai-platform models create  $MODEL_NAME \
--regions=$REGION \
--labels=$LABELS

Created ml engine model [projects/jk-demo1/models/gas_octane_regression].


### Create a model version

In [10]:
!gcloud ai-platform versions create $VERSION_NAME \
--model=$MODEL_NAME \
--origin=$MODEL_PATH \
--runtime-version=1.14 \
--framework=scikit-learn \
--python-version=3.5

Creating version (this might take a few minutes)......done.                    


### Test the model
#### Prepare a file with test instances

In [11]:
TEST_DATASET_PATH = "gs://jk-demo-datasets/gasdata/testing.csv"
INPUT_FILE = 'instances.json'

# Drop the label column; only the features are sent for prediction.
df_test = pd.read_csv(TEST_DATASET_PATH).drop('octane', axis=1)

# Write one JSON array per line, the layout `gcloud ... --json-instances` reads.
# `.values.tolist()` converts NumPy scalars to plain Python numbers, which
# json.dumps can always serialize (NumPy integer types are rejected).
with open(INPUT_FILE, "w") as f:
  for instance in df_test.values.tolist():
    f.write(json.dumps(instance))
    f.write("\n")

#### Run predictions

In [12]:
# Request online predictions for the instances written to INPUT_FILE.
!gcloud ai-platform predict \
--model $MODEL_NAME \
--version $VERSION_NAME \
--json-instances $INPUT_FILE

[83.61274117546486, 88.78144180442887, 86.60147562277632, 86.37821707761988, 84.57226241557935, 88.60871164935777]


## Deploy with a custom prediction routine
### Create a predictor class

In [15]:
# Local folder that holds the custom prediction routine (predict.py and its setup.py).
PREDICT_APP_FOLDER = '../predict_app/'
os.makedirs(PREDICT_APP_FOLDER, exist_ok=True)

In [43]:
%%writefile $PREDICT_APP_FOLDER/predict.py

import os

import numpy as np
from sklearn.externals import joblib

class OctaneRegressor(object):
    """A custom prediction routine for the Octane regressor.

    Provides the two entry points of the AI Platform custom prediction
    routine interface: `from_path` builds the predictor from the deployed
    model directory and `predict` serves a batch of instances.
    """

    def __init__(self, model):
        """Stores the model loaded in from_path.

        Args:
            model: A fitted estimator exposing `predict(array)`.
        """
        self._model = model

    def predict(self, instances, **kwargs):
        """Runs inference on a batch of instances.

        Args:
            instances: A list of feature rows (each row a list of numbers).
            **kwargs: Extra fields of the prediction request; ignored.

        Returns:
            The model outputs converted to a plain Python list.
        """
        # Convert the decoded JSON rows into the 2-D array the model expects.
        inputs = np.asarray(instances)
        outputs = self._model.predict(inputs)

        return outputs.tolist()

    @classmethod
    def from_path(cls, model_dir):
        """Loads the model from the joblib file.

        Args:
            model_dir: Directory that contains `model.joblib`.

        Returns:
            An OctaneRegressor wrapping the loaded model.
        """
        model_path = os.path.join(model_dir, 'model.joblib')
        model = joblib.load(model_path)

        return cls(model)
    

Overwriting ../predict_app//predict.py


### Create a source distribution package

In [44]:
%%writefile $PREDICT_APP_FOLDER/setup.py

from setuptools import setup

# Package predict.py as the `custom-predictor` distribution; the resulting
# tarball is what gets handed to AI Platform as a package URI.
setup(
    name='custom-predictor',
    description='Custom prediction routine.',
    version='0.1',
    scripts=['predict.py']
)

Overwriting ../predict_app//setup.py


In [45]:
%cd $PREDICT_APP_FOLDER
%run $PREDICT_APP_FOLDER/setup.py sdist --formats=gztar
%cd -

/home/jupyter/caip-demo/predict_app
running sdist
running egg_info
writing top-level names to custom_predictor.egg-info/top_level.txt
writing dependency_links to custom_predictor.egg-info/dependency_links.txt
writing custom_predictor.egg-info/PKG-INFO
reading manifest file 'custom_predictor.egg-info/SOURCES.txt'
writing manifest file 'custom_predictor.egg-info/SOURCES.txt'





running check






creating custom-predictor-0.1
creating custom-predictor-0.1/custom_predictor.egg-info
copying files to custom-predictor-0.1...
copying predict.py -> custom-predictor-0.1
copying setup.py -> custom-predictor-0.1
copying custom_predictor.egg-info/PKG-INFO -> custom-predictor-0.1/custom_predictor.egg-info
copying custom_predictor.egg-info/SOURCES.txt -> custom-predictor-0.1/custom_predictor.egg-info
copying custom_predictor.egg-info/dependency_links.txt -> custom-predictor-0.1/custom_predictor.egg-info
copying custom_predictor.egg-info/top_level.txt -> custom-predictor-0.1/custom_predictor.egg-info
Writing custom-predictor-0.1/setup.cfg
Creating tar archive
removing 'custom-predictor-0.1' (and everything under it)
/home/jupyter/caip-demo/notebooks


### Copy the source distribution package to the GCS staging area

In [46]:
VERSION_NAME = "v02"
TARBALL_NAME = "custom-predictor-0.1.tar.gz"
LOCAL_PATH = "{}/dist/{}".format(PREDICT_APP_FOLDER, TARBALL_NAME)
GCS_PATH = "gs://jk-demo-staging/custom_predictors/octane/{}/{}".format(VERSION_NAME, TARBALL_NAME)

!gsutil cp $LOCAL_PATH $CUSTOM_PREDICTOR_PATH

Copying file://../predict_app//dist/custom-predictor-0.1.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  989.0 B/  989.0 B]                                                
Operation completed over 1 objects/989.0 B.                                      


### Create a model version that uses the custom prediction routine

In [47]:
!gcloud beta ai-platform versions create $VERSION_NAME \
--model=$MODEL_NAME \
--origin=$MODEL_PATH \
--runtime-version=1.14 \
--python-version=3.5 \
--package-uris $CUSTOM_PREDICTOR_PATH \
--prediction-class predict.OctaneRegressor

Creating version (this might take a few minutes)......done.                    


### Test the custom prediction routine

In [57]:
# Re-read the test set, strip the label column, and turn the remaining
# feature rows into plain nested lists for the request body.
df_test = pd.read_csv(TEST_DATASET_PATH)
df_test = df_test.drop(labels='octane', axis=1)
instances = df_test.values.tolist()

In [60]:
PROJECT_ID = !gcloud config list project --format "value(core.project)"
PROJECT_ID = PROJECT_ID[0]

'jk-demo1'

In [71]:
%env GOOGLE_APPLICATION_CREDENTIALS="/home/jupyter/.keys/jk-demo1-ainotebook.json"

env: GOOGLE_APPLICATION_CREDENTIALS="/home/jupyter/.keys/jk-demo1-ainotebook.json"


In [72]:
# Display the stored value to check exactly what the client libraries will read.
%env GOOGLE_APPLICATION_CREDENTIALS

'"/home/jupyter/.keys/jk-demo1-ainotebook.json"'

In [75]:
# Build a client for the 'ml' v1 API and address the deployed model version.
service = googleapiclient.discovery.build('ml', 'v1')
name = 'projects/{}/models/{}/versions/{}'.format(PROJECT_ID, MODEL_NAME, VERSION_NAME)

# Send all test instances in a single online-prediction request.
request = service.projects().predict(name=name, body={'instances': instances})
response = request.execute()

# Errors are reported inside the response body, so surface them explicitly.
if 'error' in response:
    raise RuntimeError(response['error'])
print(response['predictions'])

Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/googleapiclient/discovery_cache/__init__.py", line 36, in autodetect
    from google.appengine.api import memcache
ImportError: No module named 'google.appengine'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/googleapiclient/discovery_cache/file_cache.py", line 33, in <module>
    from oauth2client.contrib.locked_file import LockedFile
ImportError: No module named 'oauth2client.contrib.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/googleapiclient/discovery_cache/file_cache.py", line 37, in <module>
    from oauth2client.locked_file import LockedFile
ImportError: No module named 'oauth2client.locked_file'

During handling of the above exception, another exception occurred:

Traceback (

DefaultCredentialsError: File "/home/jupyter/.keys/jk-demo1-ainotebook.json" was not found.