In [2]:
# Cloud Storage bucket name and region referenced by the gsutil / gcloud cells later in the notebook.
BUCKET_NAME = "qwiklabs-gcp-03-7aa8cf07fc69" #@param {type:"string"}
REGION = "us-east1" #@param {type:"string"}

In [3]:
# Create the Cloud Storage bucket gs://$BUCKET_NAME in $REGION.
# NOTE(review): presumably this errors if the bucket already exists — confirm before re-running the cell.

! gsutil mb -l $REGION gs://$BUCKET_NAME

Creating gs://qwiklabs-gcp-03-7aa8cf07fc69/...


In [4]:
# Confirm the notebook can reach the bucket by listing gs://$BUCKET_NAME.

! gsutil ls -al gs://$BUCKET_NAME

In [6]:
# SQL over the natality sample table: weight_pounds plus the four columns
# (is_male, mother_age, plurality, gestation_weeks), restricted to year > 2000.
# The string carries no LIMIT clause; one is appended where the query is run.
query = """
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks
  --,FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING))) AS hashmonth
FROM
  publicdata.samples.natality
WHERE year > 2000
"""

In [8]:
# Run the query (capped at 1000 rows) in BigQuery and preview the resulting DataFrame
from google.cloud import bigquery

bq_client = bigquery.Client()
df = bq_client.query(query + " LIMIT 1000").to_dataframe()
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks
0,6.68662,True,18,1,43.0
1,9.360828,True,32,1,41.0
2,8.437091,False,30,1,39.0
3,6.124442,False,24,1,40.0
4,7.12534,False,26,1,41.0


In [9]:
%%writefile preprocess.py
import numpy as np
import pandas as pd

class MySimpleScaler(object):
  """Turns raw natality rows into the numeric feature frame the model consumes.

  Despite the name, no scaling is performed: rows are filtered and the
  ``is_male`` value is converted to a 0/1 column.
  """

  # Column order expected in the raw input rows.
  INPUT_COLUMNS = ['is_male', 'mother_age', 'plurality', 'gestation_weeks']
  # Numeric columns copied through unchanged.
  NUMERIC_COLUMNS = ['mother_age', 'plurality', 'gestation_weeks']
  # Label of the 0/1 "is male" column in the result. It is the boolean ``True``
  # (the label one-hot encoding used to give it), kept so the returned frame
  # has the same columns as before.
  IS_MALE_FLAG = True

  def preprocess(self, data):
    """Filters invalid rows and encodes ``is_male`` as a 0/1 flag.

    Args:
      data: 2-D array-like whose columns are, in order,
        is_male, mother_age, plurality, gestation_weeks.

    Returns:
      pandas.DataFrame with columns
      ['mother_age', 'plurality', 'gestation_weeks', True], where the
      ``True`` column is 1 for male rows and 0 otherwise. Rows whose
      mother_age, plurality or gestation_weeks is not strictly positive
      are removed, so the result may have fewer rows than ``data``.
    """
    frame = pd.DataFrame(data, columns=self.INPUT_COLUMNS)

    # Keep only rows in which every numeric field is strictly positive.
    is_valid = (
        (frame.mother_age > 0)
        & (frame.plurality > 0)
        & (frame.gestation_weeks > 0)
    )
    frame = frame[is_valid]
    print(frame.shape)

    features = frame[self.NUMERIC_COLUMNS].copy()
    # Compute the flag directly instead of one-hot encoding: get_dummies only
    # emits a ``True`` column when the batch contains at least one True value,
    # so a batch made up solely of False rows used to raise KeyError.
    features[self.IS_MALE_FLAG] = frame['is_male'].eq(True).astype(int)

    return features

Overwriting preprocess.py


In [10]:
import pickle
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.externals import joblib

from preprocess import MySimpleScaler


scaler = MySimpleScaler()
X = scaler.preprocess(np.asarray(df[['is_male', 'mother_age', 'plurality','gestation_weeks']]))

# Attach the label by row index; the inner join keeps only the rows that
# survived preprocessing.
merged = X.merge(df[['weight_pounds']], left_index=True, right_index=True, how='inner')
# A missing label makes `fit` fail, so drop such rows and keep X aligned
# with the labels that remain.
merged = merged.dropna(subset=['weight_pounds'])
X = X.loc[merged.index]

model = RandomForestRegressor(max_depth=2, random_state=0)
model.fit(X, merged['weight_pounds'])

(991, 4)


  from numpy.core.umath_tests import inner1d


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=0, verbose=0, warm_start=False)

In [12]:
# Preview the first rows of the preprocessed feature frame.
X.head()

Unnamed: 0,mother_age,plurality,gestation_weeks,True
0,18,1,43,1
1,32,1,41,1
2,30,1,39,0
3,24,1,40,0
4,26,1,41,0


In [13]:
# Feature importances of the fitted model.
print(model.feature_importances_)
# NOTE: scored on the same rows the model was fitted on, so this is not a held-out estimate.
print(model.score(X,merged['weight_pounds']))

[0. 0. 1. 0.]
0.3579143130149024


In [17]:
import numpy as np
# One hand-written row in the preprocessed column order:
# mother_age, plurality, gestation_weeks, is-male flag.
model.predict([[32, 1, 37, True]])


array([6.8564948])

In [19]:
# Persist the two artifacts: the fitted model (joblib) and the
# preprocessing object (pickle).
joblib.dump(model, 'model.joblib')

with open('preprocessor.pkl', 'wb') as preprocessor_file:
  pickle.dump(scaler, preprocessor_file)

In [None]:
Deploying a custom prediction routine

To deploy a custom prediction routine that serves predictions from your trained model, do the following:

1. Create a custom predictor to handle requests.
2. Package your predictor and your preprocessing module.
3. Upload your model artifacts and your custom code to Cloud Storage.
4. Deploy your custom prediction routine to AI Platform.

In [20]:
%%writefile predictor.py
import os
import pickle

import numpy as np
from sklearn.externals import joblib

class MyPredictor(object):
  """Custom prediction routine pairing a fitted model with its preprocessor."""

  def __init__(self, model, preprocessor):
    """Stores the objects used by `predict`.

    Args:
      model: Object exposing `predict(features)`.
      preprocessor: Object exposing `preprocess(inputs)`.
    """
    self._model = model
    self._preprocessor = preprocessor

  def predict(self, instances, **kwargs):
    """Preprocesses the request rows and returns the model's predictions.

    Args:
      instances: Sequence of raw input rows taken from the prediction request.
      **kwargs: Accepted for interface compatibility; not used.

    Returns:
      A list of native Python values, one per row the model scored. The
      preprocessor decides which rows reach the model.
    """
    inputs = np.asarray(instances, dtype=object)
    preprocessed_inputs = self._preprocessor.preprocess(inputs)
    outputs = self._model.predict(preprocessed_inputs)
    # tolist() converts NumPy scalars into plain Python numbers. list(outputs)
    # would keep NumPy scalar types, and some of them (e.g. np.float32)
    # cannot be encoded as JSON.
    return np.asarray(outputs).tolist()

  @classmethod
  def from_path(cls, model_dir):
    """Builds a predictor from the artifacts stored in `model_dir`.

    Args:
      model_dir: Directory containing 'model.joblib' and 'preprocessor.pkl'.

    Returns:
      A new instance wrapping the loaded model and preprocessor.
    """
    model_path = os.path.join(model_dir, 'model.joblib')
    model = joblib.load(model_path)

    preprocessor_path = os.path.join(model_dir, 'preprocessor.pkl')
    # NOTE(security): unpickling can execute code embedded in the file, so
    # model_dir must only ever contain trusted artifacts.
    with open(preprocessor_path, 'rb') as f:
      preprocessor = pickle.load(f)

    return cls(model, preprocessor)

Overwriting predictor.py


In [21]:
%%writefile setup.py
from setuptools import setup

# Package metadata for the custom code: the predictor and preprocessing
# files are shipped as scripts of the 'my_custom_code' distribution.
setup(
    name='my_custom_code',
    version='0.1',
    scripts=[
        'predictor.py',
        'preprocess.py',
    ],
)

Overwriting setup.py


In [22]:
# Build a gzipped source distribution of the custom code package.
! python setup.py sdist --formats=gztar

running sdist
running egg_info
writing my_custom_code.egg-info/PKG-INFO
writing top-level names to my_custom_code.egg-info/top_level.txt
writing dependency_links to my_custom_code.egg-info/dependency_links.txt
reading manifest file 'my_custom_code.egg-info/SOURCES.txt'
writing manifest file 'my_custom_code.egg-info/SOURCES.txt'
running check


creating my_custom_code-0.1
creating my_custom_code-0.1/my_custom_code.egg-info
copying files to my_custom_code-0.1...
copying README.md -> my_custom_code-0.1
copying predictor.py -> my_custom_code-0.1
copying preprocess.py -> my_custom_code-0.1
copying setup.py -> my_custom_code-0.1
copying my_custom_code.egg-info/PKG-INFO -> my_custom_code-0.1/my_custom_code.egg-info
copying my_custom_code.egg-info/SOURCES.txt -> my_custom_code-0.1/my_custom_code.egg-info
copying my_custom_code.egg-info/dependency_links.txt -> my_custom_code-0.1/my_custom_code.egg-info
copying my_custom_code.egg-info/top_level.txt -> my_custom_code-0.1/my_custom_code.egg-info
W

In [23]:
# Upload the model artifacts and the custom code package to Cloud Storage:
#   my_custom_code-0.1.tar.gz (custom code)    -> custom_prediction_routine/
#   model.joblib              (model artifact) -> custom_prediction_routine/model/
#   preprocessor.pkl          (model artifact) -> custom_prediction_routine/model/

! gsutil cp ./dist/my_custom_code-0.1.tar.gz gs://$BUCKET_NAME/custom_prediction_routine/my_custom_code-0.1.tar.gz
! gsutil cp model.joblib preprocessor.pkl gs://$BUCKET_NAME/custom_prediction_routine/model/

Copying file://./dist/my_custom_code-0.1.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  1.3 KiB/  1.3 KiB]                                                
Operation completed over 1 objects/1.3 KiB.                                      
Copying file://model.joblib [Content-Type=application/octet-stream]...
Copying file://preprocessor.pkl [Content-Type=application/octet-stream]...      
/ [2 files][  8.4 KiB/  8.4 KiB]                                                
Operation completed over 2 objects/8.4 KiB.                                      


In [24]:
# Names of the AI Platform model and version used by the gcloud cells and the prediction request.
MODEL_NAME = 'CPR_test'
VERSION_NAME = 'v1'

In [25]:
# Create the AI Platform model resource $MODEL_NAME in $REGION.

! gcloud ai-platform models create $MODEL_NAME \
  --regions $REGION

Created ml engine model [projects/qwiklabs-gcp-03-7aa8cf07fc69/models/CPR_test].


In [26]:
# Create version $VERSION_NAME of $MODEL_NAME from the uploaded model artifacts
# (--origin) and custom code package (--package-uris); --prediction-class names
# the MyPredictor class written to predictor.py.
# --quiet automatically installs the beta component if it isn't already installed
! gcloud --quiet beta ai-platform versions create $VERSION_NAME \
  --model $MODEL_NAME \
  --runtime-version 1.13 \
  --python-version 3.5 \
  --origin gs://$BUCKET_NAME/custom_prediction_routine/model/ \
  --package-uris gs://$BUCKET_NAME/custom_prediction_routine/my_custom_code-0.1.tar.gz \
  --prediction-class predictor.MyPredictor

Creating version (this might take a few minutes)......done.                    


In [27]:
# Install / upgrade the Google API client library.
# NOTE(review): the version is not pinned, so reruns may pick up a different release.
! pip install --upgrade google-api-python-client

Requirement already up-to-date: google-api-python-client in /usr/local/lib/python3.5/dist-packages (1.7.11)


In [28]:
# Project ID used for the gcloud configuration and the prediction resource name.
PROJECT_ID = 'qwiklabs-gcp-03-7aa8cf07fc69'

In [29]:
# Point the gcloud CLI at PROJECT_ID.
! gcloud config set project $PROJECT_ID

Updated property [core/project].


In [36]:
import googleapiclient.discovery

# Raw request rows, in the column order the preprocessor expects:
# is_male, mother_age, plurality, gestation_weeks.
instances = [
    [True, 37, 1, 37.0],
    [False, 30, 1, 39.0],
]

# API client for 'ml' v1 and the full resource name of the deployed version.
service = googleapiclient.discovery.build('ml', 'v1')
name = 'projects/{}/models/{}/versions/{}'.format(
    PROJECT_ID, MODEL_NAME, VERSION_NAME)



In [37]:
# Send the online prediction request for `instances` and keep the response.
predict_request = service.projects().predict(name=name, body={'instances': instances})
response = predict_request.execute()



In [38]:
# Display the prediction response.
response

{'predictions': [6.856494795111921, 7.595946823848207]}