In [1]:
BUCKET_NAME = "qwiklabs-gcp-01-c2d37df448e7" #@param {type:"string"}
REGION = "us-east1" #@param {type:"string"}

In [2]:
# Creating a bucket

! gsutil mb -l $REGION gs://$BUCKET_NAME

Creating gs://qwiklabs-gcp-01-c2d37df448e7/...


In [3]:
# Test access to the bucket

! gsutil ls -al gs://$BUCKET_NAME

In [4]:
# Create SQL query using natality data after the year 2000
query = """
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks,
  FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING))) AS hashmonth
FROM
  publicdata.samples.natality
WHERE year > 2000
"""

In [5]:
# Call BigQuery and examine in dataframe
from google.cloud import bigquery
df = bigquery.Client().query(query + " LIMIT 100").to_dataframe()
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks,hashmonth
0,6.68662,True,18,1,43,8904940584331855459
1,9.360828,True,32,1,41,1088037545023002395
2,8.437091,False,30,1,39,5896567601480310696
3,6.124442,False,24,1,40,-6244544205302024223
4,7.12534,False,26,1,41,-8029892925374153452


In [7]:
import pandas as pd



# Get one hot encoding of columns B
one_hot = pd.get_dummies(df['is_male'])
# Drop column B as it is now encoded
df = df.drop('is_male',axis = 1)
# Join the encoded df
df = df.join(one_hot)

In [30]:
df.head()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks,hashmonth,False,True
0,7.063611,32,1,37.0,7108882242435606404,0,1
1,4.687028,30,3,33.0,-7170969733900686954,0,1
2,7.561856,20,1,39.0,6392072535155213407,0,1
3,7.561856,31,1,37.0,-2126480030009879160,0,1
4,7.312733,32,1,40.0,3408502330831153141,0,1


In [5]:
%%writefile preprocess.py
import numpy as np
import pandas as pd

class MySimpleScaler(object):
 

 def preprocess(self, data):
    data = data[data.weight_pounds  > 0]
    data = data[data.mother_age  > 0]
    data = data[data.plurality > 0]
    data = data[data.gestation_weeks > 0]
    print(data.shape)

    x_cols = ['mother_age', 'plurality', 'gestation_weeks', True,False]
    # Get one hot encoding of columns B
    one_hot = pd.get_dummies(data['is_male'])
    # Drop column B as it is now encoded
    data = data.drop('is_male',axis = 1)
    # Join the encoded df
    data = data.join(one_hot)


    return data[x_cols],data['weight_pounds']

Overwriting preprocess.py


In [37]:

df = df[df.mother_age  > 0]
df = df[df.plurality > 0]
df = df[df.gestation_weeks > 0]



In [6]:
df['weight_pounds']

0     6.426475
1     6.181762
2     8.750147
3     6.812284
4     7.500126
        ...   
95    7.837433
96    7.561856
97    5.875319
98    5.577695
99    8.811877
Name: weight_pounds, Length: 100, dtype: float64

In [6]:
import pickle


from sklearn.ensemble import RandomForestRegressor
from sklearn.externals import joblib

from preprocess import MySimpleScaler


scaler = MySimpleScaler()
X,y = scaler.preprocess(df)
#print(data_new)

model = RandomForestRegressor(max_depth=2, random_state=0)
model.fit(X, y)

(100, 6)


  from numpy.core.umath_tests import inner1d


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=0, verbose=0, warm_start=False)

In [7]:
print(model.feature_importances_)
print(model.score(X,y))

[0.21235293 0.07755402 0.66532064 0.02075033 0.02402208]
0.2951073163844412


In [8]:
X.shape

(100, 5)

In [9]:
joblib.dump(model, 'model.joblib')
with open ('preprocessor.pkl', 'wb') as f:
  pickle.dump(scaler, f)

In [None]:
Deploying a custom prediction routine
To deploy a custom prediction routine to serve predictions from your trained model, do the following:

Create a custom predictor to handle requests
Package your predictor and your preprocessing module
Upload your model artifacts and your custom code to Cloud Storage
Deploy your custom prediction routine to AI Platform

In [10]:
%%writefile predictor.py
import os
import pickle

import numpy as np
from sklearn.externals import joblib

class MyPredictor(object):
  def __init__(self, model, preprocessor):
    self._model = model
    self._preprocessor = preprocessor
    #self._class_names = load_iris().target_names

  def predict(self, instances, **kwargs):
    inputs = np.asarray(instances)
    preprocessed_inputs = self._preprocessor.preprocess(inputs)

  @classmethod
  def from_path(cls, model_dir):
    model_path = os.path.join(model_dir, 'model.joblib')
    model = joblib.load(model_path)

    preprocessor_path = os.path.join(model_dir, 'preprocessor.pkl')
    with open(preprocessor_path, 'rb') as f:
      preprocessor = pickle.load(f)

    return cls(model, preprocessor)

Writing predictor.py


In [11]:
%%writefile setup.py
from setuptools import setup

setup(
    name='my_custom_code',
    version='0.1',
    scripts=['predictor.py', 'preprocess.py'])

Writing setup.py


In [12]:
! python setup.py sdist --formats=gztar

running sdist
running egg_info
creating my_custom_code.egg-info
writing my_custom_code.egg-info/PKG-INFO
writing top-level names to my_custom_code.egg-info/top_level.txt
writing dependency_links to my_custom_code.egg-info/dependency_links.txt
writing manifest file 'my_custom_code.egg-info/SOURCES.txt'
reading manifest file 'my_custom_code.egg-info/SOURCES.txt'
writing manifest file 'my_custom_code.egg-info/SOURCES.txt'
running check


creating my_custom_code-0.1
creating my_custom_code-0.1/my_custom_code.egg-info
copying files to my_custom_code-0.1...
copying README.md -> my_custom_code-0.1
copying predictor.py -> my_custom_code-0.1
copying preprocess.py -> my_custom_code-0.1
copying setup.py -> my_custom_code-0.1
copying my_custom_code.egg-info/PKG-INFO -> my_custom_code-0.1/my_custom_code.egg-info
copying my_custom_code.egg-info/SOURCES.txt -> my_custom_code-0.1/my_custom_code.egg-info
copying my_custom_code.egg-info/dependency_links.txt -> my_custom_code-0.1/my_custom_code.egg-info


In [13]:
# Upload model artifacts and custom code to Cloud Storage
# model.joblib (model artifact)
# preprocessor.pkl (model artifact)
# my_custom_code-0.1.tar.gz (custom code)

! gsutil cp ./dist/my_custom_code-0.1.tar.gz gs://$BUCKET_NAME/custom_prediction_routine/my_custom_code-0.1.tar.gz
! gsutil cp model.joblib preprocessor.pkl gs://$BUCKET_NAME/custom_prediction_routine/model/

Copying file://./dist/my_custom_code-0.1.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  1.3 KiB/  1.3 KiB]                                                
Operation completed over 1 objects/1.3 KiB.                                      
Copying file://model.joblib [Content-Type=application/octet-stream]...
Copying file://preprocessor.pkl [Content-Type=application/octet-stream]...      
/ [2 files][  8.4 KiB/  8.4 KiB]                                                
Operation completed over 2 objects/8.4 KiB.                                      


In [14]:
MODEL_NAME = 'CPR_test'
VERSION_NAME = 'v1'

In [15]:
# create your model

! gcloud ai-platform models create $MODEL_NAME \
  --regions $REGION

Created ml engine model [projects/qwiklabs-gcp-01-c2d37df448e7/models/CPR_test].


In [16]:
# --quiet automatically installs the beta component if it isn't already installed 
! gcloud --quiet beta ai-platform versions create $VERSION_NAME \
  --model $MODEL_NAME \
  --runtime-version 1.15 \
  --python-version 3.7 \
  --origin gs://$BUCKET_NAME/custom_prediction_routine/model/ \
  --package-uris gs://$BUCKET_NAME/custom_prediction_routine/my_custom_code-0.1.tar.gz \
  --prediction-class predictor.MyPredictor

Creating version (this might take a few minutes)......done.                    


In [None]:
! pip install --upgrade google-api-python-client

In [None]:
import googleapiclient.discovery

instances = [
  [6.7, 3.1, 4.7, 1.5],
  [4.6, 3.1, 1.5, 0.2],
]

service = googleapiclient.discovery.build('ml', 'v1')
name = 'projects/{}/models/{}/versions/{}'.format(PROJECT_ID, MODEL_NAME, VERSION_NAME)

response = service.projects().predict(
    name=name,
    body={'instances': instances}
).execute()

if 'error' in response:
    raise RuntimeError(response['error'])
else:
  print(response['predictions'])