In [1]:
BUCKET_NAME = "qwiklabs-gcp-03-6f04349204cb" #@param {type:"string"}
REGION = "us-central1" #@param {type:"string"}

In [2]:
# Creating a bucket

! gsutil mb -l $REGION gs://$BUCKET_NAME

Creating gs://qwiklabs-gcp-03-6f04349204cb/...
ServiceException: 409 Bucket qwiklabs-gcp-03-6f04349204cb already exists.


In [3]:
# Test access to the bucket

! gsutil ls -al gs://$BUCKET_NAME

In [4]:
# Create SQL query using natality data after the year 2000
query = """
SELECT
  weight_pounds,
  is_male,
  mother_age,
  plurality,
  gestation_weeks,
  FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING))) AS hashmonth
FROM
  publicdata.samples.natality
WHERE year > 2000
"""

In [5]:
# Call BigQuery and examine in dataframe
from google.cloud import bigquery
df = bigquery.Client().query(query + " LIMIT 100").to_dataframe()
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks,hashmonth
0,6.68662,True,18,1,43,8904940584331855459
1,9.360828,True,32,1,41,1088037545023002395
2,8.437091,False,30,1,39,5896567601480310696
3,6.124442,False,24,1,40,-6244544205302024223
4,7.12534,False,26,1,41,-8029892925374153452


In [7]:
import pandas as pd



# Get one hot encoding of columns B
one_hot = pd.get_dummies(df['is_male'])
# Drop column B as it is now encoded
df = df.drop('is_male',axis = 1)
# Join the encoded df
df = df.join(one_hot)

In [30]:
df.head()

Unnamed: 0,weight_pounds,mother_age,plurality,gestation_weeks,hashmonth,False,True
0,7.063611,32,1,37.0,7108882242435606404,0,1
1,4.687028,30,3,33.0,-7170969733900686954,0,1
2,7.561856,20,1,39.0,6392072535155213407,0,1
3,7.561856,31,1,37.0,-2126480030009879160,0,1
4,7.312733,32,1,40.0,3408502330831153141,0,1


In [6]:
%%writefile preprocess.py
import numpy as np
import pandas as pd

class MySimpleScaler(object):
 

 def preprocess(self, data):
        
    data = pd.DataFrame(data,columns=['is_male', 'mother_age', 'plurality','gestation_weeks', 'hashmonth'])
    #data = data[data.weight_pounds  > 0]
    data = data[data.mother_age  > 0]
    data = data[data.plurality > 0]
    data = data[data.gestation_weeks > 0]
    print(data.shape)

    x_cols = ['mother_age', 'plurality', 'gestation_weeks', True]
    # Get one hot encoding of columns B
    one_hot = pd.get_dummies(data['is_male'])
    # Drop column B as it is now encoded
    data = data.drop('is_male',axis = 1)
    # Join the encoded df
    data = data.join(one_hot)


    return data[x_cols]

Overwriting preprocess.py


In [37]:

df = df[df.mother_age  > 0]
df = df[df.plurality > 0]
df = df[df.gestation_weeks > 0]



In [6]:
df['weight_pounds']

0     6.426475
1     6.181762
2     8.750147
3     6.812284
4     7.500126
        ...   
95    7.837433
96    7.561856
97    5.875319
98    5.577695
99    8.811877
Name: weight_pounds, Length: 100, dtype: float64

In [39]:
import numpy as np

a=np.asarray(df[['is_male', 'mother_age', 'plurality','gestation_weeks', 'hashmonth']])

#df[['is_male', 'mother_age', 'plurality','gestation_weeks', 'hashmonth']]

np.asarray([[True, 37, 2, 37.0, 7108882242435606404],
          [True, 37, 2, 37.0, 7108882242435606404]], dtype = object)

#list(df[['is_male', 'mother_age', 'plurality','gestation_weeks', 'hashmonth']])



array([[True, 37, 2, 37.0, 7108882242435606404],
       [True, 37, 2, 37.0, 7108882242435606404]], dtype=object)

In [38]:
print(a.dtype)

object


In [7]:
import pickle
import numpy as np

from sklearn.ensemble import RandomForestRegressor
from sklearn.externals import joblib

from preprocess import MySimpleScaler


scaler = MySimpleScaler()
X = scaler.preprocess(np.asarray(df[['is_male', 'mother_age', 'plurality','gestation_weeks', 'hashmonth']]))

merged = X.merge(df[['weight_pounds']], left_index=True, right_index=True, how='inner')
#print(data_new)

model = RandomForestRegressor(max_depth=2, random_state=0)
model.fit(X, merged['weight_pounds'])

(100, 5)


  from numpy.core.umath_tests import inner1d


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=0, verbose=0, warm_start=False)

In [8]:
X

Unnamed: 0,mother_age,plurality,gestation_weeks,True
0,18,1,43,1
1,32,1,41,1
2,30,1,39,0
3,24,1,40,0
4,26,1,41,0
...,...,...,...,...
95,28,1,41,1
96,23,1,41,1
97,29,1,37,1
98,33,1,39,0


In [83]:
np.asarray(df)

array([[8.24969784404, True, 24, 1, 41.0, 6691862025345277042],
       [4.31224184472, True, 19, 1, 38.0, 8904940584331855459],
       [6.75055446244, False, 35, 1, 42.0, 3408502330831153141],
       ...,
       [8.000575487979999, True, 23, 1, 39.0, 1088037545023002395],
       [7.06361087448, True, 21, 1, 37.0, 7108882242435606404],
       [6.8122838958, True, 29, 1, 36.0, 1088037545023002395]],
      dtype=object)

In [9]:
print(model.feature_importances_)
print(model.score(X,merged['weight_pounds']))

[0.21235293 0.07755402 0.66532064 0.0447724 ]
0.2951073163844412


In [24]:
import numpy as np
model.predict([[32, 1, 37, 0, 1]])


array([7.16733733])

In [9]:
X.shape

(985, 5)

In [10]:
joblib.dump(model, 'model.joblib')
with open ('preprocessor.pkl', 'wb') as f:
  pickle.dump(scaler, f)

In [None]:
Deploying a custom prediction routine
To deploy a custom prediction routine to serve predictions from your trained model, do the following:

Create a custom predictor to handle requests
Package your predictor and your preprocessing module
Upload your model artifacts and your custom code to Cloud Storage
Deploy your custom prediction routine to AI Platform

In [11]:
%%writefile predictor.py
import os
import pickle

import numpy as np
from sklearn.externals import joblib

class MyPredictor(object):
  def __init__(self, model, preprocessor):
    self._model = model
    self._preprocessor = preprocessor
    #self._class_names = load_iris().target_names

  def predict(self, instances, **kwargs):
    inputs = np.asarray(instances, dtype = object)
    preprocessed_inputs = self._preprocessor.preprocess(inputs)
    # print(preprocessed_inputs)
    outputs = self._model.predict(preprocessed_inputs)
    return outputs

  @classmethod
  def from_path(cls, model_dir):
    model_path = os.path.join(model_dir, 'model.joblib')
    model = joblib.load(model_path)

    preprocessor_path = os.path.join(model_dir, 'preprocessor.pkl')
    with open(preprocessor_path, 'rb') as f:
      preprocessor = pickle.load(f)

    return cls(model, preprocessor)

Overwriting predictor.py


In [12]:
%%writefile setup.py
from setuptools import setup

setup(
    name='my_custom_code',
    version='0.1',
    scripts=['predictor.py', 'preprocess.py'])

Overwriting setup.py


In [13]:
! python setup.py sdist --formats=gztar

running sdist
running egg_info
writing my_custom_code.egg-info/PKG-INFO
writing top-level names to my_custom_code.egg-info/top_level.txt
writing dependency_links to my_custom_code.egg-info/dependency_links.txt
reading manifest file 'my_custom_code.egg-info/SOURCES.txt'
writing manifest file 'my_custom_code.egg-info/SOURCES.txt'
running check


creating my_custom_code-0.1
creating my_custom_code-0.1/my_custom_code.egg-info
copying files to my_custom_code-0.1...
copying README.md -> my_custom_code-0.1
copying predictor.py -> my_custom_code-0.1
copying preprocess.py -> my_custom_code-0.1
copying setup.py -> my_custom_code-0.1
copying my_custom_code.egg-info/PKG-INFO -> my_custom_code-0.1/my_custom_code.egg-info
copying my_custom_code.egg-info/SOURCES.txt -> my_custom_code-0.1/my_custom_code.egg-info
copying my_custom_code.egg-info/dependency_links.txt -> my_custom_code-0.1/my_custom_code.egg-info
copying my_custom_code.egg-info/top_level.txt -> my_custom_code-0.1/my_custom_code.egg-info
W

In [14]:
# Upload model artifacts and custom code to Cloud Storage
# model.joblib (model artifact)
# preprocessor.pkl (model artifact)
# my_custom_code-0.1.tar.gz (custom code)

! gsutil cp ./dist/my_custom_code-0.1.tar.gz gs://$BUCKET_NAME/custom_prediction_routine/my_custom_code-0.1.tar.gz
! gsutil cp model.joblib preprocessor.pkl gs://$BUCKET_NAME/custom_prediction_routine/model/

Copying file://./dist/my_custom_code-0.1.tar.gz [Content-Type=application/x-tar]...
/ [1 files][  1.3 KiB/  1.3 KiB]                                                
Operation completed over 1 objects/1.3 KiB.                                      
Copying file://model.joblib [Content-Type=application/octet-stream]...
Copying file://preprocessor.pkl [Content-Type=application/octet-stream]...      
/ [2 files][  8.4 KiB/  8.4 KiB]                                                
Operation completed over 2 objects/8.4 KiB.                                      


In [15]:
MODEL_NAME = 'CPR_test'
VERSION_NAME = 'v1'

In [16]:
# create your model

! gcloud ai-platform models create $MODEL_NAME \
  --regions $REGION

Created ml engine model [projects/qwiklabs-gcp-03-6f04349204cb/models/CPR_test].


In [17]:
# --quiet automatically installs the beta component if it isn't already installed 
! gcloud --quiet beta ai-platform versions create $VERSION_NAME \
  --model $MODEL_NAME \
  --runtime-version 1.13 \
  --python-version 3.5 \
  --origin gs://$BUCKET_NAME/custom_prediction_routine/model/ \
  --package-uris gs://$BUCKET_NAME/custom_prediction_routine/my_custom_code-0.1.tar.gz \
  --prediction-class predictor.MyPredictor

Creating version (this might take a few minutes)......done.                    


In [18]:
! pip install --upgrade google-api-python-client

Requirement already up-to-date: google-api-python-client in /usr/local/lib/python3.5/dist-packages (1.7.11)


In [19]:
PROJECT_ID = 'qwiklabs-gcp-03-6f04349204cb'

In [20]:
! gcloud config set project $PROJECT_ID

Updated property [core/project].


In [21]:
import googleapiclient.discovery

instances = [[True, 37, 1, 37.0, 7108882242435606404]]


#instances = np.asarray(df[['mother_age', 'plurality', 'gestation_weeks','is_male']].head())

service = googleapiclient.discovery.build('ml', 'v1')
name = 'projects/{}/models/{}/versions/{}'.format(PROJECT_ID, MODEL_NAME, VERSION_NAME)



In [22]:
response = service.projects().predict(
    name=name,
    body={'instances': instances}
).execute()



In [23]:
response

{'error': 'Failed to json encode the prediction response: array([6.78647278]) is not JSON serializable If your output is not text, did you forget to suffix the alias of your output tensor with _bytes?'}

In [27]:
df.columns

Index(['weight_pounds', 'is_male', 'mother_age', 'plurality',
       'gestation_weeks', 'hashmonth'],
      dtype='object')

In [42]:
instances

[[32, True, 37, 1, 37.0, 7108882242435606404]]

In [44]:
import pandas as pd
['weight_pounds', 'is_male', 'mother_age', 'plurality','gestation_weeks', 'hashmonth']

pd.DataFrame(np.asarray(instances),columns=['weight_pounds', 'is_male', 'mother_age', 'plurality','gestation_weeks', 'hashmonth'])


Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks,hashmonth
0,32.0,1.0,37.0,1.0,37.0,7.108882e+18


In [26]:
 def preprocess(data):
        
    data = pd.DataFrame(data,columns=['is_male', 'mother_age', 'plurality','gestation_weeks', 'hashmonth'])
    #data = data[data.weight_pounds  > 0]
    data = data[data.mother_age  > 0]
    data = data[data.plurality > 0]
    data = data[data.gestation_weeks > 0]
    print(data.shape)

    x_cols = ['mother_age', 'plurality', 'gestation_weeks', True]
    # Get one hot encoding of columns B
    one_hot = pd.get_dummies(data['is_male'])
    # Drop column B as it is now encoded
    data = data.drop('is_male',axis = 1)
    # Join the encoded df
    data = data.join(one_hot)
    


    return data[x_cols]

In [29]:
import pandas as pd
import numpy as np
a = preprocess(np.asarray([[True, 37, 2, 37.0, 7108882242435606404]],dtype = object))
a

(1, 5)


Unnamed: 0,mother_age,plurality,gestation_weeks,True
0,37,2,37,1


In [62]:
np.asarray([[True, 37, 2, 37.0, 7108882242435606404]],dtype = object)

array([[True, 37, 2, 37.0, 7108882242435606404]], dtype=object)

In [42]:
np.asarray([[True, 37, 2, 37.0, 7108882242435606404]])

array([[1.00000000e+00, 3.70000000e+01, 2.00000000e+00, 3.70000000e+01,
        7.10888224e+18]])

In [27]:
def predict(instances, **kwargs):
    inputs = np.asarray(instances,dtype = object)
    inputs
    preprocessed_inputs = preprocess(inputs)
    # print(preprocessed_inputs)
    outputs = preprocessed_inputs
    return outputs

In [32]:
inputs = np.asarray([[True, 37, 1, 37.0, 7108882242435606404],[True, 37, 1, 37.0, 7108882242435606404]],dtype = object)

In [33]:
print(inputs)

[[True 37 1 37.0 7108882242435606404]
 [True 37 1 37.0 7108882242435606404]]


In [34]:
o = predict(instances = inputs)

(2, 5)


In [35]:
list(model.predict(o))

[6.786472776317309, 6.786472776317309]