## Hacky Boilerplates
- Install `feast` with pip.
- Activate user service account with credentials JSON.
- Hacks to retrieve essential information for deployments and serving.

**NOTE**: This code block might hang for a long time.

In [1]:
# Install the Feast Python SDK (imported as `feast` in later cells).
!pip install feast

# Authenticate gcloud as the user service account whose key file path is held in
# the GOOGLE_APPLICATION_CREDENTIALS environment variable.
!gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS

[33mYou are using pip version 19.0.1, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Activated service account credentials for: [kubeflow-asia-user@aliz-development.iam.gserviceaccount.com]


In [2]:
import json
import os
import re
import subprocess

# Derive the deployment coordinates (GCP project, app name, cluster zone) from
# the service-account key file and from `gcloud`.
cred_path = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
if not cred_path:
    raise RuntimeError('GOOGLE_APPLICATION_CREDENTIALS is not set')
with open(cred_path, 'r') as c:
    cred = json.load(c)

PROJECT = cred['project_id']

# The service account e-mail is expected to contain '<app-name>-user'; the
# captured app name is also used as the cluster name in the gcloud filter below.
app_name_match = re.search(r'([a-z\-]+)-user', cred['client_email'])
if app_name_match is None:
    raise ValueError(
        'cannot derive APP_NAME from client_email %r' % cred['client_email'])
APP_NAME = app_name_match.group(1)

# check_output raises CalledProcessError if gcloud exits non-zero, instead of
# handing empty output to the JSON parser.
out = subprocess.check_output(
    ['gcloud', 'container', 'clusters', 'list',
     '--filter', 'name=%s' % APP_NAME, '--format', 'json'])
clusters = json.loads(out)
if not clusters:
    raise RuntimeError('no cluster named %r found by gcloud' % APP_NAME)
config = clusters[0]
ZONE = config['zone']

print('PROJECT =', PROJECT)
print('APP_NAME =', APP_NAME)
print('ZONE =', ZONE)

PROJECT = aliz-development
APP_NAME = kubeflow-asia
ZONE = asia-southeast1-a


In [3]:
import pandas as pd
import numpy as np

from feast.sdk.resources.entity import Entity
from feast.sdk.resources.storage import Storage
from feast.sdk.resources.feature import Feature, Datastore, ValueType
from feast.sdk.resources.feature_set import FeatureSet, FileType
import feast.specs.FeatureSpec_pb2 as feature_pb

from feast.sdk.importer import Importer

from feast.sdk.client import Client

In [4]:
# Endpoints of the Feast deployment this notebook talks to.
# TODO(gabrielwen): Use internal DNS?
# (An earlier core endpoint was '10.148.0.46:30576'.)
STAGING_LOCATION = 'gs://kubecon-19-gojek/staging'
FEAST_SERVING_URL = '10.148.0.100:6566'
FEAST_CORE_URL = '10.148.0.99:6565'

# Client handle used by the following cells for registration, ingestion,
# dataset creation and serving lookups.
fs = Client(
    core_url=FEAST_CORE_URL,
    serving_url=FEAST_SERVING_URL,
    verbose=True,
)

## Load precomputed feature data

In [5]:
# Read the precomputed housing features and convert the 'timestamp' column
# to datetimes.
df = pd.read_csv('usa_housing.csv', index_col=False)
df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

# Preview the first rows.
df.head()

Unnamed: 0,avg_area_income,avg_area_house_age,avg_area_number_of_rooms,avg_area_number_of_bedrooms,area_population,price,area_code,timestamp
0,79545.458574,5.682861,7.009188,4.09,23086.800503,1059034.0,NE 37010-5101,2018-01-01
1,79248.642455,6.0029,6.730821,3.09,40173.072174,1505891.0,CA 48958,2018-01-01
2,61287.067179,5.86589,8.512727,5.13,36882.1594,1058988.0,WI 06482-3489,2018-01-01
3,63345.240046,7.188236,5.586729,3.26,34310.242831,1260617.0,FPO AP 44820,2018-01-01
4,59982.197226,5.040555,7.839388,4.23,26354.109472,630943.5,FPO AE 09386,2018-01-01


## Register entity and features

In [6]:
# Build an importer directly from the DataFrame: rows are keyed by 'area_code',
# timestamped by 'timestamp', staged under STAGING_LOCATION, and targeted at the
# SERVING and WAREHOUSE datastores.
importer = Importer.from_df(
    df,
    entity='usa_housing',
    owner='user@website.com',
    staging_location=STAGING_LOCATION,
    id_column='area_code',
    timestamp_column='timestamp',
    serving_store=Datastore(id='SERVING'),
    warehouse_store=Datastore(id='WAREHOUSE'),
)

# Placeholder metadata. Ideally these descriptions are written by hand so that
# they carry adequate information for the next user.
importer.entity.description = 'entity level description'
for fid in importer.features:
    importer.features[fid].description = 'feature level description'

# Apply the entity and feature definitions and ingest the feature data.
fs.run(importer, apply_features=True, apply_entity=True)


Successfully applied entity with name: usa_housing
---
name: usa_housing
description: entity level description

Successfully applied feature with id: usa_housing.avg_area_income
---
id: usa_housing.avg_area_income
name: avg_area_income
owner: user@website.com
description: feature level description
valueType: DOUBLE
entity: usa_housing
dataStores:
  serving:
    id: SERVING
  warehouse:
    id: WAREHOUSE

Successfully applied feature with id: usa_housing.avg_area_house_age
---
id: usa_housing.avg_area_house_age
name: avg_area_house_age
owner: user@website.com
description: feature level description
valueType: DOUBLE
entity: usa_housing
dataStores:
  serving:
    id: SERVING
  warehouse:
    id: WAREHOUSE

Successfully applied feature with id: usa_housing.avg_area_number_of_rooms
---
id: usa_housing.avg_area_number_of_rooms
name: avg_area_number_of_rooms
owner: user@website.com
description: feature level description
valueType: DOUBLE
entity: usa_housing
dataStores:
  serving:
    id: SERV

'feastimport1558033510821'

## Define a Feature Set for this project

In [None]:
# Entity whose features feed the model.
ENTITY_ID = 'usa_housing'

# Fully-qualified feature ids ('<entity>.<feature>') used as model inputs.
FEATURES_SET = [
    'usa_housing.avg_area_income',
    'usa_housing.avg_area_house_age',
    'usa_housing.avg_area_number_of_rooms',
    'usa_housing.avg_area_number_of_bedrooms',
    'usa_housing.area_population',
]

feature_set = FeatureSet(
    entity=ENTITY_ID,
    features=FEATURES_SET,
)

## Retrieve a Training Set from Feast

In [None]:
# Retrieve feature data for training from Feast
# Date range of the training data requested from Feast.
start_date, end_date = "2018-01-01", "2018-01-31"

# Create the dataset for the feature set, then download it into a DataFrame
# via the staging location.
dataset = fs.create_dataset(feature_set, start_date, end_date)
training_df = fs.download_dataset_to_df(dataset, STAGING_LOCATION)

## Train Linear Model

In [None]:
import numpy as np

# Fit price as a linear function of the features by least squares.
# Column names are the part of each feature id after the '.'.
feature_columns = [feature_id.split('.')[1] for feature_id in FEATURES_SET]
train_data = training_df[feature_columns].to_numpy()
n_features = len(train_data[0])

# Append a constant column of ones so the last coefficient acts as the intercept.
A = np.insert(train_data, n_features, 1, axis=1)
Y = training_df['price'].to_numpy()

x = np.linalg.lstsq(A, Y, rcond=0)[0]

# Split the solution into per-feature weights (m) and the intercept (b).
m, b = x[:n_features], x[n_features]

print(m, b)

## Local Prediction

In [None]:
# Feature set used for serving lookups (same entity and feature ids as above).
serving_fs = FeatureSet(
    entity=ENTITY_ID,
    features=FEATURES_SET,
)

def local_predict(id):
    """Predict with the locally fitted linear model for one entity key.

    Looks up the serving data for `id` through the Feast client, takes the
    first returned row without its first column, and returns the weighted sum
    of that row with `m` plus the intercept `b`.
    """
    serving_data = fs.get_serving_data(serving_fs, entity_keys=[id])
    first_row = serving_data.to_numpy()[0]
    # Column 0 is skipped - presumably the entity id; verify against the
    # serving response.
    x = first_row[1:]
    weighted = m * x
    return sum(weighted) + b

# Limit numpy's print precision to three digits.
np.set_printoptions(precision=3)

# Score one known area code with the local model.
p = local_predict('FPO AE 09386')
print(p)

## Save the model

In [None]:
import json
import os

MODEL_FILE = 'simple_model.dat'

# TODO(gabrielwen): Use PWD instead.
# Target path: $HOME (falling back to '/home' when HOME is unset) + MODEL_FILE.
model_path = os.path.join(os.getenv('HOME', '/home'), MODEL_FILE)

# Model coefficients together with the Feast endpoints and feature definitions,
# stored as one JSON document.
model = {
    'm': m.tolist(),
    'b': b,
    'FEAST_CORE_URL': FEAST_CORE_URL,
    'FEAST_SERVING_URL': FEAST_SERVING_URL,
    'ENTITY_ID': ENTITY_ID,
    'FEATURES_SET': FEATURES_SET,
}

print('writing to', model_path)
with open(model_path, 'w+') as f:
    json.dump(model, f)

## Deploy with Kubeflow

In [7]:
# Register gcloud as a Docker credential helper (non-interactively, via --quiet).
# Needed for docker image pushes.
!gcloud auth configure-docker --quiet

# Authenticate gcloud as the user service account whose key file path is held in
# the GOOGLE_APPLICATION_CREDENTIALS environment variable.
!gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS

`docker` and `docker-credential-gcloud` need to be in the same PATH in order to work correctly together.
gcloud's Docker credential helper can be configured but it will not work until this is corrected.
gcloud credential helpers already registered correctly.
Activated service account credentials for: [kubeflow-asia-user@aliz-development.iam.gserviceaccount.com]


In [8]:
import fairing
import sys
import importlib

# Base image and label prefix handed to the fairing deployment.
BASE_IMAGE = 'gcr.io/kubeflow-images-public/fairing-base:v20190516'
SERVING_LABEL = 'kubeflow-fairing-demo'

# Images are pushed to a registry path inside the current GCP project.
DOCKER_REGISTRY = 'gcr.io/{}/fairing-job'.format(PROJECT)

print('docker registry:', DOCKER_REGISTRY)
print('base image:', BASE_IMAGE)

docker registry: gcr.io/aliz-development/fairing-job
base image: gcr.io/kubeflow-images-public/fairing-base:v20190516


In [9]:
import deploy_with_fairing
import uuid

# Give every deployment its own label (base label + 4 random hex characters)
# so that different deployments can be told apart.
serving_label = '-'.join((SERVING_LABEL, uuid.uuid4().hex[:4]))
print('Deploying service with selector', serving_label)

# TODO(gabrielwen): Re-deploy has permission issue.
# Reload the module before deploying - presumably so local edits to
# deploy_with_fairing are picked up without restarting the kernel.
importlib.reload(deploy_with_fairing)
deploy_with_fairing.deploy(
    DOCKER_REGISTRY,
    BASE_IMAGE,
    serving_label=serving_label,
)

Building image...
Loading Docker credentials for repository 'gcr.io/kubeflow-images-public/fairing-base:v20190516'
Invoking 'docker-credential-gcloud' to obtain Docker credentials.


Deploying service with selector kubeflow-fairing-demo-a1db


Successfully obtained Docker credentials.
Image successfully built in 1.8694912969949655s.
Pushing image gcr.io/aliz-development/fairing-job/fairing-job:10591BC2...
Loading Docker credentials for repository 'gcr.io/aliz-development/fairing-job/fairing-job:10591BC2'
Invoking 'docker-credential-gcloud' to obtain Docker credentials.
Successfully obtained Docker credentials.
Uploading gcr.io/aliz-development/fairing-job/fairing-job:10591BC2
Layer sha256:5d71636fb824265e30ff34bf20737c9cdc4f5af28b6bce86f08215c55b89bfab exists, skipping
Layer sha256:46b64f8f5cd275ca04071c8cd36170990c26b5cce6bcb440579cb4a9094fc7b4 exists, skipping
Layer sha256:620aea26e85367b08cdf1f6768491fb44df6a2a71f7d663f835b1692e849c3ee exists, skipping
Layer sha256:a8c5303780550b746a4781e5e4cd893121d8019e971414a2a1273d54486b4eb9 exists, skipping
Layer sha256:425317e6171bfb0c70b2fa348dfcc0ae2166734273450aa05aa04be43b9278d7 exists, skipping
Layer sha256:d6341e30912f12f56e18564a3b582853f65376766f5f9d641a68a724ed6db88f exists

Waiting for prediction endpoint to come up...


## Fetch fairing service endpoint

In [10]:
# Need to set up KUBECONFIG. Kubernetes API client depends on it.
# The command writes a kubeconfig entry for the cluster; "$APP_NAME", "$ZONE"
# and "$PROJECT" are filled in from the notebook's Python variables.
!gcloud container clusters get-credentials "$APP_NAME" --zone "$ZONE" --project "$PROJECT"

Fetching cluster endpoint and auth data.
kubeconfig entry generated for kubeflow-asia.


In [11]:
import subprocess
from kubernetes import client, config

# Load cluster credentials from the kubeconfig file. load_kube_config() already
# installs the loaded settings as the client's default configuration, so no
# further Configuration() / set_default() call is made: on newer client
# releases that would replace the loaded settings with a blank configuration.
config.load_kube_config()

v1 = client.CoreV1Api()

# Find the service created by the deployment through its unique 'serving' label.
label_selector = 'serving=%s' % serving_label
resp = v1.list_service_for_all_namespaces(label_selector=label_selector)
if not resp.items:
    raise RuntimeError('no service found for selector %r' % label_selector)

# Use the first match.
service = resp.items[0]
service_name = service.metadata.name
namespace = service.metadata.namespace

print('fairing service: {0}/{1}'.format(namespace, service_name))

fairing service: kubeflow/fairing-service-snxqf


## Serving with Kubeflow

In [18]:
import json
import requests

def predict(url, id):
    """POST an entity id to the prediction endpoint and return the response.

    The id is wrapped as {'strData': id}, JSON-encoded, and sent as the form
    field 'json'. The response object is returned as-is.
    """
    payload = json.dumps({'strData': id})
    return requests.post(url, data={'json': payload})

def extract_prediction_array(content):
    """Parse a JSON body and return its data -> ndarray entry as a numpy array.

    A missing 'data' or 'ndarray' key yields an empty array.
    """
    parsed = json.loads(content)
    data_section = parsed.get('data', {})
    values = data_section.get('ndarray', [])
    return np.array(values)

In [19]:
import numpy as np
import pprint

# Prediction endpoint of the fairing service, addressed by its in-cluster
# DNS name on port 5000.
url = "http://{service_name}.{namespace}.svc.cluster.local:5000/predict".format(
    namespace=namespace,
    service_name=service_name,
)

# Ask the deployed model for a prediction and decode the response body.
r = predict(url, 'FPO AE 09386')
prediction = extract_prediction_array(r.content)

print('prediction:')
pprint.pprint(prediction)

prediction:
array([-1343000.21557071, -1802396.56798744, -1691398.90442468,
       -2630314.71512979, -2236696.9639044 ])
