## Housing 
- Install `feast` with pip.
- Activate user service account with credentials JSON.
- Hacks to retrieve essential information for deployments and serving.

**NOTE**: This code block might hang for a long time.

In [2]:
# Project-local helper module for this demo.
# notebook_setup() prepares the notebook environment (presumably the steps
# listed in the section above; confirm in demo_util).
import demo_util
demo_util.notebook_setup()

In [26]:
import importlib
# Reload the helper module so that edits made to demo_util.py are picked up
# without restarting the kernel.
importlib.reload(demo_util)
# NOTE(review): absolute, user-specific path that is not referenced by any of
# the cells visible here; confirm it is still needed.
working_dir = "/home/jovyan/LinearModel"

In [7]:
# Read the deployment configuration from the project helper. The three values
# are reused later for the gcloud credentials call and the Docker registry name.
PROJECT, ZONE, APP_NAME = demo_util.get_project_config()
# Echo the resolved configuration so the reader can verify it.
print('PROJECT =', PROJECT)
print('APP_NAME =', APP_NAME)
print('ZONE =', ZONE)

PROJECT = aliz-development
APP_NAME = kubeflow-asia
ZONE = asia-southeast1-a


In [37]:
import fairing
import sys
import importlib
import deploy_with_fairing
import uuid
import json
import requests
import pandas as pd
import numpy as np
from retrying import retry
from feast.sdk.resources.entity import Entity
from feast.sdk.resources.storage import Storage
from feast.sdk.resources.feature import Feature, Datastore, ValueType
from feast.sdk.resources.feature_set import FeatureSet, FileType
import feast.specs.FeatureSpec_pb2 as feature_pb

from feast.sdk.importer import Importer
from feast.sdk.client import Client

In [9]:
# Connect to the Feast deployment
# NOTE(review): the endpoints and the staging bucket below are hard-coded for
# one specific cluster; adjust them for your own environment.
FEAST_CORE_URL = '10.148.0.99:6565'
FEAST_SERVING_URL = '10.148.0.100:6566'
# Staging location handed to Feast for the import and for dataset downloads.
STAGING_LOCATION = 'gs://kubecon-19-gojek/staging'
# Single client used for registration, ingestion, training-set creation and serving lookups.
fs = Client(core_url=FEAST_CORE_URL,serving_url=FEAST_SERVING_URL, verbose=True)

## Load precomputed feature data

In [10]:
# Load the precomputed housing features from a CSV next to the notebook.
# NOTE(review): the preview below shows an `Unnamed: 0` column, which suggests
# the CSV was written together with its index; confirm whether that column is
# meant to be ingested as a feature.
df = pd.read_csv('usa_housing.csv', index_col=False)
# Preview the first rows.
df.head()

Unnamed: 0,avg_area_income,avg_area_house_age,avg_area_number_of_rooms,avg_area_number_of_bedrooms,area_population,price,area_code,timestamp
0,79545.458574,5.682861,7.009188,4.09,23086.800503,1059034.0,NE 37010-5101,2018-01-01T00:00:00
1,79248.642455,6.0029,6.730821,3.09,40173.072174,1505891.0,CA 48958,2018-01-01T00:00:00
2,61287.067179,5.86589,8.512727,5.13,36882.1594,1058988.0,WI 06482-3489,2018-01-01T00:00:00
3,63345.240046,7.188236,5.586729,3.26,34310.242831,1260617.0,FPO AP 44820,2018-01-01T00:00:00
4,59982.197226,5.040555,7.839388,4.23,26354.109472,630943.5,FPO AE 09386,2018-01-01T00:00:00


## Register entity and features

In [11]:
# Create importer
# The importer is built from the in-memory frame: `area_code` is passed as the
# id column, `timestamp` as the timestamp column, and the data is staged at
# STAGING_LOCATION. Both a serving and a warehouse datastore id are supplied.
importer = Importer.from_df(df, 
                           entity='usa_housing', 
                           owner='user@website.com',  
                           staging_location=STAGING_LOCATION,
                           id_column='area_code', 
                           timestamp_column='timestamp',
                           serving_store=Datastore(id='SERVING'),
                           warehouse_store=Datastore(id='WAREHOUSE'))

# Update feature and entity metadata. Ideally you want to update these manually
# so that they contain adequate information for the next user
importer.entity.description = 'entity level description' 
# Every feature receives the same placeholder description here.
for feature_id in importer.features:
    importer.features[feature_id].description = 'feature level description'
    
# Ingest the feature data into the store
# apply_features / apply_entity also register the feature and entity specs
# before the import runs. The value displayed below the log is presumably the
# import job id (confirm against the Feast client).
fs.run(importer, apply_features=True, apply_entity=True)

Successfully applied entity with name: usa_housing
---
name: usa_housing
description: entity level description

Successfully applied feature with id: usa_housing.avg_area_income
---
id: usa_housing.avg_area_income
name: avg_area_income
owner: user@website.com
description: feature level description
valueType: DOUBLE
entity: usa_housing
dataStores:
  serving:
    id: SERVING
  warehouse:
    id: WAREHOUSE

Successfully applied feature with id: usa_housing.avg_area_house_age
---
id: usa_housing.avg_area_house_age
name: avg_area_house_age
owner: user@website.com
description: feature level description
valueType: DOUBLE
entity: usa_housing
dataStores:
  serving:
    id: SERVING
  warehouse:
    id: WAREHOUSE

Successfully applied feature with id: usa_housing.avg_area_number_of_rooms
---
id: usa_housing.avg_area_number_of_rooms
name: avg_area_number_of_rooms
owner: user@website.com
description: feature level description
valueType: DOUBLE
entity: usa_housing
dataStores:
  serving:
    id: SERV

'feastimport1558316898401'

## Define a Feature Set for this project

In [12]:
# Entity whose features are used throughout the rest of the notebook.
ENTITY_ID = 'usa_housing'
# Fully qualified feature ids ("<entity>.<column>") pulled for training.
# The target `price` is deliberately listed LAST: the training cell overwrites
# the last selected column with ones to obtain the intercept term.
TRAINING_FEATURES_SET = [
    'usa_housing.avg_area_income',
    'usa_housing.avg_area_house_age',
    'usa_housing.avg_area_number_of_rooms',
    'usa_housing.avg_area_number_of_bedrooms',
    'usa_housing.area_population',
    'usa_housing.price'
]

# Feature set describing which features to request from Feast for training.
feature_set = FeatureSet(entity=ENTITY_ID, 
                         features=TRAINING_FEATURES_SET)

## Retrieve a Training Set from Feast

In [13]:
# Retrieve feature data for training from Feast
# The two date strings bound the requested range (January 2018).
dataset = fs.create_dataset(feature_set, "2018-01-01", "2018-01-31")
# Download the created dataset into a DataFrame, using the staging location.
training_df = fs.download_dataset_to_df(dataset, STAGING_LOCATION)

creating training dataset for features: ['usa_housing.avg_area_income', 'usa_housing.avg_area_house_age', 'usa_housing.avg_area_number_of_rooms', 'usa_housing.avg_area_number_of_bedrooms', 'usa_housing.area_population', 'usa_housing.price']
created dataset usa_housing_1558316967818_20180101_20180131: aliz-development.fs_usa_housing.1558316967818_20180101_20180131


## Train Linear Model

In [14]:
import numpy as np

# Fit an ordinary least-squares linear model: price ~ m . features + b
np.set_printoptions(precision=3)

# DataFrame column names are the part after the "<entity>." prefix.
column_names = [feature_id.split('.')[1] for feature_id in TRAINING_FEATURES_SET]
train_data = training_df[column_names].to_numpy()

# The last selected column (the target, `price`) is overwritten with ones so
# that its fitted coefficient becomes the intercept.
bias_column = len(train_data[0]) - 1
train_data[:, bias_column] = 1
Y = training_df['price'].to_numpy()

# lstsq returns a tuple; its first element is the least-squares solution.
lstsq_result = np.linalg.lstsq(train_data, Y, rcond=0)
x = lstsq_result[0]

# Split the solution into per-feature weights and the intercept.
m = x[:bias_column]
b = x[bias_column]

print(m, b)

[2.158e+01 1.656e+05 1.207e+05 1.651e+03 1.520e+01] -2637299.033328297


## Local Prediction

In [15]:
# Features requested at serving time: the training features without the
# target `price`, in the same order as the fitted weights `m`.
SERVING_FEATURES_SET = [
    'usa_housing.avg_area_income',
    'usa_housing.avg_area_house_age',
    'usa_housing.avg_area_number_of_rooms',
    'usa_housing.avg_area_number_of_bedrooms',
    'usa_housing.area_population'
]
# Feature set used for online lookups against Feast serving.
serving_fs = FeatureSet(entity=ENTITY_ID, features=SERVING_FEATURES_SET)

def local_predict(id):
    """Predict a price in-process for a single entity key.

    Looks up the serving features for ``id`` through the notebook-level Feast
    client ``fs`` and feature set ``serving_fs``, then applies the fitted
    weights ``m`` and intercept ``b``.

    Args:
        id: entity key to look up (an area code in this notebook).

    Returns:
        The linear model's prediction for that entity.
    """
    # retrieve features from Feast serving
    serving_rows = fs.get_serving_data(serving_fs, entity_keys=[id])
    first_row = serving_rows.to_numpy()[0]
    # The first element of the row is skipped; presumably it is the entity
    # key column (confirm against the Feast serving response layout).
    feature_values = first_row[1:]
    weighted_terms = m * feature_values
    return sum(weighted_terms) + b

# Sanity-check the fitted model locally for one area code before deploying it.
p = local_predict('FPO AE 09386')
print(p)

845388.766291216


## Save the model

In [16]:
import json
import os

MODEL_FILE = 'simple_model.dat'

# Everything that is persisted with the model: the fitted parameters plus the
# Feast endpoints and feature ids used for the serving lookups above.
model = {
    'm': m.tolist(),  # numpy array -> plain list so it is JSON serialisable
    'b': b,
    'FEAST_CORE_URL': FEAST_CORE_URL,
    'FEAST_SERVING_URL': FEAST_SERVING_URL,
    'ENTITY_ID': ENTITY_ID,
    'FEATURES_SET': SERVING_FEATURES_SET,
}

# TODO(gabrielwen): Use PWD instead.
# The file is written under $HOME, falling back to /home when HOME is unset.
home_dir = os.environ.get('HOME', '/home')
model_path = os.path.join(home_dir, MODEL_FILE)
print('writing to', model_path)

# Serialise first, then write the JSON text in one go.
serialized_model = json.dumps(model)
with open(model_path, 'w+') as model_file:
    model_file.write(serialized_model)

writing to /home/jovyan/simple_model.dat


## Deploy with Kubeflow

In [17]:
# Need to set up KUBECONFIG. Kubernetes API client depends on it.
# Shell escape: the "$APP_NAME", "$ZONE" and "$PROJECT" arguments are filled in
# from the notebook variables of the same names set by get_project_config().
!gcloud container clusters get-credentials "$APP_NAME" --zone "$ZONE" --project "$PROJECT"

Fetching cluster endpoint and auth data.
kubeconfig entry generated for kubeflow-asia.


In [18]:
# Image repository inside the current project's container registry; passed to
# the deploy call below.
DOCKER_REGISTRY = 'gcr.io/{}/fairing-job'.format(PROJECT)
# Public fairing base image, pinned to a dated tag.
BASE_IMAGE = 'gcr.io/kubeflow-images-public/fairing-base:v20190516'
# Base label for the serving deployment; a random suffix is appended per deploy.
SERVING_LABEL = 'kubeflow-fairing-demo'

print('docker registry:', DOCKER_REGISTRY)
print('base image:', BASE_IMAGE)

docker registry: gcr.io/aliz-development/fairing-job
base image: gcr.io/kubeflow-images-public/fairing-base:v20190516


In [19]:
# To disambiguate between different deployments.
# A random 4-hex-character suffix is appended to the base label.
serving_label = SERVING_LABEL + '-' + uuid.uuid4().hex[:4]
print('Deploying service with selector', serving_label)

# TODO(gabrielwen): Re-deploy has permission issue.
# Reload first so that edits to deploy_with_fairing.py take effect.
importlib.reload(deploy_with_fairing)
# Deploy with the registry and base image defined above. The displayed result
# is a (preprocessor, builder, deployer) tuple, as shown in the cell output.
deploy_with_fairing.deploy(DOCKER_REGISTRY, BASE_IMAGE, serving_label=serving_label)

INFO:root:Using preprocessor: <fairing.preprocessors.base.BasePreProcessor object at 0x7f0097d1c4e0>
INFO:root:Using builder: <fairing.builders.append.append.AppendBuilder object at 0x7f009f3ca978>
INFO:root:Creating docker context: /tmp/fairing.context.tar.gz
INFO:root:Adding files to context: {'gcs_context.py', 'v1_compat_.py', 'tasks.py', 'docker_digest_.py', 'function_shim.py', 'docker_image_.py', 'retry_.py', 'test_base_preprocessor.py', 'test_gcp_creds.py', 'base_builder.py', 'config_test.py', 'HousingTrain.py', 'base.py', 'test_full_notebook_preprocessor.py', 'test_gcp.py', 'cluster.py', 'context_source.py', 'docker_session_.py', 'util_.py', 'test_kubeflow_training.py', 'demo_util.py', 'constants.py', 'v2_compat_.py', 'nested_.py', 'docker.py', 'LabelPrediction.py', 'save_.py', 'test_function_preprocessor.py', 'converted_notebook.py', 'gcp.py', 'docker_image_list_.py', 'conftest.py', 'append_.py', 'monitor_.py', 'simple_model.dat', '__init__.py', 'test_gcs_access.py', 'tfjob.py'

Deploying service with selector kubeflow-fairing-demo-c4cd


INFO:root:Context: /tmp/fairing.context.tar.gz, Adding utils.py at /app/utils.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding gcpserving.py at /app/gcpserving.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding transport_pool_.py at /app/transport_pool_.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding config.py at /app/config.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding notebook_util.py at /app/notebook_util.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding test_running_in_notebooks.py at /app/test_running_in_notebooks.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding test_high_level_apis.py at /app/test_high_level_apis.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding dockerfile.py at /app/dockerfile.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding main.py at /app/main.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding backends.py at /app/backends.py
INFO:root:Context: /tmp/fairing.context.tar.gz, Adding tes

(<fairing.preprocessors.base.BasePreProcessor at 0x7f0097d1c4e0>,
 <fairing.builders.append.append.AppendBuilder at 0x7f009f3ca978>,
 <fairing.deployers.serving.serving.Serving at 0x7f009f612b00>)

## Fetch fairing service endpoint

In [27]:
# Look up the namespace and service name of the service deployed under
# `serving_label`; both are needed to build the in-cluster prediction URL.
namespace, service_name = demo_util.get_fairing_endpoint(serving_label)

fairing service: kubeflow/fairing-service-xv9g7


## Serving with Kubeflow

In [35]:
@retry(wait_exponential_multiplier=1000, wait_exponential_max=5000,
       stop_max_delay=2*60*1000)
def predict(url, id):
    """POST an entity id to the serving endpoint and return the HTTP response.

    The call is retried with exponential backoff (capped at 5 s between
    attempts) for up to two minutes, so a service that is still starting up
    is given time to become ready.

    Args:
        url: full URL of the prediction endpoint.
        id: entity key to predict for; sent as the ``strData`` field.

    Returns:
        The ``requests.Response`` of the first successful (2xx) attempt.

    Raises:
        requests.RequestException: if no attempt succeeds within the retry
            window (connection failure, timeout or HTTP error status).
    """
    pdata = {
        'strData': id,
    }
    serialized_data = json.dumps(pdata)
    # The JSON document travels as the form field named 'json'.
    r = requests.post(url, data={'json': serialized_data}, timeout=5)
    # requests does not raise on 4xx/5xx by itself. Without this check an
    # error response from a not-yet-ready service is returned as if it were a
    # prediction and the retry decorator never gets a chance to try again.
    r.raise_for_status()
    return r

def extract_prediction(content):
    """Return the ``binData`` field of a JSON response body.

    Args:
        content: JSON document (str or bytes) whose top level is an object.

    Returns:
        The value stored under ``binData``, or ``None`` when the key is absent.
    """
    return json.loads(content).get('binData')

In [36]:

# In-cluster DNS name of the deployed service; port 5000 and the /predict
# path are fixed here.
url = "http://{service_name}.{namespace}.svc.cluster.local:5000/predict".format(
    service_name=service_name,
    namespace=namespace)

# Request a prediction for one area code and pull the value out of the
# JSON response body.
r = predict(url, 'CA 48958')
prediction = extract_prediction(r.content)
print('prediction:', prediction)

prediction: 1494937.691618489
