## Hacky Boilerplates
- Install `feast` with pip.
- Activate user service account with credentials JSON.
- Hacks to retrieve essential information for deployments and serving.

**NOTE**: This code block might hang for a long time.

In [1]:
# Install the Feast Python SDK with pip (no version is pinned here, so pip
# picks whichever release it resolves at run time).
!pip install feast

# Activate the service account whose JSON key file is named by the
# GOOGLE_APPLICATION_CREDENTIALS environment variable, so that the gcloud
# commands issued later in this notebook run with its credentials.
!gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS

[33mYou are using pip version 19.0.1, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Activated service account credentials for: [kubeflow-user@aliz-development.iam.gserviceaccount.com]


In [2]:
import json
import os
import re
import subprocess

# Derive PROJECT, APP_NAME and ZONE from the service-account key file and
# from the cluster list reported by gcloud.
cred_path = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
if not cred_path:
    raise RuntimeError('GOOGLE_APPLICATION_CREDENTIALS is not set; '
                       'cannot locate the service account key file')
with open(cred_path, 'r') as key_file:
    cred = json.load(key_file)

PROJECT = cred['project_id']

# The account e-mail is expected to look like "<app-name>-user@..."; the part
# before "-user" is used as the cluster name below. A raw string keeps "\-"
# from being treated as an (invalid) string escape.
app_match = re.search(r'([a-z\-]+)-user', cred['client_email'])
if app_match is None:
    raise ValueError('cannot derive APP_NAME from service account e-mail %r'
                     % cred['client_email'])
APP_NAME = app_match.group(1)

# check_output raises CalledProcessError when gcloud exits non-zero, instead
# of handing empty output to json.loads.
out = subprocess.check_output(
    ['gcloud', 'container', 'clusters', 'list',
     '--filter', 'name=%s' % APP_NAME, '--format', 'json'])
clusters = json.loads(out)
if not clusters:
    raise LookupError('gcloud returned no cluster named %r' % APP_NAME)
config = clusters[0]
ZONE = config['zone']

print('PROJECT =', PROJECT)
print('APP_NAME =', APP_NAME)
print('ZONE =', ZONE)

PROJECT = aliz-development
APP_NAME = kubeflow
ZONE = us-central1-a


In [3]:
import pandas as pd
import numpy as np

from feast.sdk.resources.entity import Entity
from feast.sdk.resources.storage import Storage
from feast.sdk.resources.feature import Feature, Datastore, ValueType
from feast.sdk.resources.feature_set import FeatureSet, FileType
import feast.specs.FeatureSpec_pb2 as feature_pb

from feast.sdk.importer import Importer

from feast.sdk.client import Client

In [17]:
# Endpoints of the locally reachable Feast deployment.
# FEAST_CORE_URL = '10.148.0.46:30576'
FEAST_CORE_URL = 'localhost:6565'
FEAST_SERVING_URL = 'localhost:6566'

# Staging location handed to the importer and to the dataset download below.
STAGING_LOCATION = 'gs://kubecon-19-gojek/staging'

# Single client object used for every Feast core/serving call in the notebook.
fs = Client(
    core_url=FEAST_CORE_URL,
    serving_url=FEAST_SERVING_URL,
    verbose=True,
)

## Load precomputed feature data

In [18]:
# Column names applied to data.csv, in file order.
CSV_COLUMNS = ['entity', 'datetime', 'feature1', 'feature2', 'feature3', 'feature4']

# Read the precomputed features and convert the timestamp column to datetimes
# in one chained expression.
df = (
    pd.read_csv('data.csv', names=CSV_COLUMNS, index_col=False)
      .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
)

df.head()

Unnamed: 0,entity,datetime,feature1,feature2,feature3,feature4
0,0,2018-01-01 00:00:00,0.16655,0.077912,8,1
1,0,2018-01-01 00:01:00,0.350554,0.378997,4,7
2,0,2018-01-01 00:02:00,0.922618,0.317972,8,6
3,0,2018-01-01 00:03:00,0.065824,0.055651,9,1
4,0,2018-01-01 00:04:00,0.847057,0.213472,2,10


## Register entity and features

In [19]:
# from feast.sdk.resources.entity import Entity
# from feast.sdk.resources.feature import Feature


# # Register a simple entity
# demo_entity = Entity(name='demo_entity', description='My simple demo entity')
# fs.apply(demo_entity)

# # Register five numeric features on this entity
# from feast.sdk.resources.feature import Feature

# my_simple

# Now that we have finished creating our features, we ingest them into feast

# Stores the importer is told to use for serving and warehouse data.
serving_store = Datastore(id='SERVING')
warehouse_store = Datastore(id='WAREHOUSE')

# Build the importer straight from the DataFrame loaded above.
importer = Importer.from_df(
    df,
    entity='simple_entity',
    owner='user@website.com',
    staging_location=STAGING_LOCATION,
    id_column='entity',
    timestamp_column='datetime',
    serving_store=serving_store,
    warehouse_store=warehouse_store,
)

# Fill in placeholder metadata. In real use these descriptions should be
# written by hand so the next user knows what each entity/feature means.
importer.entity.description = 'entity level description'
for feature_id in importer.features:
    feature_spec = importer.features[feature_id]
    feature_spec.description = 'feature level description'

# Run the import, applying the entity and feature definitions along with it.
fs.run(importer, apply_features=True, apply_entity=True)


_Rendezvous: <_Rendezvous of RPC that terminated with:
	status = StatusCode.UNAVAILABLE
	details = "Connect Failed"
	debug_error_string = "{"created":"@1557874937.166133252","description":"Failed to create subchannel","file":"src/core/ext/filters/client_channel/client_channel.cc","file_line":2267,"referenced_errors":[{"created":"@1557874937.166118002","description":"Pick Cancelled","file":"src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc","file_line":242,"referenced_errors":[{"created":"@1557874937.165866183","description":"Connect Failed","file":"src/core/ext/filters/client_channel/subchannel.cc","file_line":962,"grpc_status":14,"referenced_errors":[{"created":"@1557874937.165817133","description":"Failed to connect to remote host: FD Shutdown","file":"src/core/lib/iomgr/lockfree_event.cc","file_line":194,"os_error":"Timeout occurred","referenced_errors":[{"created":"@1557874937.165787116","description":"connect() timed out","file":"src/core/lib/iomgr/tcp_client_posix.cc","file_line":119}],"target_address":"ipv4:10.148.0.99:6565"}]}]}]}"
>

## Define a Feature Set for this project

In [None]:
# Fully qualified ids of the four features registered on simple_entity.
feature_ids = [
    "simple_entity.feature1",
    "simple_entity.feature2",
    "simple_entity.feature3",
    "simple_entity.feature4",
]

# Feature set reused by the dataset-creation and serving calls below.
feature_set = FeatureSet(entity="simple_entity", features=feature_ids)

## Train Linear Model

In [None]:
import numpy as np

# Retrieve feature data for training from Feast
dataset = fs.create_dataset(feature_set, "2018-01-01", "2018-01-31")
feast_df = fs.download_dataset_to_df(dataset, STAGING_LOCATION)
# A bare `feast_df.head()` in the middle of a cell is evaluated and thrown
# away (only a cell's last expression is rendered), so print the preview.
print(feast_df.head())

# Train model
# NOTE(review): the fit below uses synthetic random data, not feast_df.
N_SAMPLES = 200
N_FEATURES = 100
train_data = np.random.randint(1, high=100, size=(N_SAMPLES, N_FEATURES))
# Append a constant column of ones so the last coefficient is the intercept.
A = np.insert(train_data, N_FEATURES, 1, axis=1)
Y = np.random.randint(1, high=100, size=N_SAMPLES)

# Least-squares solution: the first N_FEATURES entries are the per-feature
# coefficients, the final entry is the intercept.
x = np.linalg.lstsq(A, Y, rcond=0)[0]
m, b = x[:N_FEATURES], x[N_FEATURES]

## Local Prediction

In [None]:
# Fetch the served feature values for one entity from Feast serving.
entity_id = '3'  # stands in for a real key such as a user id
features = fs.get_serving_data(feature_set, entity_keys=[entity_id])
print(features)


# we should also probably change x to "id"
def local_predict(x):
    """Apply the fitted coefficients ``m`` and intercept ``b`` to ``x`` element-wise."""
    # normally the Feast client would be inserted here.
    weighted = m * x
    return weighted + b


np.set_printoptions(precision=3)
feature = np.random.randint(1, high=100, size=100)
p = local_predict(feature)
print(p)

In [None]:
# retrieve features from Feast serving
entity_id = '3' # this would typically be the user id
features = fs.get_serving_data(feature_set, entity_keys=[entity_id])
print(features)

# we should also probably change x to "id"
def local_predict(id):
    """Query Feast serving for ``id`` and apply the fitted linear terms to it.

    Args:
        id: value forwarded to Feast as the entity key and multiplied
            element-wise with the coefficients ``m``.

    Returns:
        ``m * id + b``.
    """
    # NOTE(review): the serving response is not used in the prediction yet,
    # and the caller below passes a random feature vector as `id` -- confirm
    # what the entity key should be once real Feast features feed the model.
    fs.get_serving_data(feature_set, entity_keys=[id])
    # normally the Feast client would be inserted here.
    # Use the argument, not the global `x`: `x` is the 101-element lstsq
    # solution and cannot be broadcast against the 100 coefficients in `m`.
    return m * id + b

feature = np.random.randint(1, high=100, size=100)
p = local_predict(feature)
np.set_printoptions(precision=3)
print(p)

## Save the model

In [None]:
import json
import os

MODEL_FILE = 'simple_model.dat'

# Plain-Python view of the fitted parameters so they can be written as JSON.
model = dict(m=m.tolist(), b=b)

# The file goes into $HOME, or into /home when HOME is not set.
home_dir = os.getenv('HOME', '/home')
model_path = os.path.join(home_dir, MODEL_FILE)
print('writing to', model_path)

with open(model_path, 'w+') as model_file:
    json.dump(model, model_file)

## Deploy with Kubeflow

In [None]:
import fairing
import sys
import importlib

# Image locations and the label prefix used by the deployment cells.
DOCKER_REGISTRY = 'gcr.io/{}/fairing-job'.format(PROJECT)
BASE_IMAGE = 'gcr.io/kubeflow-images-public/fairing-base:v20190510'
SERVING_LABEL = 'kubeflow-fairing-demo'

# Echo the resolved image settings.
for caption, value in (('docker registry:', DOCKER_REGISTRY),
                       ('base image:', BASE_IMAGE)):
    print(caption, value)

In [None]:
import deploy_with_fairing
import uuid

# To disambiguate between different deployments.
serving_label = SERVING_LABEL + '-' + uuid.uuid4().hex[:4]
print('Deploying service with selector', serving_label)

# Register for docker credential. Needed for docker image pushes.
# The command is passed as an argument list, so no shell is involved, and a
# non-zero exit status is reported instead of being silently dropped.
docker_auth_status = subprocess.call(
    ['gcloud', 'auth', 'configure-docker', '--quiet'])
if docker_auth_status != 0:
    print('warning: gcloud auth configure-docker exited with status',
          docker_auth_status)

# Reload so edits made to deploy_with_fairing.py since the import are picked up.
importlib.reload(deploy_with_fairing)
deploy_with_fairing.deploy(DOCKER_REGISTRY, BASE_IMAGE, serving_label=serving_label)

In [16]:
import subprocess
from kubernetes import client, config

# Need to set up KUBECONFIG. Kubernetes API client depends on it.
# check_call raises if gcloud fails, so the lookup below never runs against
# stale or missing cluster credentials.
subprocess.check_call(['gcloud', 'container', 'clusters', 'get-credentials',
                       APP_NAME, '--zone', ZONE, '--project', PROJECT])

# load_kube_config() configures the client defaults itself. The settings are
# deliberately NOT replaced with a freshly constructed Configuration()
# afterwards: on newer client releases that object is blank and would discard
# the credentials that were just loaded.
config.load_kube_config()

# Find the service carrying the label assigned at deployment time.
v1 = client.CoreV1Api()
label_selector = 'serving=%s' % serving_label
resp = v1.list_service_for_all_namespaces(label_selector=label_selector)
if not resp.items:
    raise LookupError('no service found with label %r' % label_selector)

# Use the first match; its name and namespace form the serving address later.
service = resp.items[0]
service_name = service.metadata.name
namespace = service.metadata.namespace

print('fairing service: {0}/{1}'.format(namespace, service_name))

NameError: name 'serving_label' is not defined

## Serving with Kubeflow

In [None]:
import json
import requests

def predict(url, data, feature_names=None):
    """POST ``data`` to ``url`` as a JSON-encoded tensor payload.

    Args:
        url: address of the prediction endpoint.
        data: array-like exposing ``.shape`` and ``.flatten()``.
        feature_names: optional names placed under ``data.names``.

    Returns:
        The response object returned by ``requests.post``.
    """
    tensor = {
        "shape": list(data.shape),
        "values": data.flatten().tolist(),
    }
    payload = {"data": {"names": feature_names, "tensor": tensor}}
    # The JSON document travels as the single form field "json".
    return requests.post(url, data={'json': json.dumps(payload)})

def extract_prediction_array(content):
    """Decode a JSON response body and return ``data.tensor.values`` as an array.

    Missing ``data``, ``tensor`` or ``values`` keys fall back to ``None``,
    which ``np.array`` wraps as a zero-dimensional array.
    """
    payload = json.loads(content)
    tensor = payload.get('data', {}).get('tensor', {})
    return np.array(tensor.get('values'))

In [None]:
import numpy as np
import pprint

# Address of the deployed service, built from the name and namespace found above.
url_template = "http://{service_name}.{namespace}.svc.cluster.local:5000/predict"
url = url_template.format(service_name=service_name, namespace=namespace)

# Send one random 100-element vector and decode the returned tensor values.
data = np.random.randint(1, high=100, size=100)
r = predict(url, data)
prediction = extract_prediction_array(r.content)

print('prediction:')
pprint.pprint(prediction)