## Hacky Boilerplates
- Install `feast` with pip.
- Activate user service account with credentials JSON.
- Hacks to retrieve essential information for deployments and serving.

**NOTE**: This code block might hangs for a long time.

In [None]:
!pip install feast

In [20]:
import pandas as pd
import numpy as np

from feast.sdk.resources.entity import Entity
from feast.sdk.resources.storage import Storage
from feast.sdk.resources.feature import Feature, Datastore, ValueType
from feast.sdk.resources.feature_set import FeatureSet, FileType
import feast.specs.FeatureSpec_pb2 as feature_pb

from feast.sdk.importer import Importer

from feast.sdk.client import Client

In [50]:
# Connect to the local Feast deployment
# FEAST_CORE_URL = '10.148.0.46:30576'
FEAST_CORE_URL = 'localhost:6565'
STAGING_LOCATION = 'gs://kubecon-19-gojek/staging'
fs = Client(core_url=FEAST_CORE_URL, verbose=True)

In [9]:
import json
import os
import re
import subprocess

from googleapiclient import discovery
from oauth2client.client import GoogleCredentials

# Install dependencies
_ = subprocess.call(['pip', 'install', 'feast'], shell=True)
# Retrieve user service account.
_ = subprocess.call(['gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS'],
                    shell=True)

# Create KUBECONFIG. Use credential file to retrieve project/deployment names.
cred_path = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
cred = {}
with open(cred_path, 'r') as c:
    cred = json.load(c)

PROJECT = cred['project_id']
APP_NAME = re.search('([a-z\-]+)-user'.format(PROJECT),
                     cred['client_email']).group(1)

p = subprocess.Popen(['gcloud', 'container', 'clusters', 'list',
                      '--filter', 'name=%s' % APP_NAME, '--format', 'json'],
                    stdout=subprocess.PIPE)
out, _ = p.communicate()
config = json.loads(out)[0]
ZONE = config['zone']

print('PROJECT =', PROJECT)
print('APP_NAME =', APP_NAME)
print('ZONE =', ZONE)

TypeError: expected str, bytes or os.PathLike object, not NoneType

## Load precomputed feature data

In [62]:
df = pd.read_csv('data.csv', names=['entity','datetime','feature1','feature2','feature3','feature4'], index_col=False)
df['datetime'] = pd.to_datetime(df['datetime'])

df.head()

Unnamed: 0,entity,datetime,feature1,feature2,feature3,feature4
0,0,2018-01-01 00:00:00,0.16655,0.077912,8,1
1,0,2018-01-01 00:01:00,0.350554,0.378997,4,7
2,0,2018-01-01 00:02:00,0.922618,0.317972,8,6
3,0,2018-01-01 00:03:00,0.065824,0.055651,9,1
4,0,2018-01-01 00:04:00,0.847057,0.213472,2,10


## Register entity and features

In [63]:
# from feast.sdk.resources.entity import Entity
# from feast.sdk.resources.feature import Feature


# # Register a simple entity
# demo_entity = Entity(name='demo_entity', description='My simple demo entity')
# fs.apply(demo_entity)

# # Register five numeric features on this entity
# from feast.sdk.resources.feature import Feature

# my_simple

# Now that we have finished creating our features, we ingest them into feast

# Initialise client
# fs = Client(core_url=FEAST_CORE_URL, verbose=True)

serving_ds=Datastore(id='SERVING')
warehouse_ds=Datastore(id='WAREHOUSE')

# Create importer
importer = Importer.from_df(df, 
                           entity='simple_entity', 
                           owner='user@website.com',  
                           staging_location=STAGING_LOCATION,
                           id_column='entity', 
                           timestamp_column='dt',
                           serving_store=serving_ds,
                           warehouse_store=warehouse_ds)

# Update feature and entity metadata. Ideally you want to update these manually
# so that they contain adequate information for the next user
importer.entity.description = 'nyc taxi dataset' 
for feature_id in importer.features:
    importer.features[feature_id].description = 'nyc taxi dataset'
    
# Ingest the feature data into the store
fs.run(importer, apply_features=True, apply_entity=True)


Successfully applied entity with name: simple_entity
---
name: simple_entity
description: nyc taxi dataset

Successfully applied feature with id: simple_entity.datetime
---
id: simple_entity.datetime
name: datetime
owner: user@website.com
description: nyc taxi dataset
valueType: TIMESTAMP
entity: simple_entity
dataStores:
  serving:
    id: SERVING
  warehouse:
    id: WAREHOUSE

Successfully applied feature with id: simple_entity.feature1
---
id: simple_entity.feature1
name: feature1
owner: user@website.com
description: nyc taxi dataset
valueType: DOUBLE
entity: simple_entity
dataStores:
  serving:
    id: SERVING
  warehouse:
    id: WAREHOUSE

Successfully applied feature with id: simple_entity.feature2
---
id: simple_entity.feature2
name: feature2
owner: user@website.com
description: nyc taxi dataset
valueType: DOUBLE
entity: simple_entity
dataStores:
  serving:
    id: SERVING
  warehouse:
    id: WAREHOUSE

Successfully applied feature with id: simple_entity.feature3
---
id: simp

KeyError: 'dt'

In [53]:
dir(Importer)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'describe',
 'dump',
 'entity',
 'features',
 'from_bq',
 'from_csv',
 'from_df',
 'remote_path',
 'require_staging',
 'size',
 'source',
 'spec',
 'stage']

## Linear Model

In [8]:
import numpy as np

train_data = np.random.randint(1, high=100, size=(200, 100))
A = np.insert(train_data, 100, 1, axis=1)
Y = np.random.randint(1, high=100, size=200)

x = np.linalg.lstsq(A, Y, rcond=0)[0]
m, b = x[:100], x[100]

[64 53 27 30 74 90 50 62 64 62 35 46 51 22 41 89 85  5 61 40 52  6 40 13
 86 68  1 52 51  9 46  5 64 81 50 59 80 52 38 53  7 89 66 69 61 99  2 94
 73 55 14 77 41 79 89 71 89 32 91 26 52 96 31 26 13 70 92 78 31 46 10 45
 64 69  5 15 61 99 95 82 57 19  3 64 90 44 49 89 49 82 55 90 27 18  1 55
 14 91 48 62]


## Local Prediction

In [None]:
def local_predict(x):
    return m * x + b

feature = np.random.randint(1, high=100, size=100)
p = local_predict(feature)
np.set_printoptions(precision=3)
print(p)

## Save the model

In [None]:
import json
import os

MODEL_FILE = 'simple_model.dat'

model = {
    'm': m.tolist(),
    'b': b,
}

model_path = os.path.join(os.getenv('HOME', '/home'), MODEL_FILE)
print('writing to', model_path)

with open(model_path, 'w+') as f:
    json.dump(model, f)

## Deploy with Kubeflow

In [None]:
import fairing
import sys
import importlib

DOCKER_REGISTRY = 'gcr.io/{}/fairing-job'.format(PROJECT)
BASE_IMAGE = 'gcr.io/kubeflow-images-public/fairing-base:v20190510'
SERVING_LABEL = 'kubeflow-fairing-demo'

print('docker registry:', DOCKER_REGISTRY)
print('base image:', BASE_IMAGE)

In [None]:
import deploy_with_fairing
import uuid

# To disambiguate between different deployments.
serving_label = SERVING_LABEL + '-' + uuid.uuid4().hex[:4]
print('Deploying service with selector', serving_label)

# Register for docker credential. Needed for docker image pushes.
_ = subprocess.call(['gcloud auth configure-docker --quiet'], shell=True)

importlib.reload(deploy_with_fairing)
deploy_with_fairing.deploy(DOCKER_REGISTRY, BASE_IMAGE, serving_label=serving_label)

In [None]:
import subprocess
from kubernetes import client, config

# Need to set up KUBECONFIG. Kubernetes API client depends on it.
subprocess.call(['gcloud', 'container', 'clusters', 'get-credentials', APP_NAME,
                 '--zone', ZONE, '--project', PROJECT])
config.load_kube_config()
c = client.Configuration()
client.Configuration.set_default(c)

v1 = client.CoreV1Api()
body = client.V1Service()
label_selector = 'serving=%s' % serving_label
resp = v1.list_service_for_all_namespaces(label_selector=label_selector)

service_name = resp.items[0].metadata.name
namespace = resp.items[0].metadata.namespace

print('fairing service: {0}/{1}'.format(namespace, service_name))

## Serving with Kubeflow

In [None]:
import json
import requests

def predict(url, data, feature_names=None):
    pdata={
        "data": {
            "names":feature_names,
            "tensor": {
                "shape": np.asarray(data.shape).tolist(),
                "values": data.flatten().tolist(),
            },
        }
    }
    serialized_data = json.dumps(pdata)
    r = requests.post(url, data={'json':serialized_data})
    return r

def extract_prediction_array(content):
    c = json.loads(content)
    return np.array(c.get('data', {}).get('tensor', {}).get('values'))

In [None]:
import numpy as np
import pprint

url = "http://{service_name}.{namespace}.svc.cluster.local:5000/predict".format(
    service_name=service_name,
    namespace=namespace)

data = np.random.randint(1, high=100, size=100)
r = predict(url, data)

prediction = extract_prediction_array(r.content)
print('prediction:')
pprint.pprint(prediction)