# Enrich Stream
  --------------------------------------------------------------------

##### This notebook will create a function that will enrich relevant events with socioeconomic data.


## Create and Test a Local Function 
Import the nuclio SDK and magics. <b>Do not remove this cell or its <code># nuclio: ignore</code> comment!</b>

In [1]:
# nuclio: ignore
import nuclio

#### Function imports

In [2]:
# nuclio: start-code

In [3]:
import os
import hashlib
import json
import v3io.dataplane

<b>Specify function dependencies and configuration</b>

In [4]:
%nuclio cmd -c pip install v3io

In [5]:
%%nuclio env
V3IO_ACCESS_KEY = ${V3IO_ACCESS_KEY}
CONTAINER = users
OUTPUT_STREAM_PATH = ${V3IO_USERNAME}/examples/rapid-prototype/enriched-events-stream
SHARDS_COUNT = 8
PARTITION_ATTR = user_id

ENRICHMENT_TABLE_PATH = ${V3IO_USERNAME}/examples/rapid-prototype/enrichment-table
ENRICHMENT_KEY = postcode

%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'CONTAINER' environment variable
%nuclio: setting 'OUTPUT_STREAM_PATH' environment variable
%nuclio: setting 'SHARDS_COUNT' environment variable
%nuclio: setting 'PARTITION_ATTR' environment variable
%nuclio: setting 'ENRICHMENT_TABLE_PATH' environment variable
%nuclio: setting 'ENRICHMENT_KEY' environment variable


In [6]:
%%nuclio config
spec.triggers.v3io_stream.kind = "v3ioStream"
spec.triggers.v3io_stream.disabled = false
spec.triggers.v3io_stream.url = "http://v3io-webapi:8081/users/${V3IO_USERNAME}/examples/rapid-prototype/incoming-events-stream@enrichstream"
spec.triggers.v3io_stream.maxWorkers = 10
spec.triggers.v3io_stream.password = "${V3IO_ACCESS_KEY}"
spec.triggers.v3io_stream.attributes.pollingIntervalMs = 500
spec.triggers.v3io_stream.attributes.seekTo = "earliest"
spec.triggers.v3io_stream.attributes.readBatchSize = 64


%nuclio: setting spec.triggers.v3io_stream.kind to 'v3ioStream'
%nuclio: setting spec.triggers.v3io_stream.disabled to False
%nuclio: setting spec.triggers.v3io_stream.url to 'http://v3io-webapi:8081/users/michaelk/examples/rapid-prototype/incoming-events-stream@enrichstream'
%nuclio: setting spec.triggers.v3io_stream.maxWorkers to 10
%nuclio: setting spec.triggers.v3io_stream.password to '<redacted>'
%nuclio: setting spec.triggers.v3io_stream.attributes.pollingIntervalMs to 500
%nuclio: setting spec.triggers.v3io_stream.attributes.seekTo to 'earliest'
%nuclio: setting spec.triggers.v3io_stream.attributes.readBatchSize to 64


### Manage output stream

In [7]:
# nuclio: ignore

# Notebook-side settings, read from the process environment.
V3IO_ACCESS_KEY = os.environ.get('V3IO_ACCESS_KEY')
CONTAINER = os.environ.get('CONTAINER')
OUTPUT_STREAM_PATH = os.environ.get('OUTPUT_STREAM_PATH')
SHARDS_COUNT = int(os.environ.get('SHARDS_COUNT'))

# Data-plane client used by the stream management cells below.
v3io_client = v3io.dataplane.Client(
    endpoint='http://v3io-webapi:8081',
    access_key=V3IO_ACCESS_KEY,
)


#### Create output stream

In [8]:
# nuclio: ignore
# Create the output stream. Errors are not raised by the client here so that
# re-running the notebook against an already-existing stream does not abort;
# the status code is checked explicitly instead.
resp = v3io_client.create_stream(container=CONTAINER,
                                 path=OUTPUT_STREAM_PATH,
                                 shard_count=SHARDS_COUNT,
                                 raise_for_status=v3io.dataplane.RaiseForStatus.never)

# 204 is the status observed on successful creation. 409 is presumed to be the
# "stream already exists" status -- TODO confirm against the v3io web-API docs.
if resp.status_code not in (204, 409):
    raise RuntimeError(
        'Failed to create output stream {!r}: status {}, body {!r}'.format(
            OUTPUT_STREAM_PATH, resp.status_code, resp.body))

resp.status_code

204

#### Delete output stream

In [None]:
# nuclio: ignore
# Remove the output stream; the response body is shown as the cell output.
resp = v3io_client.delete_stream(
    container=CONTAINER,
    path=OUTPUT_STREAM_PATH,
)
resp.body

## Function code

In [8]:
def init_context(context):
    """Attach the v3io client and stream/enrichment settings to ``context``.

    All settings are read from environment variables. After this call the
    context carries: ``v3io_client``, ``partition_attr``, ``shards_count``
    (converted to ``int``), ``container``, ``output_stream_path``,
    ``enrichment_table_path`` and ``enrichment_key``.

    Args:
        context: object on which the attributes above are set.
    """
    env = os.environ.get

    # The client is built first, before any attribute is assigned.
    client = v3io.dataplane.Client(endpoint='http://v3io-webapi:8081',
                                   access_key=env('V3IO_ACCESS_KEY'))

    # Output stream settings.
    context.v3io_client = client
    context.partition_attr = env('PARTITION_ATTR')
    context.shards_count = int(env('SHARDS_COUNT'))
    context.container = env('CONTAINER')
    context.output_stream_path = env('OUTPUT_STREAM_PATH')

    # Enrichment table settings.
    context.enrichment_table_path = env('ENRICHMENT_TABLE_PATH')
    context.enrichment_key = env('ENRICHMENT_KEY')


def handler(context, event):
    """Forward an incoming event to the output stream, enriching registrations.

    The event body is used directly when it is a dict and parsed as JSON
    otherwise. Events whose ``event_type`` is ``'registration'`` are passed
    through ``enrich_event`` first; every other event, including one without
    an ``event_type`` field, is forwarded unchanged.

    Args:
        context: nuclio context prepared by ``init_context``. Reads
            ``partition_attr``, ``v3io_client``, ``container``,
            ``output_stream_path`` and ``logger``.
        event: nuclio event; ``event.body`` is a dict or a JSON document.

    Returns:
        The status code of the ``put_records`` response.
    """
    body = event.body
    # isinstance (rather than an exact type check) also accepts dict subclasses.
    event_dict = body if isinstance(body, dict) else json.loads(body)

    context.logger.info_with('Got invoked',
                             trigger_kind=event.trigger.kind,
                             event_body=event_dict)

    # NOTE(review): if the partition attribute is absent this is None, which
    # event_to_record stringifies -- such events all share one partition key.
    partition_key = event_dict.get(context.partition_attr)

    # .get() so an event lacking 'event_type' is forwarded un-enriched instead
    # of failing with KeyError.
    if event_dict.get('event_type') == 'registration':
        event_dict = enrich_event(context, event_dict)
    record = event_to_record(event_dict, partition_key)

    resp = context.v3io_client.put_records(container=context.container,
                                           path=context.output_stream_path,
                                           records=[record],
                                           raise_for_status=v3io.dataplane.RaiseForStatus.never)

    # Only decode when there is a bytes payload, so logging can never be the
    # reason an invocation fails.
    raw_body = resp.body
    if isinstance(raw_body, (bytes, bytearray)):
        response_body = raw_body.decode('utf-8')
    else:
        response_body = raw_body

    # The client does not raise on errors here, so report failures explicitly
    # rather than logging them as a successful send.
    if 200 <= resp.status_code <= 299:
        context.logger.info_with('Sent event to stream',
                                 record=record,
                                 response_status=resp.status_code,
                                 response_body=response_body)
    else:
        context.logger.warn_with('Failed to send event to stream',
                                 record=record,
                                 response_status=resp.status_code,
                                 response_body=response_body)

    return resp.status_code


def enrich_event(context, event_dict):
    """Merge enrichment-table attributes into an event when a match exists.

    The value of ``event_dict[context.enrichment_key]`` is used as the item
    name under ``context.enrichment_table_path``. On a 2xx response the item's
    attributes are merged over the event's own fields and the merged dict is
    returned; otherwise the original event is returned unchanged.

    Args:
        context: nuclio context prepared by ``init_context``. Reads
            ``enrichment_key``, ``enrichment_table_path``, ``container``,
            ``v3io_client`` and ``logger``.
        event_dict: the parsed event.

    Returns:
        A new merged dict on a successful lookup, else ``event_dict`` itself.
    """
    if context.enrichment_key not in event_dict:
        return event_dict

    enrichment_key_value = event_dict[context.enrichment_key]

    # Join with an explicit '/' rather than os.path.join: the latter discards
    # the table path whenever the (event-supplied) key value starts with '/',
    # which would let an event address an item outside the enrichment table.
    item_path = '{}/{}'.format(context.enrichment_table_path.rstrip('/'),
                               str(enrichment_key_value).lstrip('/'))

    resp = context.v3io_client.get_item(container=context.container,
                                        path=item_path,
                                        raise_for_status=v3io.dataplane.RaiseForStatus.never)

    if 200 <= resp.status_code <= 299:
        # Item attributes take precedence over same-named event fields.
        enriched_event = {**event_dict, **resp.output.item}
        context.logger.info_with('Event was enriched', enriched_event=enriched_event)
        return enriched_event

    # Only decode when there is a bytes payload, so a failed lookup can never
    # turn into a crash while logging.
    raw_body = resp.body
    if isinstance(raw_body, (bytes, bytearray)):
        response_body = raw_body.decode('utf-8')
    else:
        response_body = raw_body

    context.logger.debug_with("Couldn't enrich event",
                              enrichment_key_value=enrichment_key_value,
                              response_status=resp.status_code,
                              response_body=response_body)
    return event_dict

    
def event_to_record(event_dict, partition_key):
    """Build a stream record from an event.

    Args:
        event_dict: the event; serialized to a JSON string.
        partition_key: converted with ``str()``.

    Returns:
        A dict with the keys ``'data'`` and ``'partition_key'``.
    """
    return dict(data=json.dumps(event_dict), partition_key=str(partition_key))

The following end-code annotation tells `nuclio` to stop parsing the notebook from this cell. _**Please do not remove this cell**_:

In [9]:
# nuclio: end-code
# marks the end of a code section

## Test locally

In [10]:
# Simulate a single registration event and run it through the handler locally.
sample_payload = b'{"user_id" : 111111 , "event_type": "registration", "postcode": 11014}'
event = nuclio.Event(body=sample_payload)

init_context(context)
handler(context, event)


Python> 2020-07-29 12:48:20,930 [info] Got invoked: {'trigger_kind': '', 'event_body': {'user_id': 111111, 'event_type': 'registration', 'postcode': 11014}}
Python> 2020-07-29 12:48:20,932 [info] Event was enriched: {'enriched_event': {'user_id': 111111, 'event_type': 'registration', 'postcode': 11014, 'socioeconomic_idx': 2}}
Python> 2020-07-29 12:48:20,935 [info] Sent event to stream: {'record': {'data': '{"user_id": 111111, "event_type": "registration", "postcode": 11014, "socioeconomic_idx": 2}', 'partition_key': '111111'}, 'response_status': 200, 'response_body': '{ "FailedRecordCount":0,"Records": [{ "SequenceNumber":2,"ShardId":5 } ] }'}


200

## Deploy function

In [11]:
%nuclio deploy -p rapid-prototype-mk -n ${V3IO_USERNAME}-enrich-stream

[nuclio] 2020-07-29 12:48:40,306 (info) Build complete
[nuclio] 2020-07-29 12:48:43,351 done updating michaelk-enrich-stream, function address: 192.168.226.12:30498
%nuclio: function deployed
