In [1]:
import argparse
import logging
import snappy
import apache_beam as beam
import apache_beam.transforms.window as window
from apache_beam.examples.wordcount import WordExtractingDoFn
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.transforms.window import TimestampCombiner
from apache_beam.transforms.core import CombinePerKey, GroupByKey
from apache_beam.io.avroio import ReadFromAvro
from apache_beam.transforms.combiners import Sample
import json
from operator import itemgetter
import numpy as np
from apache_beam.transforms.userstate import BagStateSpec, CombiningValueStateSpec,  TimerSpec, on_timer, StateSpec
from apache_beam.coders.coders import VarIntCoder, PickleCoder, BytesCoder, StrUtf8Coder, FastPrimitivesCoder
from apache_beam.transforms.timeutil import TimeDomain
from apache_beam.transforms.combiners import CountCombineFn
import time
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to
import unittest
from apache_beam.metrics.metric import Metrics

  'Running the Apache Beam SDK on Python 3 is not yet fully supported. '


In [None]:
{"event_id": ["okex_spot", "depthUpdate", "ETH", "BTC", "1557669000145"], "event_time": 1557669000.145, "event_datetime": "2019-05-12T13:50:00+0000", "event_type": "depthUpdate", "event_time_ms": 1557669000145, "produced_time": 1557669000.3499093, "produced_datetime": "2019-05-12T13:50:00", "exchange": "okex_spot", "symbol": "ETHBTC", "quote_asset": "BTC", "base_asset": "ETH", "market_type": "spot", "first_update_id": null, "final_update_id": null, "checksum": 1871121761, "bids": [{"level": null, "norm_quantity": null, "quantity": 17.327394, "price": 0.02656, "num_orders": 4, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 31.369013, "price": 0.02654, "num_orders": 12, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 110.559377, "price": 0.02653, "num_orders": 23, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 7.345649, "price": 0.02652, "num_orders": 7, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 38.939082, "price": 0.02648, "num_orders": 6, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 3.95411, "price": 0.02645, "num_orders": 5, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}], "asks": [{"level": null, "norm_quantity": null, "quantity": 9.967403, "price": 0.02657, "num_orders": 5, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 26.382501, "price": 0.02658, "num_orders": 14, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 23.91682, "price": 0.02659, 
"num_orders": 6, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 18.944143, "price": 0.0266, "num_orders": 5, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 60.553544, "price": 0.02661, "num_orders": 9, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 56.667377, "price": 0.02665, "num_orders": 11, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 36.214446, "price": 0.02667, "num_orders": 6, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}, {"level": null, "norm_quantity": null, "quantity": 0.500449, "price": 0.02677, "num_orders": 2, "force_liquidated_orders": null, "__faust": {"ns": "models.depth.DepthLevel"}}], "__faust": {"ns": "models.depth.DepthUpdate"}}

In [None]:
class EnrichDepthsDoFn(beam.DoFn):
    """Stateful DoFn that folds buffered depth-update levels into bid/ask books.

    ``process`` flattens every bid and ask level of an incoming depth update
    into the ``buffer`` bag state and arms a watermark timer.  When the timer
    fires, ``expiry`` replays the buffered levels in ``time`` order on top of
    the previously stored bid and ask books (price -> quantity), writes the
    merged books back to state and clears the buffer.

    All state cells use ``StrUtf8Coder``, so every entry is stored as a JSON
    string: buffered levels as an object with ``side``/``price``/``quantity``/
    ``time`` keys, book levels as a ``[price, quantity]`` pair.

    NOTE(review): neither method emits any output yet; the merged books are
    only persisted in state.
    NOTE(review): Beam scopes user state per key and window, so how long the
    previous books survive depends on the upstream windowing -- confirm.
    """

    MAX_BUFFER_SIZE = 500

    # Levels received since the last timer firing, one JSON string per level.
    BUFFERED_STATE = BagStateSpec(
        'buffer',
        StrUtf8Coder()
    )

    # Not read or written by process() or expiry(); kept for compatibility.
    PREV_STATE = BagStateSpec(
        'prev',
        StrUtf8Coder()
    )

    # Last merged bid book.  State names must be unique within a DoFn, so this
    # must not reuse the 'prev' name of PREV_STATE.
    PREV_BIDS = BagStateSpec(
        'prev_bids',
        StrUtf8Coder()
    )

    # Last merged ask book.
    PREV_ASKS = BagStateSpec(
        'prev_asks',
        StrUtf8Coder()
    )

    EXPIRY_TIMER = TimerSpec(
        'expiry',
        TimeDomain.WATERMARK
    )

    MAX_BUFFER_DURATION = 5
    ALLOWED_LATENESS = 5

    @staticmethod
    def _encode_level(side, level, event_time_ms):
        """Serialize one price level of a depth update for the buffer state."""
        return json.dumps({
            "side": side,
            "price": level["price"],
            "quantity": level["quantity"],
            "time": event_time_ms,
        })

    @staticmethod
    def _load_book(state):
        """Rebuild a price -> quantity dict from a bag of JSON pairs."""
        return {
            price: quantity
            for price, quantity in map(json.loads, state.read())
        }

    @staticmethod
    def _store_book(state, book):
        """Replace the bag's contents with one JSON pair per book level."""
        state.clear()
        for price, quantity in book.items():
            state.add(json.dumps([price, quantity]))

    def process(
            self,
            element,
            w=beam.DoFn.WindowParam,
            buffer_state=beam.DoFn.StateParam(BUFFERED_STATE),
            expiry_timer=beam.DoFn.TimerParam(EXPIRY_TIMER)
    ):
        """Buffer every bid and ask level of one depth update.

        Args:
            element: a pair whose second item is the depth-update mapping; the
                mapping is read for ``event_time_ms``, ``bids`` and ``asks``.
            w: the element's window; its ``end`` schedules the timer.
            buffer_state: bag state that receives the serialized levels.
            expiry_timer: watermark timer that triggers ``expiry``.
        """
        # Fire ALLOWED_LATENESS past the end of the window.
        expiry_timer.set(w.end + EnrichDepthsDoFn.ALLOWED_LATENESS)

        event = element[1]
        event_time_ms = event["event_time_ms"]

        for side, field in (("bid", "bids"), ("ask", "asks")):
            # A missing or null side is treated as "no levels" rather than
            # failing the whole bundle.
            for level in event.get(field) or ():
                buffer_state.add(
                    self._encode_level(side, level, event_time_ms)
                )

    @on_timer(EXPIRY_TIMER)
    def expiry(
            self,
            buffer_state=beam.DoFn.StateParam(BUFFERED_STATE),
            prev_asks=beam.DoFn.StateParam(PREV_ASKS),
            prev_bids=beam.DoFn.StateParam(PREV_BIDS)
    ):
        """Apply the buffered levels, oldest first, to the stored books.

        Args:
            buffer_state: bag of serialized levels written by ``process``.
            prev_asks: bag holding the previous ask book.
            prev_bids: bag holding the previous bid book.
        """
        books = {
            "ask": self._load_book(prev_asks),
            "bid": self._load_book(prev_bids),
        }

        # Sorting by event time makes the latest update for a price win.
        updates = sorted(
            map(json.loads, buffer_state.read()),
            key=itemgetter('time')
        )
        for update in updates:
            book = books.get(update["side"])
            if book is not None:
                book[update["price"]] = update["quantity"]

        # Persist the merged books so the next firing starts from them.
        self._store_book(prev_asks, books["ask"])
        self._store_book(prev_bids, books["bid"])

        buffer_state.clear()