In [1]:
import waterhealer as wh
import json

In [2]:
# Build the streaming source with waterhealer's Kafka reader: the first
# argument lists the topics (only 'testing' here), the second is the consumer
# configuration dictionary.
source = wh.from_kafka(
    ['testing'],
    {
        # Two local brokers, given as one comma-separated string.
        'bootstrap.servers': 'localhost:9095, localhost:9096',
        'group.id': 'group-test-4',
        # Kafka consumer setting: with no committed offset for the group,
        # start reading from the end of the partition.
        'auto.offset.reset': 'latest',
    },
    # NOTE(review): presumably this enables the
    # "topic: ..., partition: ..., offset: ..." lines seen in later output — confirm.
    debug = True)

In [3]:
def json_loads(row):
    """Parse the JSON text stored in the second slot of ``row``.

    The first slot is passed through untouched; the result is a new
    2-tuple ``(row[0], parsed_payload)``.
    """
    passthrough = row[0]
    decoded = json.loads(row[1])
    return (passthrough, decoded)

def plus(row):
    """Increment the ``'no'`` field of the row's payload by one.

    ``row[1]`` must be a mapping with a ``'no'`` entry. Values greater than
    50 are rejected with an ``Exception`` (used in this notebook to provoke
    a failure on purpose); otherwise ``(row[0], no + 1)`` is returned.
    """
    payload = row[1]
    number = payload['no']
    if number > 50:
        raise Exception('row is bigger than 50')
    incremented = number + 1
    return (row[0], incremented)

If you stream at high velocity, you may find that `healing_batch` is not fast enough. So, can we run `healing_batch` over a batch in an asynchronous manner?

Yes, we can: use `foreach_async`!

`foreach_async` will execute the same function for each element in the batch.

In [4]:
# Results that make it through the whole pipeline are appended here by the sink.
outputs = []
# Pipeline: decode the JSON payload, apply `plus`, then group rows into a tuple
# of 5 tuples of 5 rows (partition(5) twice). `foreach_async` applies
# wh.healing_batch to each element of that outer batch; the two flatten()
# calls undo the two partition levels before each item is appended to `outputs`.
source.map(json_loads).map(plus).partition(5).partition(5)\
.foreach_async(wh.healing_batch, stream = source, asynchronous = True)\
.flatten().flatten().sink(outputs.append)

Output()

`partition(5).partition(5)` emits a tuple of five 5-element tuples:

```python
(tuple_of_5, tuple_of_5, tuple_of_5, tuple_of_5, tuple_of_5)
```

We only start to update offsets once we have received a full batch, i.e. `Tuple[len(Tuple)==5]`.

In [5]:
# Start the source so the pipeline defined above begins receiving messages.
source.start()

In [6]:
from kafka import KafkaProducer
from kafka.partitioner import RoundRobinPartitioner

In [7]:
# kafka-python producer used to feed test messages into the stream. It points
# at the same two local brokers as the consumer configuration.
producer = KafkaProducer(
    bootstrap_servers = ['localhost:9095', 'localhost:9096'],
    api_version = (0, 10),
    # Round-robin partitioner instead of the default one, so successive
    # messages are spread over the topic's partitions.
    partitioner = RoundRobinPartitioner(),
)

In [8]:
# Sanity check: displays whether the producer is connected to a bootstrap broker.
producer.bootstrap_connected()

True

In [9]:
from datetime import datetime
import json
from itertools import cycle

def publish_message(producer_instance, topic_name, key, value):
    """Send one UTF-8 encoded key/value message to a topic.

    Parameters
    ----------
    producer_instance
        Object exposing ``send(topic, key=..., value=...)``; in this notebook
        a ``kafka.KafkaProducer``.
    topic_name : str
        Topic to publish to.
    key : str
        Message key; encoded to UTF-8 bytes before sending.
    value : str
        Message payload; encoded to UTF-8 bytes before sending.

    Returns
    -------
    bool
        ``True`` if the message was handed to the producer without an
        exception, ``False`` otherwise (the error is printed, not raised).
    """
    try:
        key_bytes = bytes(key, encoding = 'utf-8')
        value_bytes = bytes(value, encoding = 'utf-8')
        # Forward the encoded key as well: previously it was computed and then
        # silently dropped, so every message was published without a key.
        producer_instance.send(topic_name, key = key_bytes, value = value_bytes)
        return True
    except Exception as ex:
        # Best-effort publishing: report the failure and let the caller decide.
        print('Exception in publishing message')
        print(str(ex))
        return False

## Purposely raise an error

Based on the function `plus`, if the input is bigger than 50, it will throw an error.

In [10]:
import confluent_kafka as ck

# Look up the committed offset for partition 0 of 'testing' on the source's
# consumer, as a baseline before any new messages are published.
source.consumer.committed([ck.TopicPartition('testing', 0)])[0].offset

263

In [11]:
import time

# Publish 60 messages with no = 0..59, one every 0.2 seconds. Values above 50
# make `plus` raise in the consuming pipeline, which is the failure this
# notebook wants to demonstrate.
for count in range(60):
    data = {'no': count, 'datetime': str(datetime.now())}
    print(data)
    publish_message(
        producer, 'testing', 'streaming', json.dumps(data)
    )
    # Throttle so producer and consumer output interleave readably.
    time.sleep(0.2)

{'no': 0, 'datetime': '2020-03-07 21:06:30.542224'}
{'no': 1, 'datetime': '2020-03-07 21:06:30.743152'}


topic: testing, partition: 0, offset: 263, data: b'{"no": 0, "datetime": "2020-03-07 21:06:30.542224"}'
topic: testing, partition: 2, offset: 249, data: b'{"no": 2, "datetime": "2020-03-07 21:06:30.944148"}'


{'no': 2, 'datetime': '2020-03-07 21:06:30.944148'}
{'no': 3, 'datetime': '2020-03-07 21:06:31.149697'}
{'no': 4, 'datetime': '2020-03-07 21:06:31.350919'}


topic: testing, partition: 4, offset: 221, data: b'{"no": 4, "datetime": "2020-03-07 21:06:31.350919"}'


{'no': 5, 'datetime': '2020-03-07 21:06:31.552924'}


topic: testing, partition: 6, offset: 210, data: b'{"no": 6, "datetime": "2020-03-07 21:06:31.755028"}'


{'no': 6, 'datetime': '2020-03-07 21:06:31.755028'}
{'no': 7, 'datetime': '2020-03-07 21:06:31.959511'}
{'no': 8, 'datetime': '2020-03-07 21:06:32.160347'}


topic: testing, partition: 8, offset: 207, data: b'{"no": 8, "datetime": "2020-03-07 21:06:32.160347"}'


{'no': 9, 'datetime': '2020-03-07 21:06:32.363402'}
{'no': 10, 'datetime': '2020-03-07 21:06:32.565758'}
{'no': 11, 'datetime': '2020-03-07 21:06:32.766589'}


topic: testing, partition: 0, offset: 264, data: b'{"no": 10, "datetime": "2020-03-07 21:06:32.565758"}'
topic: testing, partition: 2, offset: 250, data: b'{"no": 12, "datetime": "2020-03-07 21:06:32.968442"}'


{'no': 12, 'datetime': '2020-03-07 21:06:32.968442'}
{'no': 13, 'datetime': '2020-03-07 21:06:33.169955'}


topic: testing, partition: 4, offset: 222, data: b'{"no": 14, "datetime": "2020-03-07 21:06:33.376026"}'


{'no': 14, 'datetime': '2020-03-07 21:06:33.376026'}
{'no': 15, 'datetime': '2020-03-07 21:06:33.577786'}


topic: testing, partition: 6, offset: 211, data: b'{"no": 16, "datetime": "2020-03-07 21:06:33.781661"}'


{'no': 16, 'datetime': '2020-03-07 21:06:33.781661'}
{'no': 17, 'datetime': '2020-03-07 21:06:33.982581'}


topic: testing, partition: 8, offset: 208, data: b'{"no": 18, "datetime": "2020-03-07 21:06:34.186938"}'


{'no': 18, 'datetime': '2020-03-07 21:06:34.186938'}
{'no': 19, 'datetime': '2020-03-07 21:06:34.391957'}


topic: testing, partition: 0, offset: 265, data: b'{"no": 20, "datetime": "2020-03-07 21:06:34.597766"}'


{'no': 20, 'datetime': '2020-03-07 21:06:34.597766'}
{'no': 21, 'datetime': '2020-03-07 21:06:34.802187'}


topic: testing, partition: 2, offset: 251, data: b'{"no": 22, "datetime": "2020-03-07 21:06:35.005447"}'


{'no': 22, 'datetime': '2020-03-07 21:06:35.005447'}
{'no': 23, 'datetime': '2020-03-07 21:06:35.210654'}


topic: testing, partition: 4, offset: 223, data: b'{"no": 24, "datetime": "2020-03-07 21:06:35.411184"}'


{'no': 24, 'datetime': '2020-03-07 21:06:35.411184'}
{'no': 25, 'datetime': '2020-03-07 21:06:35.615623'}


topic: testing, partition: 6, offset: 212, data: b'{"no": 26, "datetime": "2020-03-07 21:06:35.820357"}'


{'no': 26, 'datetime': '2020-03-07 21:06:35.820357'}
{'no': 27, 'datetime': '2020-03-07 21:06:36.024983'}
{'no': 28, 'datetime': '2020-03-07 21:06:36.225880'}


topic: testing, partition: 8, offset: 209, data: b'{"no": 28, "datetime": "2020-03-07 21:06:36.225880"}'


{'no': 29, 'datetime': '2020-03-07 21:06:36.426729'}


topic: testing, partition: 0, offset: 266, data: b'{"no": 30, "datetime": "2020-03-07 21:06:36.630802"}'


{'no': 30, 'datetime': '2020-03-07 21:06:36.630802'}
{'no': 31, 'datetime': '2020-03-07 21:06:36.835094'}


topic: testing, partition: 2, offset: 252, data: b'{"no": 32, "datetime": "2020-03-07 21:06:37.039703"}'


{'no': 32, 'datetime': '2020-03-07 21:06:37.039703'}
{'no': 33, 'datetime': '2020-03-07 21:06:37.242128'}


topic: testing, partition: 4, offset: 224, data: b'{"no": 34, "datetime": "2020-03-07 21:06:37.446527"}'


{'no': 34, 'datetime': '2020-03-07 21:06:37.446527'}
{'no': 35, 'datetime': '2020-03-07 21:06:37.649057'}
{'no': 36, 'datetime': '2020-03-07 21:06:37.851959'}
{'no': 37, 'datetime': '2020-03-07 21:06:38.053436'}


topic: testing, partition: 6, offset: 213, data: b'{"no": 36, "datetime": "2020-03-07 21:06:37.851959"}'
topic: testing, partition: 8, offset: 210, data: b'{"no": 38, "datetime": "2020-03-07 21:06:38.256978"}'


{'no': 38, 'datetime': '2020-03-07 21:06:38.256978'}
{'no': 39, 'datetime': '2020-03-07 21:06:38.462860'}


topic: testing, partition: 0, offset: 267, data: b'{"no": 40, "datetime": "2020-03-07 21:06:38.663693"}'


{'no': 40, 'datetime': '2020-03-07 21:06:38.663693'}
{'no': 41, 'datetime': '2020-03-07 21:06:38.867563'}


topic: testing, partition: 2, offset: 253, data: b'{"no": 42, "datetime": "2020-03-07 21:06:39.072032"}'


{'no': 42, 'datetime': '2020-03-07 21:06:39.072032'}
{'no': 43, 'datetime': '2020-03-07 21:06:39.275037'}
{'no': 44, 'datetime': '2020-03-07 21:06:39.477524'}


topic: testing, partition: 4, offset: 225, data: b'{"no": 44, "datetime": "2020-03-07 21:06:39.477524"}'


{'no': 45, 'datetime': '2020-03-07 21:06:39.678091'}


topic: testing, partition: 6, offset: 214, data: b'{"no": 46, "datetime": "2020-03-07 21:06:39.882560"}'


{'no': 46, 'datetime': '2020-03-07 21:06:39.882560'}
{'no': 47, 'datetime': '2020-03-07 21:06:40.086523'}


topic: testing, partition: 8, offset: 211, data: b'{"no": 48, "datetime": "2020-03-07 21:06:40.289649"}'


{'no': 48, 'datetime': '2020-03-07 21:06:40.289649'}
{'no': 49, 'datetime': '2020-03-07 21:06:40.491988'}
{'no': 50, 'datetime': '2020-03-07 21:06:40.693707'}
{'no': 51, 'datetime': '2020-03-07 21:06:40.896380'}
{'no': 52, 'datetime': '2020-03-07 21:06:41.100873'}
{'no': 53, 'datetime': '2020-03-07 21:06:41.301329'}
{'no': 54, 'datetime': '2020-03-07 21:06:41.502680'}
{'no': 55, 'datetime': '2020-03-07 21:06:41.703795'}
{'no': 56, 'datetime': '2020-03-07 21:06:41.909176'}
{'no': 57, 'datetime': '2020-03-07 21:06:42.112695'}
{'no': 58, 'datetime': '2020-03-07 21:06:42.317583'}
{'no': 59, 'datetime': '2020-03-07 21:06:42.522394'}


topic: testing, partition: 0, offset: 268, data: b'{"no": 50, "datetime": "2020-03-07 21:06:40.693707"}'
topic: testing, partition: 2, offset: 254, data: b'{"no": 52, "datetime": "2020-03-07 21:06:41.100873"}'
row is bigger than 50
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/streamz/core.py", line 578, in update
    result = self.func(x, *self.args, **self.kwargs)
  File "<ipython-input-3-d429f1b7c5ca>", line 7, in plus
    raise Exception('row is bigger than 50')
Exception: row is bigger than 50
tornado.application - ERROR - Exception in callback functools.partial(<bound method IOLoop._discard_future_result of <zmq.eventloop.ioloop.ZMQIOLoop object at 0x104ee9690>>, <Future finished exception=Exception('row is bigger than 50')>)
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/usr/local/lib/python3.7/site-packages/tornado/ioloop.py", line 

In [12]:
# Count the items that reached the sink (appended to `outputs` by the pipeline).
len(outputs)

25

Again, if a single element throws an exception, the entire batch will not be updated.