## Single Consumer - Multiple Publisher
### get()

`get multiple items not more than the specified batch size` **and** `get the items within the timeout`

In [1]:
from queue import Queue, Empty
import time
from threading import Thread
from threading import Event
from typing import Callable, List
import random
import uuid

from typing import Any
from dataclasses import dataclass, field
import uuid
from threading import Event

In [2]:
class BatchedQueue:
    def __init__(self, timeout=1.0, bs=1):
        self.timeout = timeout
        self.bs = bs
        self._queue: Queue = Queue()
        self._result = []
        self._event = Event()

    def get(self):
        entered_at = time.time()
        timeout = self.timeout
        bs = self.bs

        if self._queue.qsize() >= bs:
            return [self._queue.get_nowait() for _ in range(bs)]

        while (
            self._event.wait(timeout - (time.time() - entered_at))
            and self._queue.qsize() < bs
        ):
            True

        result = []
        try:
            for _ in range(bs):
                result.append(self._queue.get_nowait())
            return result
        except Empty:
            return result

    def put(self, item):
        self._queue.put(item)
        if self._event.is_set() and self.size >= self.bs:
            self._event.set()

    @property
    def size(self):
        return self._queue.qsize()

In [3]:
q = BatchedQueue(timeout=2, bs=4)

q.put(1)
q.size

1

In [9]:
t0 = time.time()
print("size", q.size)
q.get()
print("size", q.size)
time.time() - t0

size 1
size 0


2.005971908569336

## Test with a publisher

In [6]:
import random

In [72]:
q = BatchedQueue(timeout=2, bs=4)


def publisher():
    for i in range(16):
        time.sleep(random.randint(0, 1))
        q.put(random.randint(1000, 100000))


thread1 = Thread(target=publisher, daemon=True)
thread2 = Thread(target=publisher, daemon=True)
thread3 = Thread(target=publisher, daemon=True)

thread1.start()
thread2.start()
thread3.start()

In [73]:
q.size

1

In [74]:
for i in range(12):
    t0 = time.time()
    items = q.get()
    print(items)
    t1 = time.time()
    print(f"consumed in {t1-t0:.2f}")

[41763, 37187, 92159, 26374]
consumed in 2.01
[23742, 25296, 38162, 63866]
consumed in 0.00
[59609, 44815, 63455, 36995]
consumed in 0.00
[95961, 8782, 36369, 26241]
consumed in 0.00
[26588, 25025, 62531, 82674]
consumed in 2.01
[81486, 78603, 97243, 12901]
consumed in 0.00
[98119, 30974, 75759, 66431]
consumed in 0.00
[21387, 66863, 44436, 79741]
consumed in 2.00
[71203, 10048, 65738, 49077]
consumed in 0.00
[50332, 45034, 50593, 7350]
consumed in 2.00
[76572, 49153, 62061, 17380]
consumed in 0.00
[68097, 98301, 56020, 56934]
consumed in 2.01


## Batched Processor

In [4]:
@dataclass
class WaitedObject:
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    value: Any = None
    completed: bool = False
    _event: Event = None

    def __post_init__(self):
        self._event = Event()

    def mark_complete(self):
        self.completed = True
        self._event.clear()
    
    def get(self, timeout: float=None):
        if self.completed:
            self._event.clear()
            return self.value
        else:
            self._event.wait(timeout)

In [8]:
a = WaitedObject(value=1)
a.mark_complete()
a.get(1)

1

In [9]:
class BatchProcessor:
    def __init__(self, func: Callable, timeout=1.0, bs=1, ):
        self._batched_queue = BatchedQueue()
        self.func = func
        thread = Thread(target=self._process_queue)
        thread.start()

    def _process_queue(self):
        while True:
            batch: List[WaitedObject] = self._batched_queue.get()
            if not batch: continue
            batch_values = [b.value for b in batch]
            results = self.func(batch_values)
            for b in batch:
                b.mark_complete()

    def put(self, item: Any):
        waited_obj = WaitedObject(value=item)
        self._batched_queue.put(waited_obj)
        return waited_obj        

In [10]:
def fake_ml_api(x):
    time.sleep(random.randint(0, 2))
    return random.randint(0, 1)

In [11]:
processor = BatchProcessor(fake_ml_api, timeout=2, bs=16)

In [12]:
results = []
for i in range(32):
    x = processor.put(i+100)
    print(x)
    results.append(x)

WaitedObject(id='d5024d3c-3761-4da0-b4e0-14afa649e020', value=100, completed=False, _event=<threading.Event at 0x1081b84d0: unset>)
WaitedObject(id='efbbfbae-f033-427b-84c6-2f7069604a24', value=101, completed=False, _event=<threading.Event at 0x1081beb50: unset>)
WaitedObject(id='3a52044f-d1fa-48ac-a03b-eb101dad321b', value=102, completed=False, _event=<threading.Event at 0x1081bc150: unset>)
WaitedObject(id='638303e8-97c3-4abc-8bba-bcb2ca46c342', value=103, completed=False, _event=<threading.Event at 0x1081bd250: unset>)
WaitedObject(id='dd2c6d08-3a3c-4d18-be83-5695a300f6eb', value=104, completed=False, _event=<threading.Event at 0x1081bc390: unset>)
WaitedObject(id='570c0f65-621d-42c8-adff-73ee99b96066', value=105, completed=False, _event=<threading.Event at 0x1081bc810: unset>)
WaitedObject(id='a71196b5-9139-4e81-b374-908e3ae3718c', value=106, completed=False, _event=<threading.Event at 0x1081bcb90: unset>)
WaitedObject(id='0f9d2189-d279-462d-ac87-bd0da1977613', value=107, completed

In [21]:
results

[WaitedObject(id='d5024d3c-3761-4da0-b4e0-14afa649e020', value=100, completed=True, _event=<threading.Event at 0x1081b84d0: unset>),
 WaitedObject(id='efbbfbae-f033-427b-84c6-2f7069604a24', value=101, completed=True, _event=<threading.Event at 0x1081beb50: unset>),
 WaitedObject(id='3a52044f-d1fa-48ac-a03b-eb101dad321b', value=102, completed=True, _event=<threading.Event at 0x1081bc150: unset>),
 WaitedObject(id='638303e8-97c3-4abc-8bba-bcb2ca46c342', value=103, completed=True, _event=<threading.Event at 0x1081bd250: unset>),
 WaitedObject(id='dd2c6d08-3a3c-4d18-be83-5695a300f6eb', value=104, completed=True, _event=<threading.Event at 0x1081bc390: unset>),
 WaitedObject(id='570c0f65-621d-42c8-adff-73ee99b96066', value=105, completed=True, _event=<threading.Event at 0x1081bc810: unset>),
 WaitedObject(id='a71196b5-9139-4e81-b374-908e3ae3718c', value=106, completed=True, _event=<threading.Event at 0x1081bcb90: unset>),
 WaitedObject(id='0f9d2189-d279-462d-ac87-bd0da1977613', value=107, c

In [22]:
results[0].get()

100

In [None]:
results[30].get()