## 8. Robustness and Performance

### 71 Prefer `deque` for Producer–Consumer Queues

In [1]:
class Email:
    """A single email message: who sent it, who receives it, and its text."""

    def __init__(self, sender, receiver, message):
        # Store the three fields unchanged as plain instance attributes.
        self.sender, self.receiver, self.message = sender, receiver, message

In [2]:
def get_emails():
    """Yield a fixed, simulated stream of incoming emails.

    The emails arrive in bursts. After every burst a ``None`` is yielded in
    place of an ``Email`` instance.
    """
    bursts = (
        (
            ('foo@example.com', 'bar@example.com', 'hello1'),
            ('baz@example.com', 'banana@example.com', 'hello2'),
        ),
        (
            ('meep@example.com', 'butter@example.com', 'hello3'),
            ('stuff@example.com', 'avocado@example.com', 'hello4'),
        ),
        (
            ('thingy@example.com', 'orange@example.com', 'hello5'),
            ('roger@example.com', 'bob@example.com', 'hello6'),
        ),
        (
            ('peanut@example.com', 'alice@example.com', 'hello7'),
        ),
    )
    for burst in bursts:
        for sender, receiver, message in burst:
            yield Email(sender, receiver, message)
        yield None  # Marks the end of the current burst

In [3]:
# Module-level iterator over the simulated email stream; try_receive_email()
# advances it with next().
EMAIL_IT = get_emails()

In [4]:
class NoEmailError(Exception):
    """Raised when there is no email to receive."""

In [5]:
def try_receive_email():
    """Return the next email from ``EMAIL_IT``.

    Returns:
        The next truthy item produced by ``EMAIL_IT``.

    Raises:
        NoEmailError: If the iterator is exhausted or its next item is falsy
            (for example ``None``).
    """
    # An exhausted iterator is treated exactly like an explicit ``None`` item.
    received = next(EMAIL_IT, None)
    if not received:
        raise NoEmailError

    print(f'Produced email: {received.message}')
    return received

In [6]:
def produce_emails(queue):
    """Append every currently available email to the end of ``queue``.

    Keeps calling ``try_receive_email`` and stops as soon as it raises
    ``NoEmailError``.
    """
    while True:
        try:
            incoming = try_receive_email()
        except NoEmailError:
            break
        # Producer side: new items always go on the tail of the queue.
        queue.append(incoming)

In [7]:
def consume_one_email(queue):
    """Remove the item at the front of ``queue`` and report it.

    Does nothing when ``queue`` is empty.
    """
    if queue:
        # Consumer side: take the oldest item, i.e. the one at index 0.
        oldest = queue.pop(0)
        # Index the message for long-term archival
        print(f'Consumed email: {oldest.message}')

In [8]:
def loop(queue, keep_running):
    """Alternate producing and consuming on ``queue`` until told to stop.

    ``keep_running`` is called with no arguments before every pass; the loop
    ends the first time it returns a falsy value. Each pass first calls
    ``produce_emails`` and then ``consume_one_email`` on the same queue.
    """
    while True:
        if not keep_running():
            return
        produce_emails(queue)
        consume_one_email(queue)

def make_test_end(iterations=10):
    """Build a stop-condition callback that allows a fixed number of passes.

    Args:
        iterations: How many times the returned callable answers ``True``
            before it starts answering ``False``. Defaults to 10.

    Returns:
        A zero-argument callable that returns ``True`` for its first
        ``iterations`` calls and ``False`` on every call after that.
    """
    remaining = iterations

    def func():
        # Count down with a plain integer; once it reaches zero the answer
        # stays False.
        nonlocal remaining
        if remaining > 0:
            remaining -= 1
            return True
        return False

    return func


# Stop condition for the demo: allows a fixed number of loop passes.
my_end_func = make_test_end()

# Run the producer-consumer loop with a plain list as the queue.
loop([], my_end_func)

Produced email: hello1
Produced email: hello2
Consumed email: hello1
Produced email: hello3
Produced email: hello4
Consumed email: hello2
Produced email: hello5
Produced email: hello6
Consumed email: hello3
Produced email: hello7
Consumed email: hello4
Consumed email: hello5
Consumed email: hello6
Consumed email: hello7


In [9]:
import timeit

In [10]:
def print_results(count, tests):
    """Print the mean of the timings in ``tests`` and return it with ``count``.

    Args:
        count: The data size the timings were measured for.
        tests: A non-empty sequence of timings in seconds.

    Returns:
        The tuple ``(count, mean_of_tests)``.
    """
    total_seconds = sum(tests)
    avg_iteration = total_seconds / len(tests)
    result = (count, avg_iteration)
    print(f'Count {count:>5,} takes {avg_iteration:.6f}s')
    return result

In [11]:
def list_append_benchmark(count):
    """Time appending ``count`` integers to an initially empty list.

    The measurement is repeated 1000 times, each repetition starting from a
    fresh empty list. The timings are handed to ``print_results`` and its
    return value is passed through.
    """
    def run(queue):
        for value in range(count):
            queue.append(value)

    # The statement strings only need ``run``; expose just that name.
    timer = timeit.Timer(
        stmt='run(queue)',
        setup='queue = []',
        globals={'run': run})
    samples = timer.repeat(repeat=1000, number=1)

    return print_results(count, samples)

In [12]:
def print_delta(before, after):
    """Print how much data size and run time grew between two measurements.

    Args:
        before: The reference measurement as a ``(count, seconds)`` pair.
        after: The measurement to compare, in the same ``(count, seconds)``
            form.
    """
    (before_count, before_time), (after_count, after_time) = before, after
    # Both factors are expressed as "1 + relative change" versus the reference.
    growth = 1 + (after_count - before_count) / before_count
    slowdown = 1 + (after_time - before_time) / before_time
    print(f'{growth:>4.1f}x data size, {slowdown:>4.1f}x time')

In [13]:
# Measure the smallest size first; every later result is compared against it.
baseline = list_append_benchmark(500)
for count in (1_000, 2_000, 3_000, 4_000, 5_000):
    print()  # Blank line between result groups
    comparison = list_append_benchmark(count)
    # Report data-size and run-time growth relative to the 500-item baseline.
    print_delta(baseline, comparison)

Count   500 takes 0.000033s

Count 1,000 takes 0.000062s
 2.0x data size,  1.9x time

Count 2,000 takes 0.000124s
 4.0x data size,  3.7x time

Count 3,000 takes 0.000180s
 6.0x data size,  5.4x time

Count 4,000 takes 0.000243s
 8.0x data size,  7.3x time

Count 5,000 takes 0.000300s
10.0x data size,  9.0x time


In [14]:
def list_pop_benchmark(count):
    """Time draining a ``count``-item list from the front with ``pop(0)``.

    The measurement is repeated 1000 times, each repetition starting from a
    freshly built list. The timings are handed to ``print_results`` and its
    return value is passed through.
    """
    def prepare():
        return list(range(count))

    def run(queue):
        while queue:
            queue.pop(0)

    # The statement strings only need these two helpers.
    timer = timeit.Timer(
        stmt='run(queue)',
        setup='queue = prepare()',
        globals={'prepare': prepare, 'run': run})
    samples = timer.repeat(repeat=1000, number=1)

    return print_results(count, samples)

In [15]:
# Measure the smallest size first; every later result is compared against it.
baseline = list_pop_benchmark(500)
for count in (1_000, 2_000, 3_000, 4_000, 5_000):
    print()  # Blank line between result groups
    comparison = list_pop_benchmark(count)
    # Report data-size and run-time growth relative to the 500-item baseline.
    print_delta(baseline, comparison)

Count   500 takes 0.000083s

Count 1,000 takes 0.000220s
 2.0x data size,  2.7x time

Count 2,000 takes 0.000636s
 4.0x data size,  7.7x time

Count 3,000 takes 0.001258s
 6.0x data size, 15.2x time

Count 4,000 takes 0.002165s
 8.0x data size, 26.2x time

Count 5,000 takes 0.003615s
10.0x data size, 43.7x time


In [16]:
import collections

In [17]:
def consume_one_email(queue):
    """Remove the item at the left end of ``queue`` and report it.

    Does nothing when ``queue`` is empty. ``queue`` must provide
    ``popleft()``.
    """
    if queue:
        # Consumer side: take the oldest item from the left end.
        oldest = queue.popleft()
        # Process the email message
        print(f'Consumed email: {oldest.message}')

In [18]:
# Fresh stop condition: allows a fixed number of loop passes.
my_end_func = make_test_end()

# Start a new simulated email stream for this run.
EMAIL_IT = get_emails()

# Same producer-consumer loop, now backed by a deque instead of a list.
loop(collections.deque(), my_end_func)

Produced email: hello1
Produced email: hello2
Consumed email: hello1
Produced email: hello3
Produced email: hello4
Consumed email: hello2
Produced email: hello5
Produced email: hello6
Consumed email: hello3
Produced email: hello7
Consumed email: hello4
Consumed email: hello5
Consumed email: hello6
Consumed email: hello7


In [19]:
def deque_append_benchmark(count):
    """Time appending ``count`` integers to an initially empty deque.

    The measurement is repeated 1000 times, each repetition starting from a
    fresh empty ``collections.deque``. The timings are handed to
    ``print_results`` and its return value is passed through.
    """
    def prepare():
        fresh = collections.deque()
        return fresh

    def run(queue):
        for value in range(count):
            queue.append(value)

    # The statement strings only need these two helpers.
    timer = timeit.Timer(
        stmt='run(queue)',
        setup='queue = prepare()',
        globals={'prepare': prepare, 'run': run})
    samples = timer.repeat(repeat=1000, number=1)

    return print_results(count, samples)

In [20]:
# Measure the smallest size first; every later result is compared against it.
baseline = deque_append_benchmark(500)
for count in (1_000, 2_000, 3_000, 4_000, 5_000):
    print()  # Blank line between result groups
    comparison = deque_append_benchmark(count)
    # Report data-size and run-time growth relative to the 500-item baseline.
    print_delta(baseline, comparison)

Count   500 takes 0.000034s

Count 1,000 takes 0.000063s
 2.0x data size,  1.9x time

Count 2,000 takes 0.000126s
 4.0x data size,  3.7x time

Count 3,000 takes 0.000187s
 6.0x data size,  5.5x time

Count 4,000 takes 0.000259s
 8.0x data size,  7.6x time

Count 5,000 takes 0.000305s
10.0x data size,  9.0x time


In [21]:
def dequeue_popleft_benchmark(count):
    """Time draining a ``count``-item deque from the left with ``popleft()``.

    The measurement is repeated 1000 times, each repetition starting from a
    freshly built ``collections.deque``. The timings are handed to
    ``print_results`` and its return value is passed through.
    """
    def prepare():
        items = range(count)
        return collections.deque(items)

    def run(queue):
        while queue:
            queue.popleft()

    # The statement strings only need these two helpers.
    timer = timeit.Timer(
        stmt='run(queue)',
        setup='queue = prepare()',
        globals={'prepare': prepare, 'run': run})
    samples = timer.repeat(repeat=1000, number=1)

    return print_results(count, samples)

In [22]:
# Measure the smallest size first; every later result is compared against it.
baseline = dequeue_popleft_benchmark(500)
for count in (1_000, 2_000, 3_000, 4_000, 5_000):
    print()  # Blank line between result groups
    comparison = dequeue_popleft_benchmark(count)
    # Report data-size and run-time growth relative to the 500-item baseline.
    print_delta(baseline, comparison)

Count   500 takes 0.000028s

Count 1,000 takes 0.000054s
 2.0x data size,  1.9x time

Count 2,000 takes 0.000109s
 4.0x data size,  3.8x time

Count 3,000 takes 0.000177s
 6.0x data size,  6.2x time

Count 4,000 takes 0.000229s
 8.0x data size,  8.1x time

Count 5,000 takes 0.000276s
10.0x data size,  9.7x time


> - 생산자는 `append`를 호출해 원소를 추가하고 소비자는 `pop(0)`을 사용해 원소를 받게 만들면 리스트 타입을 FIFO 큐로 사용할 수 있다. 하지만 리스트를 FIFO 큐로 사용하면, 큐 길이가 늘어남에 따라 `pop(0)`의 성능이 선형보다 더 크게 나빠지기 때문에 문제가 될 수 있다.
> - `collections` 내장 모듈에 있는 `deque` 클래스는 큐 길이와 관계없이 상수 시간 만에 `append`와 `popleft`를 수행하기 때문에 FIFO 큐 구현에 이상적이다.