In [1]:
import datetime
import os
import time

import psutil


def process_mem() -> float:
    """Return the resident set size (RSS) of the current process in MiB.

    Returns:
        The RSS that ``psutil`` reports for this interpreter's PID, converted
        from bytes to mebibytes. True division is used, so the result is a
        float rather than an int.
    """
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / 1024 / 1024  # bytes -> KiB -> MiB

## Duck typing and Ask For Forgiveness

In [2]:
class A:
    """Toy type for the duck-typing benchmark: it exposes an attribute ``a``."""

    # Class-level attribute read by the do_something_with_a* helpers.
    a = 'a'


class B:
    """Toy type for the duck-typing benchmark: it has ``b`` but no ``a``."""

    # Deliberately named differently from A.a so that reading ``.a`` fails.
    b = 'b'

In [3]:
def do_something_with_a_if_a(obj):
    """Return ``obj.a`` after first checking that the attribute exists.

    This is the "ask permission" variant: ``hasattr`` is consulted before the
    attribute is read. Objects without an ``a`` attribute yield ``None``.
    """
    return obj.a if hasattr(obj, 'a') else None


def do_something_with_a(obj):
    """Return ``obj.a`` without checking first; fall back to ``None``.

    This is the "ask forgiveness" variant: the attribute is read directly and
    an ``AttributeError`` is translated into a ``None`` result.
    """
    try:
        value = obj.a
    except AttributeError:
        return None
    return value

In [4]:
# Workload where almost every object has the attribute: 1,000,000 As, 10 Bs.
mostly_a = [A() for _ in range(1_000_000)] + [B() for _ in range(10)]

# "Asking" variant: each call runs hasattr() before reading the attribute.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; datetime.now() follows the wall clock, which can be adjusted.
start_time = time.perf_counter()
_ = [do_something_with_a_if_a(o) for o in mostly_a]
elapsed = time.perf_counter() - start_time
print(f'Done mostly As asking, in {elapsed:.4f} sec')

# "Without asking" variant: read the attribute directly and rely on
# try/except for the few B instances.
start_time = time.perf_counter()
_ = [do_something_with_a(o) for o in mostly_a]
elapsed = time.perf_counter() - start_time
print(
    f'Done mostly As w/o asking, in {elapsed:.4f} sec'
)

Done mostly As asking, in 0.1782 sec
Done mostly As w/o asking, in 0.1092 sec


In [5]:
# Workload where almost every object lacks the attribute: 10 As, 1,000,000 Bs.
mostly_b = [A() for _ in range(10)] + [B() for _ in range(1_000_000)]

# "Asking" variant: hasattr() returns False for nearly every object.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; datetime.now() follows the wall clock, which can be adjusted.
start_time = time.perf_counter()
_ = [do_something_with_a_if_a(o) for o in mostly_b]
elapsed = time.perf_counter() - start_time
print(f'Done mostly Bs asking, in {elapsed:.4f} sec')

# "Without asking" variant: nearly every call raises and handles an
# AttributeError.
start_time = time.perf_counter()
_ = [do_something_with_a(o) for o in mostly_b]
elapsed = time.perf_counter() - start_time
print(
    f'Done mostly Bs w/o asking, in {elapsed:.4f} sec'
)

Done mostly Bs asking, in 0.1528 sec
Done mostly Bs w/o asking, in 0.5158 sec


## Lists, Iterators and Generators

In [6]:
import random
from typing import Generator


def extract(n=1_000_000) -> Generator[int, None, None]:
    """Dummy data source: lazily yield ``n`` random integers in [-1000, 1000].

    Values are produced one at a time with ``random.randint``, so nothing is
    materialized until the caller iterates.
    """
    low, high = -1000, 1000
    for _ in range(n):
        yield random.randint(low, high)


def transform(value: int) -> str:
    """Fizz buzz for a single integer.

    Returns 'fizzbuzz' for multiples of both 3 and 5, 'fizz' for multiples of
    3 only, 'buzz' for multiples of 5 only, and the decimal string of the
    value otherwise.
    """
    by_three = value % 3 == 0
    by_five = value % 5 == 0
    word = ('fizz' if by_three else '') + ('buzz' if by_five else '')
    # An empty word means the value is divisible by neither 3 nor 5.
    return word or str(value)


### Lists

In [7]:
# Fully materialized pipeline: the list of raw integers and the list of
# transformed strings are both alive when memory is sampled.
start_mem = process_mem()
# perf_counter() is a monotonic clock meant for measuring intervals;
# datetime.now() follows the wall clock, which can be adjusted.
start_time = time.perf_counter()

data = list(extract())
process_data = [transform(d) for d in data]

# Stop the timer before sampling memory so the psutil call is not timed.
elapsed = time.perf_counter() - start_time
delta_mem = process_mem() - start_mem
print(
    'Done,'
    f' memory usage: {delta_mem:,.2f} MB,'
    f' in {elapsed:.2f} sec'
)

Done, memory usage: 67.88 MB, in 1.25 sec


### Iterators and generators

In [8]:
# Only one element from extract() is in memory at a time,
# but all the processed objects are kept in the resulting list.
start_mem = process_mem()
# perf_counter() is a monotonic clock meant for measuring intervals;
# datetime.now() follows the wall clock, which can be adjusted.
start_time = time.perf_counter()

process_data = [transform(d) for d in extract()]

# Stop the timer before sampling memory so the psutil call is not timed.
elapsed = time.perf_counter() - start_time
delta_mem = process_mem() - start_mem
print(
    'Done,'
    f' memory usage: {delta_mem:,.2f} MB,'
    f' in {elapsed:.2f} sec'
)

Done, memory usage: 0.25 MB, in 1.40 sec


### Generator expressions

In [9]:
# Only one element at a time: the generator expression is lazy, so nothing is
# transformed until a value is requested.
start_mem = process_mem()
# perf_counter() is a monotonic clock meant for measuring intervals;
# datetime.now() follows the wall clock, which can be adjusted.
start_time = time.perf_counter()

process_data = (transform(d) for d in extract())
next(process_data)  # pulls (and transforms) exactly one value

# Stop the timer before sampling memory so the psutil call is not timed.
elapsed = time.perf_counter() - start_time
delta_mem = process_mem() - start_mem
print(
    'Done,'
    f' memory usage: {delta_mem:,.2f} MB,'
    f' in {elapsed:.2f} sec'
)

Done, memory usage: -32.58 MB, in 0.02 sec


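Note that this is *cheating*, because only one element was processed: the generator expression is lazy, so the single `next()` call transforms a single value. The negative memory delta most likely comes from rebinding `process_data`, which releases the list built in the previous cell.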

## Multiprocessing, Multithreading and AsyncIO

We are going to use a library called [unsync](https://github.com/alex-sherman/unsync), which simply removes some of the boilerplate and makes the example clearer; the end result would be the same without it.

In [10]:
import time
import asyncio
from itertools import chain, islice

from unsync import unsync

In [11]:
@unsync
async def transform_chunk(values):
    """Transform a whole chunk of values after one simulated I/O wait.

    The wait is proportional to the chunk size (0.0001 s per value) and is
    awaited, so it does not block the event loop. Returns a list with
    ``transform`` applied to each value, in order.

    NOTE(review): the ``@unsync`` wrapper presumably schedules the coroutine
    and hands back a future-like object; the notebook collects values through
    ``.result()``. Confirm against the unsync documentation.
    """
    simulated_io_delay = 0.0001 * len(values)  # Artificial I/O
    await asyncio.sleep(simulated_io_delay)
    return list(map(transform, values))

def slow_transform(value):
    """Apply ``transform`` to one value after a blocking 0.0001 s sleep.

    The sleep stands in for per-item I/O latency and blocks the calling
    thread.
    """
    time.sleep(0.0001)  # Artificial I/O
    result = transform(value)
    return result

In [12]:
# Baseline: every value goes through a blocking sleep, one after another.
# perf_counter() is a monotonic clock meant for measuring intervals;
# datetime.now() follows the wall clock, which can be adjusted.
start_time = time.perf_counter()

process_data = (slow_transform(d) for d in extract())
_ = list(process_data)  # drain the generator so all the work actually runs

elapsed = time.perf_counter() - start_time
print(f'Done single thread, in {elapsed:.2f} sec')

Done single thread, in 134.18 sec


In [13]:
CHUNK = 10000  # maximum number of values handed to one transform_chunk call


# perf_counter() is a monotonic clock meant for measuring intervals;
# datetime.now() follows the wall clock, which can be adjusted.
start_time = time.perf_counter()
it = extract()
# iter(callable, sentinel) keeps pulling lists of at most CHUNK values until
# islice() returns nothing, i.e. the generator is exhausted. The previous
# chain([first], islice(it, CHUNK)) form produced chunks of CHUNK + 1 values.
chunks = iter(lambda: list(islice(it, CHUNK)), [])
tasks = [transform_chunk(chunk) for chunk in chunks]

# Start every task before waiting on any of them, then gather the results.
_ = [t.result() for t in tasks]

elapsed = time.perf_counter() - start_time
print(f'Done async, in {elapsed:.2f} sec')

Done async, in 2.06 sec
