<img src="../../img/python-logo-no-text.svg"
     style="display:block;margin:auto;width:10%"/>
<br>
<div style="text-align:center; font-size:200%;">
  <b>Joblib</b>
</div>
<br/>
<div style="text-align:center;">Dr. Matthias Hölzl</div>
<br/>
<div style="text-align:center;">module_320_concurrency/topic_210_d4_joblib</div>


# Joblib

In [None]:
import logging
from functools import cache, lru_cache
from typing import Any, Callable

from joblib import Parallel, delayed, Memory
from timeit import timeit
from pathlib import Path


def maybe_start_logger():
    """Return the logger named ``my_logger``, configuring it on first use.

    If the logger has no handlers yet, its level is set to ``INFO`` and a
    ``StreamHandler`` is attached. A logger that already has handlers is
    returned untouched, so repeated calls do not stack handlers.
    """
    log = logging.getLogger("my_logger")
    if log.handlers:
        # Already configured by an earlier call.
        return log
    log.setLevel(logging.INFO)
    log.addHandler(logging.StreamHandler())
    return log

In [None]:
# Set up the shared logger only when this code runs as the main module.
if __name__ == "__main__":
    logger = maybe_start_logger()

In [None]:
def perform_computation(data, num_iterations=1_000_000):
    """Count up to ``num_iterations`` one step at a time and label the result.

    The explicit loop is the workload being measured, so it is intentionally
    not replaced by a closed-form expression.

    Args:
        data: Value embedded in the returned string to identify the call.
        num_iterations: Number of loop iterations to perform.

    Returns:
        A string of the form ``"->{data}: {count}"``.
    """
    counter = 0
    for _ in range(num_iterations):
        counter += 1
    return f"->{data}: {counter}"

In [None]:
# Type alias for a computation: a callable that takes one argument of any
# type and returns a value of any type.
ComputationType = Callable[[Any], Any]

In [None]:
def perform_computations_sequentially(
    num_requests: int, computation: ComputationType = perform_computation
) -> list[Any]:
    """Run ``computation`` once per request, one call after another.

    Args:
        num_requests: Number of calls to make. The calls receive the
            arguments ``0, 1, ..., num_requests - 1``.
        computation: Callable applied to each request index.

    Returns:
        A list with the result of each call, in request order.
    """
    return [computation(i) for i in range(num_requests)]

In [None]:
# Baseline: total time for 10 sequential runs of 5 requests each.
timeit(lambda: perform_computations_sequentially(5), globals=globals(), number=10)

In [None]:
def perform_computations_with_joblib(
    num_requests: int,
    computation: ComputationType = perform_computation,
    n_jobs: int = 32,
) -> list[Any]:
    """Run ``computation`` once per request using joblib's ``Parallel``.

    Args:
        num_requests: Number of calls to make. The calls receive the
            arguments ``0, 1, ..., num_requests - 1``.
        computation: Callable applied to each request index.
        n_jobs: Value passed to ``Parallel(n_jobs=...)``. Defaults to 32,
            the value that was previously hard-coded.

    Returns:
        The results as returned by ``Parallel``, one per request.
    """
    # Each ``delayed(...)`` call only describes a job; Parallel executes them.
    jobs = (delayed(computation)(i) for i in range(num_requests))
    return Parallel(n_jobs=n_jobs)(jobs)

In [None]:
# Run 5 requests through joblib once to inspect the returned results.
perform_computations_with_joblib(5)

In [None]:
# Total time for 10 joblib runs of 5 requests each.
timeit(lambda: perform_computations_with_joblib(5), globals=globals(), number=10)

In [None]:
# Total time for 10 joblib runs of 32 requests each (one request per
# configured job, since the function uses n_jobs=32).
timeit(lambda: perform_computations_with_joblib(32), globals=globals(), number=10)

In [None]:
# Total time for 10 joblib runs of 128 requests each (more requests than
# the 32 configured jobs).
timeit(lambda: perform_computations_with_joblib(128), globals=globals(), number=10)

In [None]:
# Location handed to joblib's Memory below: the "Tmp" directory inside the
# current user's home directory.
cachedir = Path.home() / "Tmp"

In [None]:
# Stop here if the cache directory is missing.
# NOTE(review): assert statements are skipped when Python runs with -O.
assert cachedir.exists()

In [None]:
# joblib Memory object that uses cachedir as its storage location.
# NOTE(review): verbose=2 presumably makes joblib report cache activity in
# detail; confirm against the joblib documentation.
memory = Memory(cachedir, verbose=2)

In [None]:
@memory.cache
def perform_memoized_computation(data, num_iterations=1_000_000):
    """Count up to ``num_iterations`` and label the result; cached by joblib.

    Same workload as the plain computation, but wrapped with the
    ``cache`` decorator of the file-level ``memory`` object.

    Args:
        data: Value embedded in the returned string to identify the call.
        num_iterations: Number of loop iterations to perform.

    Returns:
        A string of the form ``"->{data}: {count}"``.
    """
    counter = 0
    for _ in range(num_iterations):
        counter += 1
    return f"->{data}: {counter}"

In [None]:
# Run 5 requests of the joblib-memoized computation through Parallel once.
perform_computations_with_joblib(5, perform_memoized_computation)

In [None]:
# Total time for 10 parallel runs of 5 memoized requests each.
timeit(
    lambda: perform_computations_with_joblib(5, perform_memoized_computation),
    globals=globals(),
    number=10,
)

In [None]:
# Total time for 10 parallel runs of 32 memoized requests each.
timeit(
    lambda: perform_computations_with_joblib(32, perform_memoized_computation),
    globals=globals(),
    number=10,
)

In [None]:
# Total time for 10 parallel runs of 128 memoized requests each.
timeit(
    lambda: perform_computations_with_joblib(128, perform_memoized_computation),
    globals=globals(),
    number=10,
)

In [None]:
# Run 5 requests of the joblib-memoized computation sequentially once.
perform_computations_sequentially(5, perform_memoized_computation)

In [None]:
# Total time for 10 sequential runs of 5 memoized requests each.
timeit(
    lambda: perform_computations_sequentially(5, perform_memoized_computation),
    globals=globals(),
    number=10,
)

In [None]:
# Total time for 10 sequential runs of 32 memoized requests each.
timeit(
    lambda: perform_computations_sequentially(32, perform_memoized_computation),
    globals=globals(),
    number=10,
)

In [None]:
# Total time for 10 sequential runs of 128 memoized requests each.
timeit(
    lambda: perform_computations_sequentially(128, perform_memoized_computation),
    globals=globals(),
    number=10,
)

In [None]:
@cache
def perform_cached_computation(data, num_iterations=1_000_000):
    """Count up to ``num_iterations`` and label the result; cached in memory.

    Same workload as the plain computation, but wrapped with
    ``functools.cache``, so a repeated call with the same arguments returns
    the stored result instead of running the loop again.

    Args:
        data: Value embedded in the returned string to identify the call.
        num_iterations: Number of loop iterations to perform.

    Returns:
        A string of the form ``"->{data}: {count}"``.
    """
    counter = 0
    for _ in range(num_iterations):
        counter += 1
    return f"->{data}: {counter}"

In [None]:
# Run 5 requests of the functools-cached computation sequentially once.
perform_computations_sequentially(5, perform_cached_computation)

In [None]:
# Total time for 10 sequential runs of 5 functools-cached requests each.
timeit(
    lambda: perform_computations_sequentially(5, perform_cached_computation),
    globals=globals(),
    number=10,
)

In [None]:
# Total time for 10 sequential runs of 32 functools-cached requests each.
timeit(
    lambda: perform_computations_sequentially(32, perform_cached_computation),
    globals=globals(),
    number=10,
)

In [None]:
# Total time for 10 sequential runs of 128 functools-cached requests each.
timeit(
    lambda: perform_computations_sequentially(128, perform_cached_computation),
    globals=globals(),
    number=10,
)


def perform_computation_and_log(data, num_iterations=1_000_000):
    """Log the start of a computation, then count up to ``num_iterations``.

    The logger is fetched (and configured if needed) inside the function,
    so the call does not rely on a logger set up by the caller.

    Args:
        data: Value embedded in the log message and the returned string.
        num_iterations: Number of loop iterations to perform.

    Returns:
        A string of the form ``"->{data}: {count}"``.
    """
    log = maybe_start_logger()
    log.info(f"Running computation with {data}")
    counter = 0
    for _ in range(num_iterations):
        counter += 1
    return f"->{data}: {counter}"

In [None]:
# Run 5 logging computations sequentially; each call logs one message.
perform_computations_sequentially(5, perform_computation_and_log)

In [None]:
# Run the same 5 logging computations through joblib's Parallel.
# NOTE(review): where the log output appears presumably depends on the
# joblib backend in use; confirm when running this cell.
perform_computations_with_joblib(5, perform_computation_and_log)