# Joblib

Набор утилит для распараллеливания задач, эффективного дискового кэширования и сериализации

In [1]:
import tempfile

import joblib
import numpy as np
import numpy.typing as npt
from joblib import Memory, Parallel, delayed

### Кэширование

In [2]:
with tempfile.TemporaryDirectory() as cachedir:
    # Disk-backed cache stored in a throwaway directory that exists only
    # for the duration of this ``with`` block.
    mem = Memory(location=cachedir)

    @mem.cache
    def long_running_func(arr: npt.ArrayLike) -> npt.ArrayLike:
        """Return the pseudo-inverse of ``arr``; results are memoized by ``mem``."""
        inverse = np.linalg.pinv(arr)
        return inverse

    arr = np.random.random((1000, 1000))

    # First call: computes the pseudo-inverse and stores it in the cache.
    long_running_func(arr)
    # Second call with the same argument: expected to be answered from the cache.
    long_running_func(arr)
        

________________________________________________________________________________
[Memory] Calling __main__--tmp-ipykernel-2307173161.long_running_func...
long_running_func(array([[0.464155, ..., 0.773191],
       ...,
       [0.4616  , ..., 0.706032]]))
________________________________________________long_running_func - 3.2s, 0.1min


### Параллельные вычисления

In [3]:
import os
import time
from typing import List


def func(x: npt.ArrayLike) -> np.float64:
    """Return the sum of all entries of the pseudo-inverse of ``x * 5 + x``.

    Args:
        x: Matrix-like input. It is converted with ``np.asarray`` first so
            that nested Python lists are handled with array arithmetic;
            without the conversion ``x * 5 + x`` would repeat and
            concatenate a list instead of scaling it.

    Returns:
        The sum of the elements of ``np.linalg.pinv(x * 5 + x)``.
    """
    # Fixed one-second pause before the computation
    # (presumably simulates extra latency for the timing comparison).
    time.sleep(1)
    matrix = np.asarray(x)
    return np.linalg.pinv(matrix * 5 + matrix).sum()

# Workload for the timing comparison: eight independent 1000x1000 matrices
# drawn from the standard normal distribution.
matrixes = [np.random.standard_normal((1000, 1000)) for _ in range(8)]

def func_parallel(matrixes: List[npt.ArrayLike], n_jobs: int = -1) -> List[np.float64]:
    """Apply ``func`` to every matrix using a joblib worker pool.

    Args:
        matrixes: Inputs passed one by one to ``func``.
        n_jobs: Number of joblib workers. The default ``-1`` asks joblib to
            use all available CPUs. This replaces ``os.cpu_count()``, which
            may return ``None`` (joblib treats ``None`` as "unset").

    Returns:
        The results of ``func`` in the same order as ``matrixes``.
    """
    with Parallel(n_jobs=n_jobs) as parallel:
        return parallel(delayed(func)(matr) for matr in matrixes)
        
def func_seq(matrixes: List[npt.ArrayLike]):
    """Apply ``func`` to each matrix one after another and return the results as a list."""
    result = []
    for matr in matrixes:
        result.append(func(matr))
    return result

In [4]:
# Time the joblib-based implementation; r1 keeps the returned results.
%time r1 = func_parallel(matrixes)

CPU times: user 50.5 ms, sys: 91.2 ms, total: 142 ms
Wall time: 8.54 s


In [5]:
# Time the plain sequential implementation on the same input; r2 keeps the returned results.
%time r2 = func_seq(matrixes)

CPU times: user 17.6 s, sys: 17.2 s, total: 34.8 s
Wall time: 18.3 s


### Сериализация

In [6]:
# Round-trip an array through joblib.dump / joblib.load
# (joblib's own pickle-based persistence format, not HDF5).
# NOTE(review): joblib.dump receives a file object here rather than a path, so the
# '.lz4' suffix presumably does not enable lz4 compression -- confirm against the
# joblib.dump documentation.
with tempfile.NamedTemporaryFile(suffix='.lz4') as fn:

    big_array = np.random.random(size=(1000, 1000))

    with open(fn.name, 'wb') as f:
        joblib.dump(big_array, f)

    # Read back through a context manager so the read handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(fn.name, 'rb') as f:
        print(joblib.load(f))

[[0.70048133 0.33807662 0.96802752 ... 0.8726561  0.53235966 0.95756961]
 [0.98344339 0.76825222 0.18895724 ... 0.64626303 0.2730661  0.62092832]
 [0.79266053 0.37509146 0.04068405 ... 0.17779883 0.07822607 0.32777225]
 ...
 [0.36306885 0.3314119  0.17422314 ... 0.35247475 0.99715434 0.96220759]
 [0.71764039 0.56004204 0.52307381 ... 0.76640573 0.08748426 0.40420756]
 [0.56263277 0.53453473 0.77335292 ... 0.5445814  0.87719711 0.63730799]]
