#### Faster pandas through NumPy, vectorization, and parallel computing

Reference for the parallel trick (calling `ThreadPoolExecutor.map` with multiple parameters): https://yuanjiang.space/threadpoolexecutor-map-method-with-multiple-parameters

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.DataFrame(np.random.normal(size=(10000,2)))

In [3]:
def sum_square(a,b):
    """Return the square of the sum of ``a`` and ``b``.

    Uses only ``+`` and ``**``, so it applies to anything supporting those
    operators (the notebook calls it with scalars, Series and DataFrames).
    """
    total = a + b
    return total ** 2

In [4]:
%timeit [sum_square(row[0], row[1]) for _, row in df.iterrows()]

205 ms ± 3.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%timeit [sum_square(a, b) for a, b in df[[0, 1]].itertuples(index=False)]

4.82 ms ± 69 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:
%timeit df.apply(lambda row: sum_square(row[0], row[1]), axis=1 )

54.8 ms ± 479 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
%timeit df.apply(lambda row: sum_square(row[0], row[1]), raw=True, axis=1 )

18.7 ms ± 148 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
%timeit np.vectorize(sum_square)(df[0], df[1])

1.86 ms ± 26.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [9]:
%timeit np.power(df[0] + df[1], 2)

102 µs ± 1.03 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [10]:
%timeit sum_square(df,df)

98.5 µs ± 813 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [56]:
# Number of chunks to split the work into (hard-coded, not read from the machine).
n_core = 12
# Split df along its rows into n_core sub-frames, one per parallel task.
df_split = np.array_split(df, n_core)

In [57]:
import concurrent.futures
# Thread pool with n_core workers; the parallel cells below submit work to it.
# NOTE(review): no shutdown() call is visible in this notebook — confirm the pool is released.
pool = concurrent.futures.ThreadPoolExecutor(max_workers = n_core)
# Process-based alternative, left disabled.
#pool = concurrent.futures.ProcessPoolExecutor(max_workers = n_core)

#### Parallelizing a simple function

In [58]:
import sys
import temp_func as tf

In [59]:
# Map sum_square over the chunks in the thread pool and stitch the results back together.
# NOTE(review): both mapped arguments are the same chunk, so each task computes
# (chunk + chunk)**2 over both columns rather than (col0 + col1)**2 — confirm this
# is the intended workload for the comparison.
%timeit pd.concat(pool.map(sum_square, df_split,df_split))

1.62 ms ± 9.58 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [10]:
%timeit pd.concat(pool.map(tf.sum_square, df_split,df_split))

904 µs ± 8.76 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


#### Parallelizing a grid search

In [51]:
import numpy as np
from joblib import parallel_backend # added line.
from sklearn.datasets import load_digits
from sklearn.model_selection import ParameterGrid
from sklearn.svm import SVC      

param_space = list(ParameterGrid({
    'C': [0.1,1,3,5],
    'gamma':[1,2,5],
}))

In [52]:
n_set = len(param_space)

In [114]:
digits = load_digits()
def train_all(df, param_space):
    """Fit one RBF-kernel SVC per parameter set and collect its predictions.

    Parameters
    ----------
    df : object with ``data`` and ``target`` attributes
        Used both to fit each model and as the prediction input.
    param_space : iterable of mappings
        Each mapping must provide the keys ``'C'`` and ``'gamma'``.

    Returns
    -------
    numpy.ndarray
        One flat array holding the predictions of every model, concatenated
        in the order of ``param_space``. Empty when ``param_space`` is empty.
    """
    # Start from an empty float array so that an empty param_space still
    # yields an empty array and the result dtype matches the previous
    # incremental-concatenate implementation.
    chunks = [np.array([])]
    for param in param_space:
        model = SVC(kernel='rbf', C=param['C'], gamma=param['gamma'], random_state=0)
        model.fit(df.data, df.target)
        chunks.append(model.predict(df.data))
    # Concatenate once instead of re-copying the accumulated array per model.
    return np.concatenate(chunks)

In [115]:
%timeit -r1 -n1 train_all(digits,param_space)

7.1 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [139]:
kobe = train_all(digits,param_space)

In [140]:
kobe.shape

(21564,)

In [108]:
param_space_split = np.array_split(param_space, n_core)

In [109]:
from itertools import repeat

In [113]:
%timeit -r1 -n1 pool.map(train_all,repeat(digits),param_space_split)

64.3 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [137]:
# Materialize the results first: np.hstack expects a real sequence, and passing
# the iterator returned by pool.map is deprecated (newer NumPy rejects it).
out = np.hstack(list(pool.map(train_all, repeat(digits), param_space_split)))

  out =  np.hstack(pool.map(train_all,repeat(digits),param_space_split))


In [141]:
np.array_equal(out,kobe)

True

In [124]:
param_space

[{'C': 0.1, 'gamma': 1},
 {'C': 0.1, 'gamma': 2},
 {'C': 0.1, 'gamma': 5},
 {'C': 1, 'gamma': 1},
 {'C': 1, 'gamma': 2},
 {'C': 1, 'gamma': 5},
 {'C': 3, 'gamma': 1},
 {'C': 3, 'gamma': 2},
 {'C': 3, 'gamma': 5},
 {'C': 5, 'gamma': 1},
 {'C': 5, 'gamma': 2},
 {'C': 5, 'gamma': 5}]