<p style="font-size:200%;font-weight:bold">加速Numpy</p>

- 作者：丁文治
- 日期：2022年4月2日

This notebook simply tests code from a blog post (possibly written by the author of pandas, but I have lost the source; please let me know if you know who wrote that post).

In [10]:
import dask.dataframe as dd
import pandas as pd
import swifter
import numpy as np
import numba
from sklearn.linear_model import LinearRegression
from pandarallel import pandarallel

# Set up pandarallel with its progress bar turned off; `DataFrame.parallel_apply`
# is used by the parallel benchmarks further down in this notebook.
pandarallel.initialize(progress_bar=False)

INFO: Pandarallel will run on 24 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


# 生成数据

In [2]:
# Benchmark input: 1,000 rows x 14 columns of uniform floats in [0, 1).
# A fixed seed keeps the generated data (and therefore every fitted slope)
# identical across "Restart Kernel -> Run All" executions.
SEED = 42
rng = np.random.default_rng(SEED)
df = pd.DataFrame(rng.random((1_000, 14)))
df.shape
df.head(3)

(1000, 14)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.763338,0.713509,0.195882,0.495666,0.624177,0.585234,0.40449,0.263616,0.404135,0.531574,0.183177,0.810131,0.647456,0.302777
1,0.434267,0.392337,0.963468,0.00468,0.83685,0.850738,0.579568,0.321746,0.99133,0.303824,0.290518,0.879694,0.886661,0.622307
2,0.1028,0.879397,0.225718,0.51614,0.695354,0.81583,0.755212,0.862049,0.727086,0.786461,0.42714,0.415951,0.924709,0.804058


# 定义函数

In [5]:
def ols_sklearn(row):
    """Fit a scikit-learn ``LinearRegression`` to one row and return its slope.

    The regressor is the position of each value in the row (0, 1, ..., n - 1),
    shaped as a single-feature column.

    Args:
        row: Object exposing ``.shape`` and ``.values`` (a pandas Series when
            called through ``DataFrame.apply(..., axis=1)``).

    Returns:
        The fitted coefficient of the single position feature.
    """
    n_points = row.shape[0]
    # scikit-learn expects a 2-D feature matrix, hence the (n, 1) reshape.
    positions = np.arange(n_points).reshape(-1, 1)
    fitted = LinearRegression().fit(positions, row.values)
    return fitted.coef_[0]


def ols_sklearn_raw(row):
    """Fit a scikit-learn ``LinearRegression`` to one raw row and return its slope.

    Same computation as the Series-based variant, except that ``row`` is handed
    to ``fit`` directly instead of going through ``.values``.

    Args:
        row: Array-like with a ``.shape`` attribute (a NumPy array when called
            through ``DataFrame.apply(..., axis=1, raw=True)``).

    Returns:
        The fitted coefficient of the single position feature.
    """
    n_points = row.shape[0]
    # scikit-learn expects a 2-D feature matrix, hence the (n, 1) reshape.
    positions = np.arange(n_points).reshape(-1, 1)
    fitted = LinearRegression().fit(positions, row)
    return fitted.coef_[0]


def ols_lstsq(row):
    """Return the least-squares slope of a row's values against their positions.

    Solves ``design @ [m, c] ~= row.values`` with ``numpy.linalg.lstsq``, where
    the first design column is 0, 1, ..., n - 1 and the second is all ones.

    Args:
        row: Object exposing ``.shape`` and ``.values`` (a pandas Series when
            called through ``DataFrame.apply(..., axis=1)``).

    Returns:
        The slope ``m``; the intercept ``c`` is computed but discarded.
    """
    n_points = row.shape[0]
    positions = np.arange(n_points)
    intercept_column = np.ones(n_points)
    # Stack as two rows, then transpose into an (n, 2) design matrix.
    design = np.vstack((positions, intercept_column)).T
    # rcond=-1 is NumPy's legacy "machine precision" cutoff for small singular values.
    solution = np.linalg.lstsq(design, row.values, rcond=-1)
    slope, _intercept = solution[0]
    return slope


def ols_lstsq_raw(row):
    """Return the least-squares slope of a raw array against its positions.

    Solves ``design @ [m, c] ~= row`` with ``numpy.linalg.lstsq``, where the
    first design column is 0, 1, ..., n - 1 and the second is all ones.

    Args:
        row: Array-like with a ``.shape`` attribute (a NumPy array when called
            through ``DataFrame.apply(..., axis=1, raw=True)``).

    Returns:
        The slope ``m``; the intercept ``c`` is computed but discarded.
    """
    n_points = row.shape[0]
    positions = np.arange(n_points)
    intercept_column = np.ones(n_points)
    # Stack as two rows, then transpose into an (n, 2) design matrix.
    design = np.vstack((positions, intercept_column)).T
    # rcond=-1 is NumPy's legacy "machine precision" cutoff for small singular values.
    solution = np.linalg.lstsq(design, row, rcond=-1)
    slope, _intercept = solution[0]
    return slope


@numba.jit(nopython=True)
def ols_lstsq_raw_numba(row):
    """Numba nopython-compiled least-squares slope of a row against its positions.

    Builds an (n, 2) design matrix whose columns are 0, 1, ..., n - 1 and all
    ones, solves it against ``row`` with ``np.linalg.lstsq`` and returns the
    first coefficient (the slope); the intercept is discarded.

    Args:
        row: Array with a ``.shape`` attribute; in this notebook it is called
            through ``DataFrame.apply(..., axis=1, raw=True)``.

    Returns:
        The slope ``m`` of the fitted line.
    """
    X =np.arange(row.shape[0])
    ones = np.ones(row.shape[0])
    # Stack positions and ones as two rows, then transpose to (n, 2).
    A = np.vstack((X, ones)).T
    # Element 0 of the lstsq result holds the solution [m, c].
    m, c = np.linalg.lstsq(A, row, rcond=-1)[0]
    return m

# Warm-up: apply the jitted function to the first row only so that Numba compiles
# it here rather than inside the timed benchmark cells.
ms = df[:1].apply(ols_lstsq_raw_numba, axis=1, raw=True)  # Precompile Numba

# 测试速度

## NumPy + iloc

In [11]:
%%timeit

# Baseline: pull each row out by position with `.iloc` in a Python loop and fit it
# with ols_lstsq, collecting the slopes in a list.
ms = []

for row_idx in range(df.shape[0]):
    row = df.iloc[row_idx]
    m = ols_lstsq(row)
    ms.append(m)
result_iloc = pd.Series(ms)

74.1 ms ± 2.99 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## NumPy + iterrows

In [12]:
%%timeit

# Same per-row fit as the `.iloc` loop, but rows are produced by `df.iterrows()`
# (the index half of each pair is unused).
ms = []

for row_idx, row in df.iterrows():
    m = ols_lstsq(row)
    ms.append(m)
result_iterrows = pd.Series(ms)

58.5 ms ± 2.85 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## NumPy + apply

In [13]:
%%timeit

# Row-wise `DataFrame.apply` with `raw` left at its default, so ols_lstsq (which
# reads `row.values`) is the variant used here.
ms = df.apply(ols_lstsq, axis=1)
result_apply = pd.Series(ms)

36.7 ms ± 3.43 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## NumPy + apply (raw=True)

In [15]:
%%timeit

# Row-wise `DataFrame.apply` with raw=True, paired with the variant that passes
# `row` straight to lstsq without `.values`.
ms = df.apply(ols_lstsq_raw, axis=1, raw=True)
result_apply_raw = pd.Series(ms)

29.8 ms ± 2.77 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## NumPy + swifter + apply (raw=True)

In [18]:
%%timeit

# Same raw row-wise apply, routed through the swifter accessor with its
# progress bar disabled.
ms = df.swifter.progress_bar(False).apply(ols_lstsq_raw, axis=1, raw=True)
result_apply_raw_swifter = pd.Series(ms)

38.8 ms ± 133 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Numba + apply (raw=True)

In [21]:
%%timeit

# Raw row-wise apply using the Numba-jitted fit; the function was already called
# once on the first row in the definitions cell.
ms = df.apply(ols_lstsq_raw_numba, axis=1, raw=True)
result_apply_raw_numba = pd.Series(ms)

5.36 ms ± 17.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Numba + Dask + apply (raw=True)

In [4]:
%%timeit

# NOTE: the pandas -> Dask conversion (8 partitions) sits inside the timed cell, so
# the measurement covers building the Dask frame as well as the computation itself.
ddf = dd.from_pandas(df, npartitions=8, sort=False)

# `meta` declares the result as an unnamed float64 series; the graph is executed
# with the 'processes' scheduler.
result_apply_raw_numba_dask = ddf.apply(ols_lstsq_raw_numba, axis=1, raw=True, meta=(None, 'float64',)).compute(scheduler='processes')

2.67 s ± 52.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Sklearn + apply

In [11]:
%%timeit

# Row-wise apply with the scikit-learn based fit (`raw` left at its default, so
# ols_sklearn reads `row.values`).
ms = df.apply(ols_sklearn, axis=1)
result = pd.Series(ms)

183 ms ± 3.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Sklearn + apply (raw=True)

In [6]:
%%timeit

# Row-wise apply with raw=True, paired with the scikit-learn variant that passes
# `row` to `fit` directly.
ms = df.apply(ols_sklearn_raw, axis=1, raw=True)
result = pd.Series(ms)

173 ms ± 2.73 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## NumPy + parallel_apply

In [18]:
%%timeit

# pandarallel's `parallel_apply` with the Series-based NumPy fit (`raw` left at
# its default).
ms = df.parallel_apply(ols_lstsq, axis=1)
result = pd.Series(ms)

258 ms ± 14.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## NumPy + parallel_apply (raw=True)

In [15]:
%%timeit

# pandarallel's `parallel_apply` with raw=True and the raw-array NumPy fit.
ms = df.parallel_apply(ols_lstsq_raw, axis=1, raw=True)
result = pd.Series(ms)

245 ms ± 8.84 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## NumPy + Transpose + apply

In [22]:
# NOTE(review): this section contains no transpose-based benchmark yet; the cell
# only echoes `ms`. The one-row output presumably comes from the Numba warm-up call
# (`df[:1].apply(...)`), since names assigned inside `%%timeit` cells do not appear
# to be kept in the notebook namespace — TODO confirm and add the missing benchmark.
ms

0   -0.009935
dtype: float64