In [1]:
from numba import njit, prange
import numpy as np
from sklearn.datasets import make_blobs

In [2]:
# Synthetic dataset: 100000 samples with 100 features, generated around
# 10 blob centres with a fixed seed so reruns produce the same data.
X, y = make_blobs(
    n_samples=100000,
    n_features=100,
    centers=10,
    random_state=42,
)

In [3]:
def _residuals_simple(X, y, coef):
    return np.abs(np.dot(X, coef) - y)


@njit(parallel=True, fastmath=True)
def _residuals_numba(X, y, coef):
    """Numba-compiled variant of ``_residuals_simple``.

    Computes the absolute residuals ``|X . coef - y|``; the function is
    compiled with ``njit`` using the ``parallel`` and ``fastmath`` options.
    """
    predicted = np.dot(X, coef)
    return np.abs(predicted - y)


def _make_x_y(data):
    features = data.T
    assumed_ys = features[0]
    modelled_xs = np.hstack([np.ones((data.shape[0], 1)),
                             features[1:].T])
    return modelled_xs, assumed_ys


def _lstsq_simple(X, y):
    default = -1
    coefficients, _, _, _ = np.linalg.lstsq(X, y, rcond=default)
    return coefficients

@njit
def _lstsq_numba(X, y):
    """Numba-compiled variant of ``_lstsq_simple``.

    Calls ``np.linalg.lstsq`` with ``rcond=-1`` inside an ``njit`` function
    and returns only the coefficient vector.
    """
    cutoff = -1
    coef, _resid, _rank, _sv = np.linalg.lstsq(X, y, rcond=cutoff)
    return coef

In [4]:
_COEF = None
_X = None
_y = None

def _find_residuals(data):
    """Fit a least-squares model to ``data`` and return its absolute residuals.

    ``data`` is split by ``_make_x_y`` into a design matrix and targets, the
    coefficients are solved with ``_lstsq_numba`` and the residuals computed
    with ``_residuals_numba``.

    Side effect: the coefficients, design matrix and targets are stored in
    the module-level ``_COEF``, ``_X`` and ``_y`` so later cells can reuse
    them.
    """
    global _COEF, _X, _y

    design, targets = _make_x_y(data)
    fitted = _lstsq_numba(design, targets)

    # Publish the intermediates only once the fit has succeeded.
    _COEF, _X, _y = fitted, design, targets

    return _residuals_numba(design, targets, fitted)

# Run the full pipeline once on the generated data; this also fills the
# module-level `_X`, `_y` and `_COEF` used by the timing cells.
resid_orig = _find_residuals(X)

# Display the residual vector shape next to the design-matrix shape.
(resid_orig.shape, _X.shape)

((100000,), (100000, 100))

In [5]:
# Time the plain-NumPy least-squares solve on the design matrix and targets stored by `_find_residuals`.
%timeit _lstsq_simple(_X, _y)

582 ms ± 54.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
# Time the njit-compiled least-squares solve on the same inputs. It was already called once
# inside `_find_residuals`, so JIT compilation should not be part of this measurement.
%timeit _lstsq_numba(_X, _y)

683 ms ± 92.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
# Time the plain-NumPy residual computation using the stored design matrix, targets and coefficients.
%timeit _residuals_simple(_X, _y, _COEF)

6.68 ms ± 89.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
# Time the njit-compiled residual computation on the same inputs. It was already called once
# inside `_find_residuals`, so JIT compilation should not be part of this measurement.
%timeit _residuals_numba(_X, _y, _COEF)

3.52 ms ± 141 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [9]:
# `lstsq` reports the sum of squared residuals as element 1 of its result.
# This is a single aggregate value (shape (1,) for a 1-D target), not the
# per-sample absolute residuals held in `resid_orig`.
# Index the result instead of unpacking into `_` / `*_`: binding `_` at the
# top level of a notebook shadows IPython's last-output variable, and IPython
# then stops updating `_`, `__` and `___` for the rest of the session.
resid = np.linalg.lstsq(_X, _y, rcond=-1)[1]
resid.shape

(1,)