# Least Squares via L-BFGS in ensmallen

In [1]:
import numpy as np
import pyensmallen

In [2]:
# Simulate a linear model: n observations of k standard-normal features,
# uniform(0, 1) coefficients, and unit-variance Gaussian noise.
n, k = 1_000_000, 20
np.random.seed(42)  # fixed seed so the draws below are repeatable
X = np.random.randn(n, k)
true_params = np.random.rand(k)
print(true_params)
y = X @ true_params + np.random.randn(n)

[0.51639859 0.94598022 0.23380001 0.55162275 0.97811966 0.24254699
 0.64702478 0.70271041 0.26476461 0.77362184 0.7817448  0.36874977
 0.72697004 0.06518613 0.72705723 0.38967364 0.03826155 0.39386005
 0.0438693  0.72142769]


## Empirical risk minimization (ERM) with L-BFGS

## pyensmallen

In [3]:
def linear_regression_objective(params, gradient):
    """Sum-of-squared-residuals objective for the regression of ``y`` on ``X``.

    Reads the notebook-level arrays ``X`` and ``y``.

    Parameters
    ----------
    params : np.ndarray
        Current coefficient vector; reshaped to a column internally.
    gradient : np.ndarray
        Output buffer, overwritten in place with the gradient
        ``2 * X.T @ (X @ params - y)``.

    Returns
    -------
    float
        ``sum((X @ params - y) ** 2)``.
    """
    residuals = X @ params.reshape(-1, 1) - y.reshape(-1, 1)
    # Parenthesize the product: `2 * X.T @ residuals` parses as
    # `(2 * X.T) @ residuals`, which materializes a scaled copy of the whole
    # design matrix on every evaluation. Scaling the small result instead
    # gives the same gradient without that temporary.
    gradient[:] = 2 * (X.T @ residuals).ravel()
    return np.sum(residuals**2)


In [4]:
%%time
# Create an L-BFGS optimizer
optimizer = pyensmallen.L_BFGS()
(result := optimizer.optimize(linear_regression_objective,
                            np.random.rand(k)))

CPU times: user 5 s, sys: 175 ms, total: 5.18 s
Wall time: 334 ms


array([0.51556024, 0.94691468, 0.23404849, 0.55121759, 0.97818756,
       0.24338623, 0.64700696, 0.70195589, 0.26487498, 0.77280983,
       0.78267599, 0.36787315, 0.72791074, 0.06571446, 0.72615144,
       0.38766298, 0.03820425, 0.39468909, 0.04304362, 0.72195013])

## scipy

In [5]:
import scipy.optimize

In [6]:
%%time
(result := scipy.optimize.minimize(
    fun = lambda b: np.sum((X @ b - y)**2),
    x0 = np.random.rand(k),
    jac = lambda b: 2 * X.T @ (X @ b - y),
    ).x
)

CPU times: user 1min 36s, sys: 2.57 s, total: 1min 39s
Wall time: 6.62 s


array([0.51556024, 0.94691468, 0.23404849, 0.55121759, 0.97818756,
       0.24338623, 0.64700696, 0.70195589, 0.26487498, 0.77280983,
       0.78267599, 0.36787315, 0.72791074, 0.06571446, 0.72615144,
       0.38766298, 0.03820425, 0.39468909, 0.04304362, 0.72195013])

## cvxpy

In [7]:
import cvxpy as cp

In [8]:
%%time
b = cp.Variable(k)
# cost = cp.sum_squares(X @ b - y)  / n
cost = cp.norm(X @ b - y, p=2)**2 / n
prob = cp.Problem(cp.Minimize(cost))
prob.solve(solver=cp.SCS)
b.value

CPU times: user 22.4 s, sys: 4 s, total: 26.4 s
Wall time: 26.3 s


array([0.51556024, 0.94691468, 0.23404849, 0.55121759, 0.97818756,
       0.24338623, 0.64700696, 0.70195589, 0.26487498, 0.77280983,
       0.78267599, 0.36787315, 0.72791074, 0.06571446, 0.72615144,
       0.38766298, 0.03820425, 0.39468909, 0.04304362, 0.72195013])

## Closed-form solution runtime

In [9]:
%%time
(np_solution := np.linalg.lstsq(X, y, rcond=None)[0])

CPU times: user 2.71 s, sys: 6.08 ms, total: 2.72 s
Wall time: 375 ms


array([0.51556024, 0.94691468, 0.23404849, 0.55121759, 0.97818756,
       0.24338623, 0.64700696, 0.70195589, 0.26487498, 0.77280983,
       0.78267599, 0.36787315, 0.72791074, 0.06571446, 0.72615144,
       0.38766298, 0.03820425, 0.39468909, 0.04304362, 0.72195013])