# Minimum Norm Interpolant

In [2]:
import numpy as np
import jax.numpy as jnp
import lineax as lx
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from statsmodels.api import OLS


In [3]:
def sparse_dgp(n = 10_000, p = 20_000, eta = 0.1):
    X = np.c_[np.repeat(1, n),
            np.random.normal(size = n*p).reshape((n, p))
        ]
    # initialize coef vector
    β, nzcount = np.repeat(0.0, p + 1), int(eta * p)
    # choose nzcount number of non-zero coef
    nzid = np.random.choice(p, nzcount, replace=False)
    # set them to random values
    β[nzid] = np.random.randn(nzcount)
    # build in heteroskedasticity
    e = np.random.normal(0, 0.5 + (0.1 * X[:, 1]>0), n)
    # generate y
    y = X @ β + e
    return y, X

y, X = sparse_dgp()


In [4]:
%%time
smols = OLS(y, X).fit()

CPU times: user 2h 23min 9s, sys: 58min 50s, total: 3h 22min
Wall time: 14min 32s


In [6]:

np.linalg.norm(smols.params)

31.803339628159765

Very fast least squares solver (including for minimum norm interpolation problems). 


In [7]:
%%time
sol = lx.linear_solve(                                    # solve # Ax = b
        operator = lx.MatrixLinearOperator(jnp.array(X)), # A
        vector = jnp.array(y),                            # b
        solver=lx.AutoLinearSolver(well_posed=None),      # auto solver with no well-posedness check
    )

betahat = sol.value
# does it interpolate
(y - X @ betahat).max()


CPU times: user 1h 24min 3s, sys: 1.51 s, total: 1h 24min 5s
Wall time: 6min 14s


Array(0.0001564, dtype=float32)

In [8]:
np.linalg.norm(betahat)


31.80334

In [9]:
%%time
m = LinearRegression()
m.fit(X, y)
(y - m.predict(X)).max()


CPU times: user 3h 1min 55s, sys: 0 ns, total: 3h 1min 55s
Wall time: 13min 44s


1.538325022920617e-12

In [10]:
np.linalg.norm(m.coef_)


31.8032580188364