In [1]:
import polars as pl
import polars_ols as pls
import numpy as np

In [59]:
def _make_data(n_samples: int = 2_000,
               n_features: int = 5,
               n_groups: int = 5,
               noise: float = 0.1,
               seed: "int | None" = None,
              ) -> pl.DataFrame:
    """Generate a synthetic linear-regression frame for the examples in this notebook.

    The features ``x1 .. x{n_features}`` are standard-normal draws and the target is
    ``y = -(x1 + ... + xk) + eps`` with ``eps ~ N(0, noise**2)``, so every true
    coefficient equals -1.

    Parameters
    ----------
    n_samples : number of rows to generate.
    n_features : number of feature columns.
    n_groups : group labels are random integers drawn from ``[0, n_groups)``.
    noise : standard deviation of the additive gaussian noise on ``y``.
    seed : optional seed for reproducible output. When ``None`` (the default) the
        draws come from numpy's global generator, exactly as before this parameter
        existed, so a preceding ``np.random.seed(...)`` is still honoured.

    Returns
    -------
    pl.DataFrame with columns ``x1..xk``, ``y``, ``group`` and ``sample_weights``
    (the weights are uniform draws from ``[0, 1)``).
    """
    # The `np.random` module and a `RandomState` instance expose the same legacy
    # sampling functions, so either one can serve as the source of randomness.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Draw order (x, eps, group, weights) is kept fixed so a given seed always
    # reproduces the same frame.
    x = rng.normal(size=(n_samples, n_features))
    eps = rng.normal(scale=noise, size=n_samples)
    feature_names = [f"x{i + 1}" for i in range(n_features)]
    return pl.DataFrame(data=x, schema=feature_names).with_columns(
        y=pl.lit(-1 * x.sum(axis=1) + eps),
        group=pl.lit(rng.randint(0, n_groups, size=n_samples)),
        sample_weights=pl.lit(rng.rand(n_samples)),
    )

In [60]:
# Demo frame used throughout: 2,000 rows, three features and five group labels.
df = _make_data(
    n_groups=5,
    n_features=3,
    n_samples=2_000,
)

In [61]:
# Preview the generated frame; the rendered table elides the middle rows.
df

x1,x2,x3,y,group,sample_weights
f64,f64,f64,f64,i64,f64
0.601261,-0.052977,0.21547,-0.888736,0,0.116126
1.731443,0.646151,-2.423311,0.046472,3,0.323262
-0.708048,1.789624,-0.234373,-0.705809,1,0.757549
-1.017972,0.777864,0.203982,-0.168796,1,0.478503
1.065668,-0.483242,-0.550395,-0.171939,0,0.872105
…,…,…,…,…,…
-0.648736,0.021036,2.149515,-1.522977,1,0.649506
-0.427719,-0.216672,0.445517,0.174263,2,0.876079
-1.709156,-1.779753,1.061953,2.46746,1,0.118507
-0.255468,0.194217,1.395433,-1.367962,0,0.518952


### 1. Basic Usage: OLS / WLS
- You can use `pls.compute_least_squares` or `least_squares.ols` from the registered namespace. They are equivalent.
- Simply pass an expression producing strictly positive sample weights to the `sample_weights` argument to perform WLS.

In [62]:
# Module-level entry point: the target comes first, then the feature expressions.
ols_expr = pls.compute_least_squares(
    pl.col("y"),
    pl.col("x1"),
    pl.col("x2"),
    pl.col("x3"),
    mode="predictions",
)

# The namespace accessor builds the same expression (compared through its string form).
assert str(ols_expr) == str(
    pl.col("y").least_squares.ols(pl.col("x1"), pl.col("x2"), pl.col("x3"))
)

# Weighted variant: identical features, plus a per-row weight expression.
wls_expr = pl.col("y").least_squares.wls(
    *(pl.col(name) for name in ("x1", "x2", "x3")),
    sample_weights=pl.col("sample_weights"),
)

- The expressions returned are normal polars expressions. You can operate on them lazily, so for example we can compute OLS per group in parallel using `.over(...)` or multiply it by some other expression etc.

In [63]:
(
    df.lazy()
    .with_columns(
        # OLS evaluated separately within each group
        ols_expr.over("group").alias("predictions_ols_group"),
        # a single OLS fit over the whole frame
        ols_expr.alias("predictions_ols"),
        # WLS predictions, zeroed wherever group != 2
        (wls_expr * (pl.col("group") == 2)).alias("predictions_wls_masked"),
    )
    .collect()
    .tail(10)
)

x1,x2,x3,y,group,sample_weights,predictions_ols_group,predictions_ols,predictions_wls_masked
f64,f64,f64,f64,i64,f64,f32,f32,f32
-1.694323,1.954149,0.327192,-0.60444,1,0.117535,-0.59859,-0.589218,-0.0
1.773093,0.765854,-1.431583,-0.970678,4,0.839584,-1.094716,-1.103445,-0.0
-0.176328,0.573347,-0.614593,0.21017,4,0.705172,0.217926,0.216044,0.0
0.021171,0.058956,0.296067,-0.416454,2,0.753221,-0.374001,-0.375123,-0.375364
1.012068,-1.626015,0.2702,0.345547,1,0.973876,0.355354,0.34566,0.0
-0.648736,0.021036,2.149515,-1.522977,1,0.649506,-1.509339,-1.517859,-0.0
-0.427719,-0.216672,0.445517,0.174263,2,0.876079,0.197837,0.198173,0.198149
-1.709156,-1.779753,1.061953,2.46746,1,0.118507,2.431038,2.420419,0.0
-0.255468,0.194217,1.395433,-1.367962,0,0.518952,-1.347131,-1.330657,-0.0
1.281586,0.069997,-0.707767,-0.485081,2,0.882951,-0.646275,-0.640843,-0.641546


- The `mode` parameter controls the type of output produced. You can choose from {`predictions`, `coefficients`, `residuals`}. It defaults to `predictions`.
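- `coefficients` normally resizes the output to match the number of features (plus one when `add_intercept=True`, as in the example below, where two features yield three coefficients)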

In [64]:
# Two features plus an intercept: the result column holds three coefficients.
df.select(
    pl.col("y")
    .least_squares.ols(
        pl.col("x1"),
        pl.col("x2"),
        mode="coefficients",
        add_intercept=True,
    )
    .alias("coefficients")
)

coefficients
f32
-1.0107
-1.012987
0.002998


### 2. Regularized Models
- Ridge (`least_squares.ridge`), Lasso (`least_squares.lasso`) and Elastic Net (`least_squares.elastic_net`), each with an optional non-negative constraint, are implemented
- Apart from ridge, which is solved in closed form, the rust implementation for regularized models is cyclic coordinate descent with a soft thresholding function that supports an arbitrary combination of L1 / L2 penalties and non-negative constraint.
- `sample_weights` and `mode` are general parameters applicable to all models supported by this package

Parameters specific to regularized models are contained in `OLSKwargs`:
- alpha: scalar representing L1 or L2 penalty strength.
- l1_ratio: mixing parameter for ElasticNet regularization (0 for Ridge, 1 for LASSO).
- max_iter: maximum number of coordinate descent iterations
- tol: tolerance for convergence criterion
- positive: boolean enforcing non-negativity constraints on coefficients

In [65]:
# Inspect the signature and docstring of OLSKwargs.
# NOTE: the trailing `?` is IPython help syntax, so this line only runs inside
# IPython/Jupyter and is not valid in a plain Python script.
pls.OLSKwargs?

[0;31mInit signature:[0m
[0mpls[0m[0;34m.[0m[0mOLSKwargs[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0malpha[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mfloat[0m[0;34m][0m [0;34m=[0m [0;36m0.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0ml1_ratio[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mfloat[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmax_iter[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mint[0m[0;34m][0m [0;34m=[0m [0;36m1000[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtol[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mfloat[0m[0;34m][0m [0;34m=[0m [0;36m0.0001[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpositive[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mbool[0m[0;34m][0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;32mNone[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      Specifies parameters relevant for regularized linear models: 

In [75]:
# Elastic net with an even L1/L2 mix and coefficients constrained to be non-negative.
elastic_net_expr = (
    pl.col("y")
    .least_squares.elastic_net(
        pl.col("x1"),
        pl.col("x2"),
        pl.col("x3"),
        alpha=0.0001,
        l1_ratio=0.5,
        positive=True,
        mode="coefficients",
    )
    .alias("coef_enet_non_negative")
)

# Sample-weighted ridge with a strong L2 penalty.
ridge_expr = (
    pl.col("y")
    .least_squares.ridge(
        pl.col("x1"),
        pl.col("x2"),
        pl.col("x3"),
        alpha=100.0,
        sample_weights=pl.col("sample_weights"),
        mode="coefficients",
    )
    .alias("coef_ridge")
)

# Show both coefficient vectors side by side.
df.select(elastic_net_expr, ridge_expr)

coef_enet_non_negative,coef_ridge
f32,f32
0.0,-0.908504
0.0,-0.907049
0.0,-0.914558


### 3. Formula API

- For those who like specifying models in patsy formula syntax, that is also supported
- You can either use the `least_squares_from_formula` module-level public function or `least_squares.from_formula` from the registered namespace
- It tries to be clever and maps to the correct underlying implementation based on the model specific parameters you specify

In [83]:
# Compute the residuals in two equivalent ways and correlate the results.
# Formula syntax: "x2:x3" denotes a multiplicative interaction, "-1" denotes no intercept.
df.select(
    # 1) request the residuals directly from the module-level formula function
    pls.least_squares_from_formula(
        "y ~ x1 + x2:x3 -1",
        mode="residuals",
    ).alias("residuals_1"),
    # 2) subtract the namespace formula predictions from the target by hand
    (
        pl.col("y")
        - pl.col("y").least_squares.from_formula(
            "x1 + x2:x3 -1",
            mode="predictions",
        )
    ).alias("residuals_2"),
).corr()

residuals_1,residuals_2
f64,f64
1.0,1.0
1.0,1.0


In [85]:
# Knows to use the coordinate descent implementation because of non-negativity.
nnls_formula_expr = pl.col("y").least_squares.from_formula(
    "x1 + x2 + x3",
    alpha=0.0001,
    positive=True,
)

# Knows that it needs to use closed form ridge w/ sample weighting.
ridge_formula_expr = pl.col("y").least_squares.from_formula(
    "x1 + x2 + x3",
    alpha=0.0001,
    sample_weights=pl.col("sample_weights"),
)

### 4. Dynamic Regression Models

- Consider the situation where you want to compute coefficients in an expanding or rolling window manner
    - naively, you could manually re-compute standard OLS function over consecutive windows (e.g. `.rolling(...).agg(...)`)
    - ... but that would be wasteful: (X.T X) and (X.T Y) are only changing by one row (in case of expanding) or two rows (in case of rolling, an addition and a subtraction)
- This extension package provides rust implementations `.least_squares.{rolling_ols, expanding_ols, rls}` which efficiently update coefficients as new samples are observed
- See [insert URL] for details (TODO: add the reference link), but the key idea is to use the Sherman–Morrison formula or the Woodbury matrix identity to recursively update summary statistics or coefficient vectors
- Formula API is also supported and the correct implementation is chosen based on parameters provided

In [115]:
df.select(
    # Formula API with window parameters, evaluated per group.
    pl.col("y")
    .least_squares.from_formula(
        "x1 + x2 + x3 -1",
        window_size=252,
        min_periods=5,
        alpha=0.0001,
        mode="coefficients",
    )
    .over("group")
    .alias("rolling_ridge_coef"),
    # Recursive least squares, evaluated per group.
    pl.col("y")
    .least_squares.rls(
        pl.col("x1"),
        pl.col("x2"),
        pl.col("x3"),
        # exponential memory proportional to a half-life of 21 samples
        half_life=21.0,
        # prior mean for initial coefficients
        initial_state_mean=[-1.0, -1.0, -1.0],
        # inversely proportional to L2 prior towards prior mean
        initial_state_covariance=10.0,
        mode="coefficients",
    )
    .over("group")
    .alias("recursive_least_squares_coef"),
    # Expanding-window OLS predictions over the whole frame.
    pl.col("y")
    .least_squares.expanding_ols(
        pl.col("x1"),
        pl.col("x2"),
        pl.col("x3"),
        mode="predictions",
    )
    .alias("expanding_ols_pred"),
)


rolling_ridge_coef,recursive_least_squares_coef,expanding_ols_pred
list[f32],list[f32],f32
"[0.0, 0.0, 0.0]","[-1.184433, -0.98375, -1.066094]",-0.71473
"[0.0, 0.0, 0.0]","[-0.999859, -0.999947, -1.000197]",0.036505
"[0.0, 0.0, 0.0]","[-1.026656, -0.932625, -1.008824]",-0.647632
"[0.0, 0.0, 0.0]","[-0.702152, -0.826215, -1.180636]",-0.022153
"[0.0, 0.0, 0.0]","[-1.176702, -0.996126, -1.091328]",-0.022653
…,…,…
"[-0.993268, -1.003389, -0.993135]","[-0.955148, -0.971003, -0.987288]",-1.517609
"[-0.996397, -0.98888, -0.998935]","[-0.421874, -1.801489, -0.889335]",0.198206
"[-0.993697, -1.003618, -0.993094]","[-1.015508, -1.010919, -1.005117]",2.42057
"[-0.994889, -1.001311, -1.006162]","[-0.782951, -1.192956, -0.957612]",-1.330491
