# Linear models with Polars-ols

The Polars-ols plugin lets you fit linear models using Polars expressions.


In [1]:
import polars as pl
import polars_ols as pls  
import numpy as np

Polars-ols is a Polars *plugin*. 

When we import a plugin, it registers its *namespace* with Polars. 

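A namespace is a set of related expressions grouped under a common name. Polars-ols registers the `least_squares` namespace, so its expressions are reached as `pl.col("y").least_squares.<method>(...)`.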

In [None]:
# Use a seeded generator for the noise: with the unseeded global NumPy state,
# every kernel restart would draw different noise, so `y` and every fit below
# would change from run to run.
rng = np.random.default_rng(seed=42)

df = pl.DataFrame(
    {
        "x1": [0.72, -2.43, -0.63, 0.05, -0.07, 0.65, -0.02, -1.64, -0.92, -0.27],
        "x2": [0.24, 0.18, -0.95, 0.23, 0.44, 1.01, -2.08, -1.36, 0.01, 0.75],
    }
).with_columns(
    # Target: y = 2*x1 + 3*x2 + standard-normal noise, one draw per row (10 rows).
    y=2 * pl.col("x1") + 3 * pl.col("x2") + rng.standard_normal(10)
)

df.head()

x1,x2,y
f64,f64,f64
0.72,0.24,2.146189
-2.43,0.18,-4.171584
-0.63,-0.95,-3.602901
0.05,0.23,1.808428
-0.07,0.44,0.82377


In [7]:
# Scatter plot of the target y against the first feature x1.
df.plot.scatter(x="x1", y="y")

## Fitting ordinary least squares (OLS)

In [8]:
# OLS regression of y on x1 and x2, built as a Polars expression.
# No intercept term is requested here; the result column is named "ols".
ols_expr = (
    pl.col("y")
    .least_squares.ols(pl.col("x1"), pl.col("x2"))
    .alias("ols")
)

### Add the expression to a `DataFrame`

In [9]:
# Evaluate the OLS expression and append its result as the "ols" column.
df.with_columns(ols_expr)

x1,x2,y,ols
f64,f64,f64,f64
0.72,0.24,2.146189,2.153236
-2.43,0.18,-4.171584,-4.35733
-0.63,-0.95,-3.602901,-4.059114
0.05,0.23,1.808428,0.776567
-0.07,0.44,0.82377,1.152502
0.65,1.01,4.350243,4.275685
-0.02,-2.08,-6.728496,-6.153822
-1.64,-1.36,-7.548245,-7.295168
-0.92,0.01,-1.519577,-1.8206
-0.27,0.75,-0.083639,1.661492


## Coefficients

In [11]:
# Same regression as before, but ask for the fitted coefficients
# (mode="coefficients") and include an intercept term in the model.
ols_coeff_expr = (
    pl.col("y")
    .least_squares.ols(
        pl.col("x1"),
        pl.col("x2"),
        mode="coefficients",
        add_intercept=True,
    )
    .alias("ols_intercept")
)

In [12]:
# Evaluate the coefficient expression on its own.
df.select(ols_coeff_expr)

ols_intercept
struct[3]
"{1.967126,2.935878,-0.106382}"


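The coefficients are returned as a single `pl.Struct` column whose fields are, in order, `x1`, `x2` and `const` (the intercept). Use `unnest` to expand the struct into one column per coefficient.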

In [14]:
# Expand the coefficient struct into one column per field.
df.select(ols_coeff_expr).unnest("ols_intercept")

x1,x2,const
f64,f64,f64
1.967126,2.935878,-0.106382


## Regularized regression

Polars-ols also provides regularized linear models:

- Lasso regression (uses an L1 norm for the regularization)
- Ridge regression (uses an L2 norm for the regularization)
- Elastic net regression (uses both L1 and L2 norms for the regularization)

In [15]:
# The three regularized fits below regress y on x1 and x2, each with an
# intercept and the same penalty strength alpha.

# Lasso: L1 penalty.
lasso_expr = pl.col("y").least_squares.lasso(
    pl.col("x1"), pl.col("x2"), alpha=0.0001, add_intercept=True
)

# Ridge: L2 penalty.
ridge_expr = pl.col("y").least_squares.ridge(
    pl.col("x1"), pl.col("x2"), alpha=0.0001, add_intercept=True
)

# Elastic net: mix of L1 and L2 penalties, weighted by l1_ratio.
elastic_expr = pl.col("y").least_squares.elastic_net(
    pl.col("x1"), pl.col("x2"), alpha=0.0001, l1_ratio=0.5, add_intercept=True
)

In [None]:
# Append the rounded output of each regularized model side by side for comparison.
df.with_columns(
    predictions_lasso=lasso_expr.round(3),
    predictions_ridge=ridge_expr.round(3),
    predictions_elastic=elastic_expr.round(3),
)

x1,x2,y,predictions_lasso,predictions_ridge,predictions_elastic
f64,f64,f64,f64,f64,f64
0.72,0.24,2.146189,2.015,2.015,2.014
-2.43,0.18,-4.171584,-4.358,-4.358,-4.358
-0.63,-0.95,-3.602901,-4.135,-4.135,-4.135
0.05,0.23,1.808428,0.667,0.667,0.667
-0.07,0.44,0.82377,1.048,1.048,1.048
0.65,1.01,4.350243,4.137,4.137,4.137
-0.02,-2.08,-6.728496,-6.252,-6.252,-6.252
-1.64,-1.36,-7.548245,-7.325,-7.325,-7.325
-0.92,0.01,-1.519577,-1.887,-1.887,-1.887
-0.27,0.75,-0.083639,1.564,1.564,1.564


## Fitting models by groups

In [18]:
# Ten observations split into two groups of five: rows 0-4 belong to group 0,
# rows 5-9 to group 1.
df_groups = pl.DataFrame(
    {
        "y": [1.16, -2.16, -1.57, 0.21, 0.22, 1.6, -2.11, -2.92, -0.86, 0.47],
        "x1": [0.72, -2.43, -0.63, 0.05, -0.07, 0.65, -0.02, -1.64, -0.92, -0.27],
        "x2": [0.24, 0.18, -0.95, 0.23, 0.44, 1.01, -2.08, -1.36, 0.01, 0.75],
        "groups": [0] * 5 + [1] * 5,
    }
)

df_groups

y,x1,x2,groups
f64,f64,f64,i64
1.16,0.72,0.24,0
-2.16,-2.43,0.18,0
-1.57,-0.63,-0.95,0
0.21,0.05,0.23,0
0.22,-0.07,0.44,0
1.6,0.65,1.01,1
-2.11,-0.02,-2.08,1
-2.92,-1.64,-1.36,1
-0.86,-0.92,0.01,1
0.47,-0.27,0.75,1


### Fit a separate linear model for each group

In [19]:
# OLS of y on x1 and x2, evaluated as a window expression over "groups" so
# that each group is handled separately; the result column is named "ols".
ols_groups_expr = (
    pl.col("y")
    .least_squares.ols(pl.col("x1"), pl.col("x2"))
    .over("groups")
    .alias("ols")
)

In [21]:
# Evaluate the grouped OLS expression and append its result as the "ols" column.
df_groups.with_columns(ols_groups_expr)

y,x1,x2,groups,ols
f64,f64,f64,i64,f64
1.16,0.72,0.24,0,0.948144
-2.16,-2.43,0.18,0,-2.229383
-1.57,-0.63,-0.95,0,-1.555128
0.21,0.05,0.23,0,0.274997
0.22,-0.07,0.44,0,0.362075
1.6,0.65,1.01,1,1.624467
-2.11,-0.02,-2.08,1,-2.094804
-2.92,-1.64,-1.36,1,-2.912726
-0.86,-0.92,0.01,1,-0.862589
0.47,-0.27,0.75,1,0.492418
