In [None]:
import polars as pl
import polars_ds as pds
# Print the installed polars_ds version so the outputs below can be tied to a release.
print(pds.__version__)

In [None]:
# Number of rows in the synthetic frame shared by the cells below.
size = 2000

# Every random column gets its own fixed seed: the columns stay different from
# one another, and a Restart & Run All reproduces the same frame.
df = pl.DataFrame({
    "id": range(size),
}).with_columns(
    pds.random(seed=1).alias("var1"),
    pds.random(seed=2).alias("var2"),
    pds.random(seed=3).alias("var3"),
    pds.random(seed=4).alias("r"),
    (pds.random(seed=5) * 10).alias("rh"),
    # x1..x3 are selected by the convolution, least-squares and similarity cells
    # further down; they were missing from this frame, so those cells failed on
    # a fresh kernel.
    pds.random(seed=6).alias("x1"),
    pds.random(seed=7).alias("x2"),
    pds.random(seed=8).alias("x3"),
    pl.col("id").cast(pl.UInt32),
).with_columns(
    # Noisy linear target for the regression cells. It needs a second
    # with_columns because x1..x3 only exist once the first one has run.
    # NOTE(review): the coefficients and noise scale are placeholders.
    (
        pl.col("x1") * 0.5
        - pl.col("x2") * 0.3
        + pl.col("x3") * 0.2
        + pds.random(seed=9) * 0.1
    ).alias("y")
)

In [None]:
import numpy as np

# Feature matrix for the nearest-neighbour cells: the three var* columns
# exported as a C-ordered NumPy array.
X = (
    df
    .select("var1", "var2", "var3")
    .to_numpy(order="c")
)
X  # shown as the cell output

In [None]:
from polars_ds._polars_ds import PyKDT

In [None]:
# Build the polars_ds k-d tree over the rows of X.
# NOTE(review): "sql2" presumably selects squared Euclidean distance — confirm against PyKDT.
kdt = PyKDT(X, distance="sql2")

In [None]:
# Time the same k=3 query (all rows of X against the tree built from X),
# first with parallel=False, then with parallel=True.
%timeit kdt.knn(X, k = 3, epsilon = 0., max_dist_bound = 9999.0, parallel = False)
%timeit kdt.knn(X, k = 3, epsilon = 0., max_dist_bound = 9999.0, parallel = True)

In [None]:
# Same non-parallel query as the timed cell, run once so its result is shown.
kdt.knn(
    X,
    k=3,
    epsilon=0.0,
    max_dist_bound=9999.0,
    parallel=False,
)

In [None]:
from scipy.spatial import KDTree

In [None]:
# SciPy k-d tree built over the same X that was given to PyKDT.
tree = KDTree(X)

In [None]:
# Query the SciPy tree with every row of X for its 3 nearest neighbours (p=2).
# NOTE(review): PyKDT above was built with distance="sql2"; if that is a squared
# distance, the distance values here are not directly comparable — confirm.
tree.query(
    X,
    k=3,
    p=2,
    distance_upper_bound=9999.0,
)

In [None]:
# Time the SciPy query with its default worker setting, then with workers=-1.
%timeit tree.query(X, k = 3, p = 2, distance_upper_bound = 9999.0)
%timeit tree.query(X, k = 3, p = 2, workers=-1, distance_upper_bound = 9999.0)

In [None]:
# Check that X is C-contiguous (it was exported with order="c").
X.flags['C_CONTIGUOUS']

In [None]:
%%timeit
# Time an FFT-based convolution of column "x1" with a length-10 constant kernel,
# keeping only the "valid" part of the output.
# NOTE(review): df as built at the top of this notebook has no "x1" column — confirm
# which frame this cell is meant to run against.
df.select(
    pds.convolve(
        "x1",
        kernel = [0.5] * 10,
        method = "fft",
        mode = "valid"
    )
) # 705  -- NOTE(review): presumably a timing noted from an earlier run; the unit is not stated

In [None]:
# Regularisation strengths for the fit below. They are defined in this cell
# because no earlier cell sets them, so a fresh-kernel run stopped here with a
# NameError.
# NOTE(review): placeholder values — set them to the configuration you want to
# compare against scikit-learn.
l1_reg = 0.05
l2_reg = 0.05

# Least squares of y on x1, x2, x3 with both an L1 and an L2 penalty.
df.select(
    pds.query_lstsq(
        "x1", "x2", "x3",
        target = "y",
        l1_reg = l1_reg,
        l2_reg = l2_reg,
        tol = 1e-6
    )
)

In [None]:
from sklearn.linear_model import ElasticNet

# NumPy copies of the regressors and the target for the scikit-learn fit.
# y comes from a one-column select, so it is a 2-D array with a single column.
x = df.select(["x1", "x2", "x3"]).to_numpy()
y = df.select(["y"]).to_numpy()

In [None]:
# scikit-learn's elastic-net parameters. They are defined in this cell because
# no earlier cell sets them, so a fresh-kernel run stopped here with a NameError.
# In scikit-learn's documented form a*||w||_1 + 0.5*b*||w||_2^2, these give
# a = alpha * l1_ratio and b = alpha * (1 - l1_ratio).
# NOTE(review): placeholder values — confirm they correspond to the l1_reg /
# l2_reg passed to pds.query_lstsq, including any difference in scaling.
alpha = 0.1
l1_ratio = 0.5

# No intercept is fitted.
model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=False)

In [None]:
# Fit the ElasticNet model on the NumPy arrays extracted from df.
model.fit(x, y)

In [None]:
# Show the fitted coefficients.
model.coef_

In [None]:
import numpy as np

# Regressors and target as NumPy arrays for the np.linalg.lstsq reference.
# This repeats the extraction done for the scikit-learn cells.
x = df.select(["x1", "x2", "x3"]).to_numpy()
y = df.select(["y"]).to_numpy()

In [None]:
# NumPy reference solution with rcond = 0.5, the same rcond value that is passed
# to pds.query_lstsq_w_rcond in a later cell.
np.linalg.lstsq(x, y, rcond = 0.5)

In [None]:
# Least squares of y on x1, x2, x3 with rcond = 0.5. The expression is named
# "result" and then unnested so its fields become top-level columns of res.
res = (
    df
    .select(
        result=pds.query_lstsq_w_rcond(
            "x1", "x2", "x3",
            target="y",
            rcond=0.5,
            method="l2",
        )
    )
    .unnest("result")
)

In [None]:
# Show the unnested result frame.
res

In [None]:
# Take the first row of the "coeffs" and "singular_values" columns and convert
# each to a NumPy array.
coeffs, svs = (
    res[column][0].to_numpy()
    for column in ("coeffs", "singular_values")
)

coeffs  # shown as the cell output

In [None]:
# Show the singular values extracted from res.
svs

In [None]:
# NOTE(review): 4144.9180 is a hard-coded value, presumably copied from an earlier
# output to compare its square root with the singular values — confirm and
# derive it from a variable instead.
np.sqrt(4144.9180)

In [None]:
from polars_ds.linear_models import LR, OnlineLR


In [None]:
# Design matrix and target for the LR / OnlineLR cells.
# This rebinds X, which earlier held the var1..var3 matrix used for the k-d trees.
X = df.select(["x1", "x2", "x3"]).to_numpy()
y = df.select(["y"]).to_numpy()

In [None]:
import numpy as np

In [None]:
# Ridge Regression (lambda_ = 0.1) with a bias term.
# This rebinds `model`, which earlier held the ElasticNet estimator.
model = LR(lambda_=0.1, fit_bias=True)

# Normal, online regression with a bias term.
online_model = OnlineLR(fit_bias=True)


In [None]:
# Fit the online model on the first 10 rows only.
online_model.fit(X[:10], y[:10])

In [None]:
# NumPy reference fit on the same first 10 rows given to online_model.fit.
# rcond is passed explicitly: NumPy releases before 2.0 emit a FutureWarning when
# it is omitted, and rcond=None selects the default that NumPy 2.0 uses.
# NOTE(review): online_model was created with fit_bias=True, but no constant
# column is added to X here, so the two fits may not be like-for-like — confirm.
np.linalg.lstsq(X[:10], y[:10], rcond=None)

In [None]:
# Pattern to look for in column "x1". It is defined in this cell because no
# earlier cell binds `query`, so a fresh-kernel run stopped here with a NameError.
# NOTE(review): placeholder — this reuses the literal pattern from the later
# query_similar_count cell; replace it with the intended pattern.
query = [0.5, 0.5, 0.1, 0.1, 0.12, 0.22]
query

In [None]:
# Count similar matches of `query` in column "x1" using the "sql2" metric
# and a threshold of 0.5.
df.select(
    pds.query_similar_count(
        query=query, target="x1", metric="sql2", threshold=0.5
    )
)

In [None]:
# Same kind of count for an inline six-value pattern, with a threshold of 0.1.
df.select(
    pds.query_similar_count(
        query=[0.5, 0.5, 0.1, 0.1, 0.12, 0.22],
        target="x1",
        metric="sql2",
        threshold=0.1,
    )
)

In [None]:
# Standardise a literal Series inside a select: subtract its mean and divide by
# its standard deviation. The expression is built only from the literal and
# does not reference any column of df.
q = pl.Series([0.5, 1.0, 0.3])
qq = pl.lit(q)
df.select((qq - qq.mean()) / qq.std())