In [None]:
import polars as pl
import polars_ds as pds
# This notebook requires polars_ds version >= v0.5.1.
# Print the installed version so it can be checked against that minimum.
print(pds.__version__)

In [None]:
# Small hand-written frame used by the k-NN cells below.
# Column "a" contains a NaN and column "b" contains a null, so the feature
# columns are not all clean.
df = pl.DataFrame(
    data={
        "id": list(range(6)),
        "values": list(range(6)),
        "a": [0.1, 1, 10, 100, float("nan"), 1.0],
        "b": [0.15, 1.5, 15, 150, 1.0, None],
        "c": [0.12, 1.2, 12, 120, 2.0, 2.0],
    }
)
df

In [None]:
# Point-wise k-NN lookup: for every row, find its k = 2 nearest neighbours in
# the (a, b, c) feature space using the "sql2" distance, identified through the
# `id` column passed as `index`.
# The result is a per-row neighbour lookup, not an average, so the output
# column is labelled "neighbors" (the previous "avg" alias was carried over
# from the averaging cell and mislabelled the output).
df.select(
    pl.col("id").cast(pl.Float64),
    pds.query_knn_ptwise(
        "a", "b", "c",
        index = "id",
        k = 2,
        dist = "sql2",
    ).alias("neighbors")
)

In [None]:
# k-NN average with k = 2 over the (a, b, c) features, using `id` as the
# target and no distance weighting (weighted = False). The `id` column is
# shown next to it as Float64 for side-by-side comparison.
# NOTE(review): exact averaging semantics come from polars_ds - confirm there.
df.select([
    pl.col("id").cast(pl.Float64),
    pds.query_knn_avg(
        "a",
        "b",
        "c",
        target="id",
        k=2,
        dist="sql2",
        weighted=False,
    ).alias("avg"),
])

In [None]:
size = 1000

# Random test frame with `size` rows:
#   x1..x6      columns generated with pds.random(0.0, 1.0)
#   x7          same generator, but set to null wherever a separate random
#               draw falls below 0.1
#   categories  generated with pds.random_int(0, 3)
#   id          0..size-1, cast to UInt32 afterwards
#   y           0.5*x1 + 0.25*x2 - 0.15*x3 plus a small random term
df = (
    pds.random_data(size=size, n_cols=0)
    .select(
        *(pds.random(0.0, 1.0).alias(f"x{i}") for i in range(1, 7)),
        pl.when(pds.random(0.0, 1.0) < 0.1)
        .then(None)
        .otherwise(pds.random(0.0, 1.0))
        .alias("x7"),
        pds.random_int(0, 3).alias("categories"),
        id=pl.Series(values=list(range(size))),
    )
    .with_columns(
        pl.col("id").cast(pl.UInt32),
        y=(
            pl.col("x1") * 0.5
            + pl.col("x2") * 0.25
            - pl.col("x3") * 0.15
            + pds.random() * 0.0001
        ),
    )
)

In [None]:
window_size = 5

# Rolling least squares of y on (x1, x2, x3) over windows of `window_size`
# rows. The struct result is unnested, rows with id < window_size - 1 are
# dropped, and only the `coeffs` column is kept so the frame can be compared
# with the brute-force answer computed in the next cell.
df_test = (
    df.select(
        "id",
        "y",
        pds.query_rolling_lstsq(
            "x1",
            "x2",
            "x3",
            target="y",
            window_size=window_size,
        ).alias("result"),
    )
    .unnest("result")
    .filter(pl.col("id") >= window_size - 1)
    .select("coeffs")
)
df_test

In [None]:
# Brute-force reference: fit an ordinary least squares of y on (x1, x2, x3)
# separately on every contiguous slice of `window_size` rows, then stack the
# one-row results in window order.
results = [
    df.slice(start, window_size).select(
        pds.query_lstsq("x1", "x2", "x3", target="y").alias("coeffs")
    )
    for start in range(df.height - window_size + 1)
]

df_answer = pl.concat(results)
df_answer

In [None]:
from polars.testing import assert_frame_equal

# The rolling result must match the per-window brute-force fits.
assert_frame_equal(left=df_test, right=df_answer)

In [None]:
# Least squares of y on (x1, x2) over the whole frame, with skip_null enabled.
# Features are given by column name, as in the other lstsq cells.
df.select(
    pds.query_lstsq("x1", "x2", target="y", skip_null=True)
)

In [None]:
# Display the current contents of `df` for inspection.
df

In [None]:
# Point-wise k-NN lookup that also returns distances (return_dist = True).
#
# `df` was rebound further up to the randomly generated frame, which has no
# "a"/"b"/"c" columns, so on a fresh top-to-bottom run the original column
# names raised a column-not-found error. The query now uses x1..x3, which
# exist on the current `df`, together with its UInt32 `id` column as index.
# The output column is labelled "knn" because it holds neighbour information,
# not an average.
df.select(
    pds.query_knn_ptwise(
        "x1", "x2", "x3",
        index = "id",
        return_dist = True,
        k = 2,
        dist = "sql2",
    ).alias("knn")
)

In [None]:
# k-NN average with k = 2 on the current (randomly generated) `df`.
#
# `df` was rebound further up, so the "a"/"b"/"c" columns this cell used to
# reference no longer exist and the cell failed on a fresh top-to-bottom run.
# It now uses x1..x3 as features and the float column `y` as the target to
# average over each row's neighbours.
df.select(
    pds.query_knn_avg(
        "x1", "x2", "x3",
        target = "y",
        k = 2,
        dist = "sql2",
    ).alias("avg")
)

In [None]:
# One list-valued column: each row holds a list of integers under "friends".
df = pl.DataFrame(
    dict(
        friends=[
            [0, 1],
            [1, 0],
            [2],
            [3],
        ],
    )
)
df

In [None]:
size = 1_000

# Random test frame with `size` rows:
#   x1..x3      columns generated with pds.random(0.0, 1.0)
#   categories  generated with pds.random_int(0, 3)
#   id          0..size-1, cast to UInt32 afterwards
#   y           0.5*x1 + 0.25*x2 - 0.15*x3 plus a small random term
df = (
    pds.random_data(size=size, n_cols=0)
    .select(
        *(pds.random(0.0, 1.0).alias(f"x{i}") for i in range(1, 4)),
        pds.random_int(0, 3).alias("categories"),
        id=pl.Series(values=list(range(size))),
    )
    .with_columns(
        pl.col("id").cast(pl.UInt32),
        y=(
            pl.col("x1") * 0.5
            + pl.col("x2") * 0.25
            - pl.col("x3") * 0.15
            + pds.random() * 0.0001
        ),
    )
)

In [None]:
# Recursive least squares of y on (x1, x2, x3) with start_at = 3.
# The struct result is unnested so its fields become columns next to `y`.
df_recursive_lr = (
    df.select(
        "y",
        pds.query_recursive_lstsq(
            "x1",
            "x2",
            "x3",
            target="y",
            start_at=3,
        ).alias("result"),
    )
    .unnest("result")
)
df_recursive_lr

In [None]:
# Pull the entry at row index 3 of the "betas" column out as a NumPy array.
# NOTE(review): the rolling lstsq cell in this notebook reads its coefficients
# from a field named "coeffs" - confirm "betas" is the field name produced by
# query_recursive_lstsq in the installed polars_ds version.
df_recursive_lr.get_column("betas")[3].to_numpy()