In [1]:
import polars as pl
import polars_ds as pds

In [2]:
import numpy as np

# Benchmark input: 500,000 draws from a standard normal in column "x",
# plus a row-index column and a constant column "const" holding 1.
df = (
    pl.DataFrame({"x": np.random.normal(size=500_000)})
    .with_row_index()
    .with_columns(pl.lit(1).alias("const"))
)

# Convolution kernel: 100 integer ones.
kernel = np.ones(100, dtype=int)

In [3]:
# Convolve column "x" with `kernel` using polars_ds' convolve and its
# default arguments; the resulting frame is this cell's displayed output.
df.select(pds.convolve("x", kernel))


x
f64
0.09674
0.098389
-0.832692
-2.528362
-2.449196
…
1.589634
0.713775
1.39032
-0.547153


In [4]:
# Same convolution of "x" with `kernel`, this time passing parallel=True;
# the resulting frame is this cell's displayed output.
df.select(pds.convolve("x", kernel, parallel=True))

x
f64
0.09674
0.098389
-0.832692
-2.528362
-2.449196
…
1.589634
0.713775
1.39032
-0.547153


In [5]:
from scipy.signal import convolve

In [6]:
# NumPy inputs for the SciPy comparison: the "x" column as an array, and the
# same kernel object that the polars_ds calls use.
arr1, arr2 = df.get_column("x").to_numpy(), kernel

In [7]:
# SciPy reference result: scipy.signal.convolve on the NumPy form of "x" and
# the kernel, with SciPy's default arguments, for comparison with the
# polars_ds output.
convolve(arr1, arr2)

array([ 0.09673956,  0.0983889 , -0.83269205, ...,  1.39032038,
       -0.5471529 ,  0.2443881 ])

In [8]:
# Time the convolution of "x" with `kernel` four ways; each %timeit line is
# measured independently.
# 1) polars_ds with default arguments
%timeit df.select(pds.convolve("x", kernel))
# 2) polars_ds with parallel=True
%timeit df.select(pds.convolve("x", kernel, parallel=True))
# 3) polars_ds with method="fft"
%timeit df.select(pds.convolve("x", kernel, method="fft"))
# 4) SciPy's convolve on the NumPy inputs arr1 / arr2
%timeit convolve(arr1, arr2)

9.2 ms ± 37.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.52 ms ± 38.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
83.5 ms ± 246 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
7.08 ms ± 35.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
# Rebind `df` to a fresh 100,000-row synthetic frame:
#   x, y  -- integers generated by pds.random_int(0, 200)
#   test  -- 50,000 ones followed by the integers 50,000 .. 99,999
df = pds.random_data(size=100_000, n_cols=0).select(
    pds.random_int(0, 200).alias("x"),
    pds.random_int(0, 200).alias("y"),
    pl.Series("test", [1] * 50_000 + [*range(50_000, 100_000)]),
)
df.head()

In [None]:
# Cut "x" into 10 quantile bins (right-closed, duplicate break points
# allowed), keep only each row's break point, count rows per break point,
# sort, and expand the resulting struct column into separate columns.
(
    df.select(
        pl.col("x")
        .qcut(10, left_closed=False, allow_duplicates=True, include_breaks=True)
        .struct.field("brk")
        .value_counts()
        .sort()
    )
    .unnest("brk")
)

In [None]:
# Correlation between "x" and "y" via polars' built-in pl.corr (default method).
df.select(pl.corr("x", "y"))

In [None]:

# Kendall's tau between "x" and "y", computed by polars_ds.
df.select(pds.kendall_tau("x", "y"))

In [None]:
from scipy.stats import kendalltau

# NumPy arrays of the two columns, used as inputs to SciPy's kendalltau.
x = df.get_column("x").to_numpy()
y = df.get_column("y").to_numpy()

In [None]:
%%timeit
# Time SciPy's kendalltau on the NumPy arrays x and y, as a baseline for the
# polars_ds kendall_tau call. nan_policy="omit" is SciPy's option for
# ignoring NaN values. (The %%timeit magic must stay on the cell's first line.)
kendalltau(x,y, nan_policy="omit")

In [None]:
# Hand-written rank statistic of y against x. The expression matches the
# tie-aware form of Chatterjee's xi coefficient:
#
#     xi = 1 - n * sum_i |r[i+1] - r[i]| / (2 * sum_i l[i] * (n - l[i]))
#
# where rows are ordered by x (ties in x broken at random),
#   r[i] = rank of y[i] with method="max"   (count of y values <= y[i])
#   l[i] = rank of -y[i] with method="max"  (count of y values >= y[i])
# and n is the number of rows.
#
# The random tie-break is seeded so that re-running the cell reproduces the
# same value; without a seed the result changes from run to run whenever x
# contains ties.
df.sort(pl.col("x").rank(method="random", seed=42)).select(
    "x",
    "y",
    pl.col("y").rank(method="max").cast(pl.Float64).alias("r"),
    (-pl.col("y")).rank(method="max").cast(pl.Float64).alias("l"),
).with_columns(
    # |r[i+1] - r[i]| between consecutive rows in x order (first row is null).
    pl.col("r").diff().abs().alias("r_abs_diff"),
    # l[i] * (n - l[i]); pl.len() is the row count n.
    (pl.col("l") * (pl.len() - pl.col("l"))).alias("l(n-l)"),
).select(
    # Name the result column instead of leaving it with an auto-generated name.
    (
        1 - (pl.len() / 2) * (pl.col("r_abs_diff").sum() / pl.col("l(n-l)").sum())
    ).alias("xi")
)