In [1]:
import polars as pl
import numpy as np
import polars_ds as pld

In [2]:
# Synthetic demo frame: 1000 rows with an integer id, three uniform [0, 1)
# columns (val1..val3) and two further uniform columns, `r` in [0, 1) and
# `rh` in [0, 10).
# A seeded generator is used so that Restart Kernel -> Run All reproduces the
# same data (and therefore the same downstream outputs) every time.
rng = np.random.default_rng(42)
df = pl.DataFrame({
    "id": range(1000),
    "val1": rng.random(size=1000),
    "val2": rng.random(size=1000),
    "val3": rng.random(size=1000),
    "r": rng.random(size=1000),
    "rh": rng.random(size=1000) * 10,
})

In [3]:
# Columns used as the coordinates of each row in n-d (here 3-d) space.
coords = (pl.col("val1"), pl.col("val2"), pl.col("val3"))

# Point-wise radius query keyed on `id`; the result is a list column.
# The first entry of every list is sliced off -- presumably the row itself,
# TODO confirm against the polars_ds documentation.
neighbour_ids = (
    pl.col("id")
    .num.query_radius_ptwise(
        *coords,
        r=0.1,
        dist="l2",  # original author's note: this is actually squared l2
        parallel=True,
    )
    .list.slice(offset=1)
    .alias("best friends")
)

# `id` is cast to UInt64 in the same pass that adds the "best friends" column.
df2 = df.with_columns(neighbour_ids, pl.col("id").cast(pl.UInt64))

In [4]:
# Preview the first five rows, including the new "best friends" list column.
df2.head(5)

id,val1,val2,val3,r,rh,best friends
u64,f64,f64,f64,f64,f64,list[u64]
0,0.412197,0.745551,0.7488,0.98582,2.623568,"[376, 274, … 133]"
1,0.441657,0.778822,0.16669,0.501126,8.679967,"[243, 122, … 316]"
2,0.108636,0.809123,0.441843,0.453452,5.023719,"[327, 408, … 303]"
3,0.608282,0.424983,0.838739,0.636821,1.229568,"[125, 291, … 558]"
4,0.957586,0.869677,0.705383,0.974019,7.89945,"[541, 468, … 899]"


In [8]:
# Treat the "best friends" lists as graph edges, score every row with
# eigen_centrality(), and report the position of the highest score.
most_central = pl.col("best friends").graph.eigen_centrality().arg_max()
df2.select(most_central)

best friends
u32
416


In [None]:
# Toy frame: integer column `a`, a three-level string column `b`
# (200 "cat", 500 "dogs", 300 "lizards") and a random 0/1 column `y`.
# The labels are drawn from a seeded generator so the cell is reproducible.
# NOTE(review): this rebinds `df`; earlier cells that read `df` (val1/val2/val3)
# will no longer work if re-run after this one.
rng = np.random.default_rng(42)
df = pl.DataFrame({
    "a": range(1000),
    "b": ["cat"] * 200 + ["dogs"] * 500 + ["lizards"] * 300,
    "y": rng.integers(0, high=2, size=1000),  # upper bound is exclusive -> values in {0, 1}
})
df.head()

In [None]:
# Regression demo frame with 5000 rows:
#   a = 0..4999, b = sqrt(a), y = a + uniform noise in [0, 0.1).
# The square root and the ramp are computed with vectorised numpy calls
# instead of Python-level loops, and the noise comes from a seeded generator
# so the fitted coefficients are reproducible.
# NOTE(review): this rebinds `df` again, replacing the frame built above.
n_rows = 5000
rng = np.random.default_rng(42)
ramp = np.arange(n_rows)
df = pl.DataFrame({
    "a": range(n_rows),
    "b": np.sqrt(ramp),
    "y": 0.1 * rng.random(size=n_rows) + ramp,
})
df.head()

In [None]:

# Least-squares report of y on a and b without a bias (intercept) term.
# The result is a struct column, unnested into one column per field.
ols_report = (
    pl.col("y")
    .num.lstsq_report(pl.col("a"), pl.col("b"), add_bias=False)
    .alias("report")
)
df.select(ols_report).unnest("report")

In [None]:
from statsmodels.api import OLS
# Export the response column and the two regressors as numpy arrays.
target = df.get_column("y").to_numpy()
data = df.select(["a", "b"]).to_numpy()

In [None]:
# psi between columns "a1" and "a2", extracted as a single scalar.
# NOTE(review): none of the frames built in the visible cells has columns
# named "a1" or "a2" -- confirm which frame/columns this cell is meant to use.
psi_expr = pl.col("a1").num.psi(pl.col("a2"))
df.select(psi_expr).item(0, 0)

In [None]:
# Discrete psi of column "b" against column "a".
# NOTE(review): `b` is a string column in one earlier `df` and sqrt floats in
# the most recent one -- confirm which frame this cell is intended for.
psi_discrete_expr = pl.col("b").num.psi_discrete(pl.col("a"))
df.select(psi_discrete_expr)

In [None]:
# Count occurrences of each distinct value in "a"; value_counts() yields a
# struct column, which is unnested into separate columns.
a_counts = pl.col("a").value_counts()
df.select(a_counts).unnest("a")

In [None]:
# Displays the `name` attribute of the value_counts() expression.
# NOTE(review): looks like leftover interactive exploration -- confirm whether
# this cell is still needed.
counts_expr = pl.col("a").value_counts()
counts_expr.name