In [None]:
import polars as pl
import polars_ds as pld
import numpy as np

In [None]:
def get_random_data(size: int = 4_000, r: float = 0.1) -> pl.DataFrame:
    """Build a random neighbor graph used by the graph-expression demos.

    Samples three coordinate columns (``val1``, ``val2``, ``val3``) with
    ``sample_uniform(low=0.0, high=1.0)`` for ``size`` ids, then runs
    ``query_radius_ptwise`` over those coordinates with radius ``r``.
    No seed is passed, so the output presumably differs between calls.

    Parameters
    ----------
    size
        Number of rows to generate; ids are ``range(size)``.
    r
        Radius handed to ``query_radius_ptwise``. Per the inline note on
        ``dist`` below, "l2" is actually squared l2.

    Returns
    -------
    pl.DataFrame
        Two columns: ``id`` (cast to UInt64) and ``friends``, the list
        produced by the radius query with its first element sliced off.
    """
    points = pl.DataFrame({
        "id": range(size),
    }).with_columns(
        pl.col("id").cast(pl.UInt64),
        pl.col("id").stats.sample_uniform(low=0.0, high=1.0).alias("val1"),
        pl.col("id").stats.sample_uniform(low=0.0, high=1.0).alias("val2"),
        pl.col("id").stats.sample_uniform(low=0.0, high=1.0).alias("val3"),
    )

    return points.select(
        pl.col("id"),
        pl.col("id").num.query_radius_ptwise(
            pl.col("val1"), pl.col("val2"), pl.col("val3"),  # Columns used as the coordinates in n-d space
            r = r,
            dist = "l2",  # actually this is squared l2
            parallel = True
        # Drop the first entry of every neighbor list — presumably the point itself; confirm.
        ).list.slice(offset=1).alias("friends"),
    )


In [None]:
# Build the default-size random graph and preview its first rows.
df = get_random_data()
df.head()

In [None]:

# Apply the `eigen_centrality2` graph expression to the "friends" lists.
# NOTE(review): presumably an eigenvector-centrality implementation that runs
# `n_iter` iterations — confirm against the polars_ds docs.
df.select(pl.col("friends").graph.eigen_centrality2(n_iter=30))

In [None]:
%%timeit
df.select(
    pl.col("friends").graph.eigen_centrality3(n_iter = 30)
)

In [None]:
# %%timeit
# (Timing is disabled; a cell magic only works as the first line of the cell.)
# Run the `reachable` graph expression for node 1503 on the "friends" lists.
df.select(pl.col("friends").graph.reachable(node=1503))

In [None]:
# Rebinds `df`: from here on it holds the contents of bench_graph.parquet
# (path relative to the working directory), not the frame built earlier.
df = pl.read_parquet("bench_graph.parquet")
df.head()

In [None]:
# Convert the "friends" list column into plain nested Python lists.
# `Series.to_list()` keeps a null adjacency list as None, whereas calling
# `list(...)` on each element raises TypeError on a null row.
edges_as_list = df["friends"].to_list()
edges_as_list

In [None]:
# Run the `shortest_path` graph expression towards node 0 and print the
# resulting entry of every row on its own line.
res = df.select(pl.col("friends").graph.shortest_path(target=0, parallel=True))
for e in res.get_column("friends"):
    print(e)

In [None]:
import networkx as nx

In [None]:
# Constructing Graph
# Undirected graph with unit edge weights; row index i is the node id and
# every entry j of its "friends" list becomes an edge (i, j).
graph = nx.Graph()
for i, edges in enumerate(df["friends"]):
    if edges is not None:  # a null adjacency list contributes no edges
        for j in edges:
            graph.add_edge(i, j, weight = 1)

# Generating output
# For every row, the shortest path to node 0 without the starting node itself
# (path[1:]), or None when there is none.
paths = []
for i in range(len(df["friends"])):
    try:
        path = nx.shortest_path(graph, i, 0, weight="weight")
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        # NetworkXNoPath: i and 0 are disconnected.
        # NodeNotFound: i (or 0) never appeared in any edge, so it is not in the graph.
        # Anything else is a real error and is allowed to propagate.
        paths.append(None)
    else:
        paths.append(path[1:])

paths

In [None]:
# Rebinds `df` again, this time to the contents of sample.csv; the cells
# below read its "wheel_x_speed" column.
df = pl.read_csv("sample.csv")
df

In [None]:
# Fill nulls in "wheel_x_speed" (forward first, then backward for any leading
# nulls) and apply the `num.rfft` expression to the filled column.
(
    df
    .select(pl.col("wheel_x_speed").forward_fill().backward_fill())
    .select(pl.col("wheel_x_speed").num.rfft())
)


In [None]:
# Same null-filled "wheel_x_speed" input, but calling rfft with return_full=True.
# NOTE(review): presumably this returns the full spectrum rather than the
# half that a real FFT normally yields — confirm against the polars_ds docs.
(
    df
    .select(pl.col("wheel_x_speed").forward_fill().backward_fill())
    .select(pl.col("wheel_x_speed").num.rfft(return_full=True))
)


In [None]:
# NOTE(review): `.num` is only the namespace accessor — no method is called on
# it, so `select` receives a namespace object instead of an expression.
# Presumably a leftover stub that errors when run; confirm, then either finish
# the call or delete the cell.
df.select(
    pl.col("val1").num
)

In [None]:
# Build a small comparison frame from two rfft outputs:
#   "arr"  - the rfft result of val1 as returned
#   "list" - the rfft result of val2 converted with `.arr.to_list()`
test = df.select(
    pl.col("val1").num.rfft().alias("arr"),
    pl.col("val2").num.rfft().arr.to_list().alias("list"),
)
test.head()

In [None]:
%%timeit
test.select(
    pl.col("arr").arr.first().pow(2) + pl.col("arr").arr.last().pow(2)
)

In [None]:
%%timeit
test.select(
    pl.col("list").list.eval(pl.element().dot(pl.element()))
)

In [None]:
# Pack val1 and val2 into one list column, then convert that column with
# `list.to_array(2)`.
(
    df
    .select(pl.concat_list(pl.col("val1"), pl.col("val2")).alias("list"))
    .with_columns(pl.col("list").list.to_array(2))
)

In [None]:

# Add (or overwrite) a "best friends" column on `df`: the result of a radius
# query with r = 0.05 over the val1/val2/val3 coordinates.
df = df.with_columns(
    pl.col("id")
    .num.query_radius_ptwise(
        # Columns used as the coordinates in n-d space
        pl.col("val1"),
        pl.col("val2"),
        pl.col("val3"),
        r=0.05,
        dist="l2",  # actually this is squared l2
        parallel=True,
    )
    .alias("best friends")
)
df

In [None]:
# Length of each "best friends" list, one value per row.
df.select(pl.col("best friends").list.len())

In [None]:
# Run `shortest_path_const_cost` on the "best friends" lists with 19 as the
# first argument.
# NOTE(review): 19 is presumably the target node id — confirm against polars_ds.
df.select(pl.col("best friends").graph.shortest_path_const_cost(19, parallel=True))

In [None]:
# Create the two random string columns that the string-distance cells below
# read from `df`. The result is bound back to `df`, and "s2" is generated as
# well, so those cells find both columns after a fresh Restart & Run All.
# NOTE(review): assumes `df` has a column "a"; no visible cell creates it — confirm.
df = df.with_columns(
    pl.col("a").stats.rand_str(min_size=4,max_size=6).alias("s1"),
    pl.col("a").stats.rand_str(min_size=4,max_size=6).alias("s2"),
)
df

In [None]:
# Compare `str2.hamming` between s1 and s2 with pad=True and pad=False.
df.with_columns(
    pl.col("s1").str2.hamming(pl.col("s2"), pad=True).alias("hamming pad"),
    pl.col("s1").str2.hamming(pl.col("s2"), pad=False).alias("hamming no pad"),
)

In [None]:
# Attach the hamming distance between s1 and s2, then keep only the rows that
# pass `hamming_filter` with bound = 2.
(
    df
    .with_columns(pl.col("s1").str2.hamming(pl.col("s2")).alias("hamming"))
    .filter(pl.col("s1").str2.hamming_filter(pl.col("s2"), bound=2))
)

In [None]:
%%timeit
df.select(
    pl.col("s1").str2.levenshtein(pl.col("s2")) < 3
) # 3.25ms

In [None]:
%%timeit
df.select(
    pl.col("s1").str2.levenshtein_filter(pl.col("s2"), 3)
) # 2.43ms