In [15]:
import numpy as np
import polars as pl
from polars_pipeline import Pipeline
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)

# Column layout of the combined table: the four measurements are stored as
# 32-bit floats, and the class label is first read as a small unsigned integer.
measurement_columns = ("sepal_length", "sepal_width", "petal_length", "petal_width")
iris_schema = {**dict.fromkeys(measurement_columns, pl.Float32), "species": pl.UInt8}

# Append the label vector to the feature matrix as one extra trailing column.
iris_matrix = np.column_stack([X, y])  # type: ignore

# Re-encode the integer label as a categorical built from its string form
# (shown as "0" ... "2" in the cell output).
species_as_category = pl.col("species").cast(pl.Utf8).cast(pl.Categorical)

all_df = pl.from_numpy(iris_matrix, schema=iris_schema).with_columns(
    species_as_category
)
all_df

sepal_length,sepal_width,petal_length,petal_width,species
f32,f32,f32,f32,cat
5.1,3.5,1.4,0.2,"""0"""
4.9,3.0,1.4,0.2,"""0"""
4.7,3.2,1.3,0.2,"""0"""
4.6,3.1,1.5,0.2,"""0"""
5.0,3.6,1.4,0.2,"""0"""
…,…,…,…,…
6.7,3.0,5.2,2.3,"""2"""
6.3,2.5,5.0,1.9,"""2"""
6.5,3.0,5.2,2.0,"""2"""
6.2,3.4,5.4,2.3,"""2"""


In [18]:
# The UMAP step receives its own small pipeline that drops the "species"
# label column, so only the remaining columns are passed along with it.
umap_preprocessing = Pipeline().drop("species")

# One plotting step per chart type. Steps given `hue="species"` are
# presumably grouped/coloured by the label column -- confirm against the
# polars_pipeline docs. NOTE(review): `log_dir` looks like the output
# location for the generated figures; verify.
plotter = (
    Pipeline(log_dir="log")
    .plot.box()
    .plot.violin()
    .plot.hist(hue="species")
    .plot.kde(hue="species")
    .plot.scatter(hue="species")
    .plot.kde2d(hue="species")
    .plot.corr_heatmap()
    .plot.umap(umap_preprocessing)
)

# Fit and run every step on the iris frame; the returned frame is shown as
# the cell output.
plotter.fit_transform(all_df)

Boxplot: 100%|██████████| 4/4 [00:00<00:00, 11.00it/s]
Violinplot: 100%|██████████| 4/4 [00:00<00:00,  7.11it/s]
Histogram: 100%|██████████| 4/4 [00:00<00:00,  5.76it/s]
KDE: 100%|██████████| 4/4 [00:00<00:00,  5.96it/s]
Scatter: 100%|██████████| 6/6 [00:00<00:00, 10.63it/s]
KDE 2D: 100%|██████████| 6/6 [00:01<00:00,  3.72it/s]


UMAP( verbose=True)
Thu Jul 25 00:03:32 2024 Construct fuzzy simplicial set
Thu Jul 25 00:03:32 2024 Finding Nearest Neighbors
Thu Jul 25 00:03:32 2024 Finished Nearest Neighbor Search
Thu Jul 25 00:03:32 2024 Construct embedding


Epochs completed:  30%| ███        152/500 [00:01]

	completed  0  /  500 epochs
	completed  50  /  500 epochs
	completed  100  /  500 epochs
	completed  150  /  500 epochs
	completed  200  /  500 epochs
	completed  250  /  500 epochs
	completed  300  /  500 epochs
	completed  350  /  500 epochs
	completed  400  /  500 epochs
	completed  450  /  500 epochs


Epochs completed: 100%| ██████████ 500/500 [00:01]


Thu Jul 25 00:03:34 2024 Finished embedding


UMAP Plot: 100%|██████████| 5/5 [00:00<00:00, 22.69it/s]


sepal_length,sepal_width,petal_length,petal_width,species
f32,f32,f32,f32,cat
5.1,3.5,1.4,0.2,"""0"""
4.9,3.0,1.4,0.2,"""0"""
4.7,3.2,1.3,0.2,"""0"""
4.6,3.1,1.5,0.2,"""0"""
5.0,3.6,1.4,0.2,"""0"""
…,…,…,…,…
6.7,3.0,5.2,2.3,"""2"""
6.3,2.5,5.0,1.9,"""2"""
6.5,3.0,5.2,2.0,"""2"""
6.2,3.4,5.4,2.3,"""2"""
