Skip to content

Commit

Permalink
Merge pull request #190 from abstractqqq/update_polars
Browse files Browse the repository at this point in the history
  • Loading branch information
abstractqqq committed Jun 24, 2024
2 parents 6a0ce2e + c5b3e69 commit ae46996
Show file tree
Hide file tree
Showing 14 changed files with 1,286 additions and 1,253 deletions.
156 changes: 96 additions & 60 deletions Cargo.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars_ds"
version = "0.4.6"
version = "0.5.0"
edition = "2021"

[lib]
Expand All @@ -11,8 +11,8 @@ crate-type = ["cdylib"]

[dependencies]
pyo3 = {version = "*", features = ["abi3-py38", "extension-module"]}
pyo3-polars = {version = "0.13", features = ["derive"]}
polars = {version = "0.39", features = ["performant", "lazy", "parquet", "dtype-array", "diff", "array_count", "abs", "cross_join", "rank", "ndarray", "log", "cum_agg", "round_series", "nightly"]}
pyo3-polars = {version = "0.15", features = ["derive"]}
polars = {version = "0.41.1", features = ["performant", "lazy", "parquet", "dtype-array", "diff", "array_count", "abs", "cross_join", "rank", "ndarray", "log", "cum_agg", "round_series", "nightly"]}
num = "0.4.1"
faer = {version = "0.19", features = ["nightly"]}
faer-ext = {version = "0.2.0", features = ["ndarray"]}
Expand Down
476 changes: 238 additions & 238 deletions examples/basics.ipynb

Large diffs are not rendered by default.

1,846 changes: 923 additions & 923 deletions examples/diagnosis.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "maturin"
[project]
name = "polars_ds"
requires-python = ">=3.8"
version = "0.4.6"
version = "0.5.0"

license = {file = "LICENSE.txt"}
classifiers = [
Expand Down
2 changes: 1 addition & 1 deletion python/polars_ds/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

logging.basicConfig(level=logging.INFO)

__version__ = "0.4.6"
__version__ = "0.5.0"


def l_inf_horizontal(*v: StrOrExpr, normalize: bool = False) -> pl.Expr:
Expand Down
6 changes: 5 additions & 1 deletion python/polars_ds/config.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# Configs used in transforms and pipelines
STREAM_IN_TRANSFORM: bool = False

# Nothing yet.

# STREAM_IN_TRANSFORM: bool = False
# Level of optimization and memory usage, etc.
6 changes: 4 additions & 2 deletions python/polars_ds/diagnosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,9 @@ def corr(

corrs = [
self._frame.select(
pl.lit(x).alias("column"), *(corr(x, y).alias(y) for y in self.numerics)
# This calls corr from .stats
pl.lit(x).alias("column"),
*(corr(x, y).alias(y) for y in self.numerics),
)
for x in to_check
]
Expand Down Expand Up @@ -373,7 +375,7 @@ def infer_prob(self) -> List[str]:
(
pl.col(c).list.eval((pl.element() >= 0.0).all()).list.first()
) # every number must be positive
& (pl.col(c).list.sum() == 1.0) # class prob must sum to 1
& ((pl.col(c).list.sum() - 1.0).abs() < 1e-6) # class prob must sum to 1
& (
pl.col(c).list.len().min() == pl.col(c).list.len().max()
) # class prob column must have the same length
Expand Down
15 changes: 5 additions & 10 deletions python/polars_ds/num.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,14 +586,10 @@ def query_approx_entropy(
else:
r: pl.Expr = pl.lit(filtering_level, dtype=pl.Float64)

# FILL NULL -- REMOVE AFTER POLARS UPDATE
rows = t.count() - m + 1
data = [r, t.slice(0, length=rows).cast(pl.Float64).fill_null(float("nan"))]
data = [r, t.slice(0, length=rows).cast(pl.Float64)]
# See rust code for more comment on why I put m + 1 here.
data.extend(
t.shift(-i).slice(0, length=rows).cast(pl.Float64).fill_null(float("nan"))
for i in range(1, m + 1)
)
data.extend(t.shift(-i).slice(0, length=rows).cast(pl.Float64) for i in range(1, m + 1))
# More errors are handled in Rust
return pl_plugin(
symbol="pl_approximate_entropy",
Expand Down Expand Up @@ -640,12 +636,11 @@ def query_sample_entropy(
t = str_to_expr(ts)
r = ratio * t.std(ddof=0)
rows = t.count() - m + 1
# FILL NULL -- REMOVE AFTER POLARS UPDATE
data = [r, t.slice(0, length=rows).cast(pl.Float64).fill_null(float("nan"))]

data = [r, t.slice(0, length=rows).cast(pl.Float64)]
# See rust code for more comment on why I put m + 1 here.
data.extend(
t.shift(-i).slice(0, length=rows).cast(pl.Float64).fill_null(float("nan"))
for i in range(1, m + 1)
t.shift(-i).slice(0, length=rows).cast(pl.Float64) for i in range(1, m + 1)
) # More errors are handled in Rust
return pl_plugin(
symbol="pl_sample_entropy",
Expand Down
2 changes: 1 addition & 1 deletion src/num/jaccard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ fn pl_list_jaccard(inputs: &[Series]) -> PolarsResult<Series> {
if s1.inner_dtype().is_integer() {
let out = _list_jaccard(s1, s2);
Ok(out.into_series())
} else if s1.inner_dtype() == DataType::String {
} else if s1.inner_dtype() == &DataType::String {
Ok(s1
.into_iter()
.zip(s2.into_iter())
Expand Down
6 changes: 1 addition & 5 deletions src/num/ols.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use crate::utils::rechunk_to_frame;
/// OLS using Faer.
use faer::{prelude::*, Side};
use faer_ext::IntoFaer;
// use faer_ext::IntoFaer;
use crate::utils::rechunk_to_frame;
use itertools::Itertools;
use ndarray::{s, Array2};
use polars::prelude::*;
Expand Down Expand Up @@ -92,10 +91,7 @@ fn series_to_mat_for_lstsq(
"Lstsq: #Data < #features. No conclusive result.".into(),
))
} else {
// Error here
// println!("{:?}", df.shape());
let mat = df.to_ndarray::<Float64Type>(IndexOrder::Fortran)?;
// println!("B");
Ok((mat, mask))
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/num/psi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ fn pl_psi_discrete_report(inputs: &[Series]) -> PolarsResult<Series> {
df2.lazy(),
[col("actual_cat")],
[col("baseline_cat")],
JoinArgs::new(JoinType::Outer { coalesce: false }),
JoinArgs::new(JoinType::Full),
)
.with_columns([
col("baseline_cnt").fill_null(0),
Expand Down
2 changes: 1 addition & 1 deletion src/stats/chi2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fn pl_chi2(inputs: &[Series]) -> PolarsResult<Series> {
// Get the cartesian product
let df1 = df!(s1_name => u1)?.lazy();
let df2 = df!(s2_name => u2)?.lazy();
let cross = df1.cross_join(df2);
let cross = df1.cross_join(df2, None);

// Create a "fake" contingency table
let s1 = inputs[0].clone();
Expand Down
12 changes: 6 additions & 6 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ pub fn first_field_output(fields: &[Field]) -> PolarsResult<Field> {
Ok(fields[0].clone())
}

pub fn list_u64_output(_: &[Field]) -> PolarsResult<Field> {
Ok(Field::new(
"nodes",
DataType::List(Box::new(DataType::UInt64)),
))
}
// pub fn list_u64_output(_: &[Field]) -> PolarsResult<Field> {
// Ok(Field::new(
// "nodes",
// DataType::List(Box::new(DataType::UInt64)),
// ))
// }

pub fn list_u32_output(_: &[Field]) -> PolarsResult<Field> {
Ok(Field::new(
Expand Down

0 comments on commit ae46996

Please sign in to comment.