Skip to content

Commit

Permalink
Merge pull request #85 from abstractqqq/convolution
Browse files Browse the repository at this point in the history
  • Loading branch information
abstractqqq committed Feb 26, 2024
2 parents 48c59bb + 8852548 commit 33c03f6
Show file tree
Hide file tree
Showing 12 changed files with 339 additions and 41 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars_ds"
version = "0.3.2"
version = "0.3.3"
edition = "2021"

[lib]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "maturin"
[project]
name = "polars_ds"
requires-python = ">=3.8"
version = "0.3.2"
version = "0.3.3"

license = {file = "LICENSE.txt"}
classifiers = [
Expand Down
2 changes: 1 addition & 1 deletion python/polars_ds/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from polars_ds.metrics import MetricExt # noqa: E402
from polars_ds.graph import GraphExt # noqa: E402

__version__ = "0.3.2"
__version__ = "0.3.3"
__all__ = ["NumExt", "StrExt", "StatsExt", "ComplexExt", "MetricExt", "GraphExt"]


Expand Down
32 changes: 31 additions & 1 deletion python/polars_ds/num.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import math
import polars as pl
from typing import Union, Optional, List
from .type_alias import DetrendMethod, Distance
from .type_alias import DetrendMethod, Distance, ConvMode
from polars.utils.udfs import _get_shared_lib_location

_lib = _get_shared_lib_location(__file__)
Expand Down Expand Up @@ -1075,6 +1075,36 @@ def psi_discrete(
returns_scalar=True,
)

def convolve(
self,
other: Union[List[float], "np.ndarray", pl.Series], # noqa: F821
mode: ConvMode = "full",
) -> pl.Expr:
"""
Performs a convolution with the filter via FFT. The current implementation's performance is worse
than SciPy but offers parallelization within Polars Context.
parameters
----------
other
The filter for the convolution. Anything that can be turned into a Polars Series will work.
mode
Please check the reference. One of `same`, `left` (left-aligned same), `right` (right-aligned same),
`valid` or `full`.
Reference
---------
https://brianmcfee.net/dstbook-site/content/ch03-convolution/Modes.html
"""

filter_ = pl.Series(values=other, dtype=pl.Float64)
return self._expr.cast(pl.Float64).register_plugin(
lib=_lib,
symbol="pl_fft_convolve",
args=[filter_, pl.lit(mode, dtype=pl.String)],
changes_length=True,
)

def _haversine(
self,
x_long: pl.Expr,
Expand Down
1 change: 1 addition & 0 deletions python/polars_ds/type_alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
Alternative: TypeAlias = Literal["two-sided", "less", "greater"]
ROCAUCStrategy: TypeAlias = Literal["macro", "weighted"]
Distance: TypeAlias = Literal["l1", "l2", "inf", "h", "cosine", "haversine"]
ConvMode: TypeAlias = Literal["same", "left", "right", "full", "valid"]
130 changes: 130 additions & 0 deletions src/num/convolve.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
use itertools::Itertools;
use polars::prelude::*;
use pyo3_polars::derive::polars_expr;
use realfft::RealFftPlanner;

enum ConvMode {
FULL,
SAME,
LEFT,
RIGHT,
VALID,
}

impl From<&str> for ConvMode {
fn from(value: &str) -> Self {
match value.to_lowercase().as_ref() {
"full" => Self::FULL,
"same" => Self::SAME,
"left" => Self::LEFT,
"right" => Self::RIGHT,
"valid" => Self::VALID,
_ => Self::FULL,
}
}
}

// fn next_pow_2(n:usize) -> usize {
// let mut m:usize = 2;
// while m < n {
// m <<= 1;
// }
// m
// }

// Pad to 2^n size and make this faster?
fn valid_fft_convolve(input: &[f64], filter: &[f64]) -> PolarsResult<Vec<f64>> {
let in_shape = input.len();
// let good_size = next_pow_2(in_shape);
// Prepare
let mut output_vec = vec![0.; in_shape];
output_vec[..in_shape].copy_from_slice(input);

let mut oth = vec![0.; in_shape];
oth[..filter.len()].copy_from_slice(filter);

// let n = output_vec.len() as f64;
let mut planner: RealFftPlanner<f64> = RealFftPlanner::new();
let r2c = planner.plan_fft_forward(in_shape);
let c2r = planner.plan_fft_inverse(in_shape);
let mut spec_p = r2c.make_output_vec();
let mut spec_q = r2c.make_output_vec();
// Forward FFT on the inputs
let _ = r2c.process(&mut output_vec, &mut spec_p);
// .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?;
let _ = r2c.process(&mut oth, &mut spec_q);
// .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?;

// After forward FFT, multiply in place in spec_p.
for (z1, z2) in spec_p.iter_mut().zip(spec_q.into_iter()) {
*z1 = *z1 * z2;
}
// Inverse FFT
let _ = c2r.process(&mut spec_p, &mut output_vec);
// .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?;

// output_vec.truncate(in_shape);
Ok(output_vec)
}

fn fft_convolve(input: &[f64], filter: &[f64], mode: ConvMode) -> PolarsResult<Vec<f64>> {
match mode {
ConvMode::FULL => {
let t = filter.len() - 1;
let mut padded_input = vec![0.; input.len() + 2 * t];
let from_to = t..(t + input.len());
padded_input[from_to].copy_from_slice(input);
fft_convolve(&padded_input, filter, ConvMode::VALID)
}
ConvMode::SAME => {
let skip = (filter.len() - 1) / 2;
let out = fft_convolve(input, filter, ConvMode::FULL)?;
Ok(out.into_iter().skip(skip).take(input.len()).collect())
}
ConvMode::LEFT => {
let n = input.len();
let mut out = fft_convolve(input, filter, ConvMode::FULL)?;
out.truncate(n);
Ok(out)
}
ConvMode::RIGHT => {
let out = fft_convolve(input, filter, ConvMode::FULL)?;
Ok(out.into_iter().skip(filter.len() - 1).collect())
}
ConvMode::VALID => {
let out = valid_fft_convolve(input, filter)?;
let n = out.len() as f64;
Ok(out
.into_iter()
.skip(filter.len() - 1)
.map(|x| x / n)
.collect())
}
}
}

#[polars_expr(output_type=Float64)]
fn pl_fft_convolve(inputs: &[Series]) -> PolarsResult<Series> {
let s1 = inputs[0].f64()?;
let s2 = inputs[1].f64()?;
let mode = inputs[2].str()?;
let mode = mode.get(0).unwrap_or("full");
let mode: ConvMode = mode.into();

if s1.len() < s2.len() || s2.len() < 2 {
return Err(PolarsError::ComputeError(
"Convolution: The filter should have smaller length than the input column, and filter should have length >= 2.".into(),
));
}

let input = s1.rechunk();
let input = input.cont_slice().unwrap();

let other = s2.rechunk();
let other = other.cont_slice().unwrap();

let out = fft_convolve(input, other, mode)?;

let ca = Float64Chunked::from_slice(s1.name(), &out);
Ok(ca.into_series())
}
3 changes: 2 additions & 1 deletion src/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use num::Float;
use polars::error::{PolarsError, PolarsResult};

mod cond_entropy;
mod target_encode;
mod convolve;
mod entrophies;
mod fft;
mod float_extras;
Expand All @@ -14,6 +14,7 @@ mod knn;
mod lempel_ziv;
mod ols;
mod psi;
mod target_encode;
mod tp_fp;
mod trapz;
mod woe_iv;
Expand Down
48 changes: 25 additions & 23 deletions src/num/target_encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,36 +17,38 @@ pub(crate) struct TargetEncodeKwargs {

#[inline(always)]
fn get_target_encode_frame(
discrete_col:&Series,
target:&Series,
target_mean:f64,
min_samples_leaf:f64,
smoothing:f64
discrete_col: &Series,
target: &Series,
target_mean: f64,
min_samples_leaf: f64,
smoothing: f64,
) -> PolarsResult<LazyFrame> {

let df = df!(
"values" => discrete_col.cast(&DataType::String)?,
"target" => target
)?;

Ok(
df.lazy().group_by([col("values")]).agg([
len().alias("cnt"),
col("target").mean().alias("cond_p")
]).with_column(
(lit(1f64)
/ (lit(1f64) + ((-(col("cnt").cast(DataType::Float64) - lit(min_samples_leaf))/lit(smoothing)).exp()))).alias("alpha")
).select([
Ok(df
.lazy()
.group_by([col("values")])
.agg([len().alias("cnt"), col("target").mean().alias("cond_p")])
.with_column(
(lit(1f64)
/ (lit(1f64)
+ ((-(col("cnt").cast(DataType::Float64) - lit(min_samples_leaf))
/ lit(smoothing))
.exp())))
.alias("alpha"),
)
.select([
col("values"),
(col("alpha") * col("cond_p") + (lit(1f64) - col("alpha")) * lit(target_mean)).alias("to")
])
)

(col("alpha") * col("cond_p") + (lit(1f64) - col("alpha")) * lit(target_mean))
.alias("to"),
]))
}

#[polars_expr(output_type_func=target_encode_output)]
fn pl_target_encode(inputs: &[Series], kwargs: TargetEncodeKwargs) -> PolarsResult<Series> {

// Inputs[0] and inputs[1] are the string column and the target respectively

let target_mean = inputs[2].f64()?;
Expand All @@ -60,9 +62,9 @@ fn pl_target_encode(inputs: &[Series], kwargs: TargetEncodeKwargs) -> PolarsResu
&inputs[1],
target_mean,
min_samples_leaf,
smoothing
)?.collect()?;
smoothing,
)?
.collect()?;

Ok(encoding_frame.into_struct("target_encoded").into_series())

}
}
5 changes: 1 addition & 4 deletions src/num/woe_iv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ fn iv_output(_: &[Field]) -> PolarsResult<Field> {
/// Get a lazyframe needed to compute WOE.
/// Inputs[0] by default is the discrete bins / categories
/// Inputs[1] by default is the target (0s and 1s)
fn get_woe_frame(
discrete_col:&Series,
target:&Series
) -> PolarsResult<LazyFrame> {
fn get_woe_frame(discrete_col: &Series, target: &Series) -> PolarsResult<LazyFrame> {
// let categories = &inputs[1].cast(&DataType::String)?;
let df = df!(
"values" => discrete_col.cast(&DataType::String)?,
Expand Down
Loading

0 comments on commit 33c03f6

Please sign in to comment.