Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added convolution #85

Merged
merged 1 commit into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "polars_ds"
version = "0.3.2"
version = "0.3.3"
edition = "2021"

[lib]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "maturin"
[project]
name = "polars_ds"
requires-python = ">=3.8"
version = "0.3.2"
version = "0.3.3"

license = {file = "LICENSE.txt"}
classifiers = [
Expand Down
2 changes: 1 addition & 1 deletion python/polars_ds/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from polars_ds.metrics import MetricExt # noqa: E402
from polars_ds.graph import GraphExt # noqa: E402

__version__ = "0.3.2"
__version__ = "0.3.3"
__all__ = ["NumExt", "StrExt", "StatsExt", "ComplexExt", "MetricExt", "GraphExt"]


Expand Down
32 changes: 31 additions & 1 deletion python/polars_ds/num.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import math
import polars as pl
from typing import Union, Optional, List
from .type_alias import DetrendMethod, Distance
from .type_alias import DetrendMethod, Distance, ConvMode
from polars.utils.udfs import _get_shared_lib_location

_lib = _get_shared_lib_location(__file__)
Expand Down Expand Up @@ -1075,6 +1075,36 @@ def psi_discrete(
returns_scalar=True,
)

def convolve(
self,
other: Union[List[float], "np.ndarray", pl.Series], # noqa: F821
mode: ConvMode = "full",
) -> pl.Expr:
"""
Performs a convolution with the filter via FFT. The current implementation's performance is worse
than SciPy but offers parallelization within Polars Context.

parameters
----------
other
The filter for the convolution. Anything that can be turned into a Polars Series will work.
mode
Please check the reference. One of `same`, `left` (left-aligned same), `right` (right-aligned same),
`valid` or `full`.

Reference
---------
https://brianmcfee.net/dstbook-site/content/ch03-convolution/Modes.html
"""

filter_ = pl.Series(values=other, dtype=pl.Float64)
return self._expr.cast(pl.Float64).register_plugin(
lib=_lib,
symbol="pl_fft_convolve",
args=[filter_, pl.lit(mode, dtype=pl.String)],
changes_length=True,
)

def _haversine(
self,
x_long: pl.Expr,
Expand Down
1 change: 1 addition & 0 deletions python/polars_ds/type_alias.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
Alternative: TypeAlias = Literal["two-sided", "less", "greater"]
ROCAUCStrategy: TypeAlias = Literal["macro", "weighted"]
Distance: TypeAlias = Literal["l1", "l2", "inf", "h", "cosine", "haversine"]
ConvMode: TypeAlias = Literal["same", "left", "right", "full", "valid"]
130 changes: 130 additions & 0 deletions src/num/convolve.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
use itertools::Itertools;
use polars::prelude::*;
use pyo3_polars::derive::polars_expr;
use realfft::RealFftPlanner;

enum ConvMode {
FULL,
SAME,
LEFT,
RIGHT,
VALID,
}

impl From<&str> for ConvMode {
fn from(value: &str) -> Self {
match value.to_lowercase().as_ref() {
"full" => Self::FULL,
"same" => Self::SAME,
"left" => Self::LEFT,
"right" => Self::RIGHT,
"valid" => Self::VALID,
_ => Self::FULL,
}
}
}

// fn next_pow_2(n:usize) -> usize {
// let mut m:usize = 2;
// while m < n {
// m <<= 1;
// }
// m
// }

// Pad to 2^n size and make this faster?
fn valid_fft_convolve(input: &[f64], filter: &[f64]) -> PolarsResult<Vec<f64>> {
let in_shape = input.len();
// let good_size = next_pow_2(in_shape);
// Prepare
let mut output_vec = vec![0.; in_shape];
output_vec[..in_shape].copy_from_slice(input);

let mut oth = vec![0.; in_shape];
oth[..filter.len()].copy_from_slice(filter);

// let n = output_vec.len() as f64;
let mut planner: RealFftPlanner<f64> = RealFftPlanner::new();
let r2c = planner.plan_fft_forward(in_shape);
let c2r = planner.plan_fft_inverse(in_shape);
let mut spec_p = r2c.make_output_vec();
let mut spec_q = r2c.make_output_vec();
// Forward FFT on the inputs
let _ = r2c.process(&mut output_vec, &mut spec_p);
// .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?;
let _ = r2c.process(&mut oth, &mut spec_q);
// .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?;

// After forward FFT, multiply in place in spec_p.
for (z1, z2) in spec_p.iter_mut().zip(spec_q.into_iter()) {
*z1 = *z1 * z2;
}
// Inverse FFT
let _ = c2r.process(&mut spec_p, &mut output_vec);
// .map_err(|e| PolarsError::ComputeError(e.to_string().into()))?;

// output_vec.truncate(in_shape);
Ok(output_vec)
}

fn fft_convolve(input: &[f64], filter: &[f64], mode: ConvMode) -> PolarsResult<Vec<f64>> {
match mode {
ConvMode::FULL => {
let t = filter.len() - 1;
let mut padded_input = vec![0.; input.len() + 2 * t];
let from_to = t..(t + input.len());
padded_input[from_to].copy_from_slice(input);
fft_convolve(&padded_input, filter, ConvMode::VALID)
}
ConvMode::SAME => {
let skip = (filter.len() - 1) / 2;
let out = fft_convolve(input, filter, ConvMode::FULL)?;
Ok(out.into_iter().skip(skip).take(input.len()).collect())
}
ConvMode::LEFT => {
let n = input.len();
let mut out = fft_convolve(input, filter, ConvMode::FULL)?;
out.truncate(n);
Ok(out)
}
ConvMode::RIGHT => {
let out = fft_convolve(input, filter, ConvMode::FULL)?;
Ok(out.into_iter().skip(filter.len() - 1).collect())
}
ConvMode::VALID => {
let out = valid_fft_convolve(input, filter)?;
let n = out.len() as f64;
Ok(out
.into_iter()
.skip(filter.len() - 1)
.map(|x| x / n)
.collect())
}
}
}

#[polars_expr(output_type=Float64)]
fn pl_fft_convolve(inputs: &[Series]) -> PolarsResult<Series> {
let s1 = inputs[0].f64()?;
let s2 = inputs[1].f64()?;
let mode = inputs[2].str()?;
let mode = mode.get(0).unwrap_or("full");
let mode: ConvMode = mode.into();

if s1.len() < s2.len() || s2.len() < 2 {
return Err(PolarsError::ComputeError(
"Convolution: The filter should have smaller length than the input column, and filter should have length >= 2.".into(),
));
}

let input = s1.rechunk();
let input = input.cont_slice().unwrap();

let other = s2.rechunk();
let other = other.cont_slice().unwrap();

let out = fft_convolve(input, other, mode)?;

let ca = Float64Chunked::from_slice(s1.name(), &out);
Ok(ca.into_series())
}
3 changes: 2 additions & 1 deletion src/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use num::Float;
use polars::error::{PolarsError, PolarsResult};

mod cond_entropy;
mod target_encode;
mod convolve;
mod entrophies;
mod fft;
mod float_extras;
Expand All @@ -14,6 +14,7 @@ mod knn;
mod lempel_ziv;
mod ols;
mod psi;
mod target_encode;
mod tp_fp;
mod trapz;
mod woe_iv;
Expand Down
48 changes: 25 additions & 23 deletions src/num/target_encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,36 +17,38 @@ pub(crate) struct TargetEncodeKwargs {

#[inline(always)]
fn get_target_encode_frame(
discrete_col:&Series,
target:&Series,
target_mean:f64,
min_samples_leaf:f64,
smoothing:f64
discrete_col: &Series,
target: &Series,
target_mean: f64,
min_samples_leaf: f64,
smoothing: f64,
) -> PolarsResult<LazyFrame> {

let df = df!(
"values" => discrete_col.cast(&DataType::String)?,
"target" => target
)?;

Ok(
df.lazy().group_by([col("values")]).agg([
len().alias("cnt"),
col("target").mean().alias("cond_p")
]).with_column(
(lit(1f64)
/ (lit(1f64) + ((-(col("cnt").cast(DataType::Float64) - lit(min_samples_leaf))/lit(smoothing)).exp()))).alias("alpha")
).select([
Ok(df
.lazy()
.group_by([col("values")])
.agg([len().alias("cnt"), col("target").mean().alias("cond_p")])
.with_column(
(lit(1f64)
/ (lit(1f64)
+ ((-(col("cnt").cast(DataType::Float64) - lit(min_samples_leaf))
/ lit(smoothing))
.exp())))
.alias("alpha"),
)
.select([
col("values"),
(col("alpha") * col("cond_p") + (lit(1f64) - col("alpha")) * lit(target_mean)).alias("to")
])
)

(col("alpha") * col("cond_p") + (lit(1f64) - col("alpha")) * lit(target_mean))
.alias("to"),
]))
}

#[polars_expr(output_type_func=target_encode_output)]
fn pl_target_encode(inputs: &[Series], kwargs: TargetEncodeKwargs) -> PolarsResult<Series> {

// Inputs[0] and inputs[1] are the string column and the target respectively

let target_mean = inputs[2].f64()?;
Expand All @@ -60,9 +62,9 @@ fn pl_target_encode(inputs: &[Series], kwargs: TargetEncodeKwargs) -> PolarsResu
&inputs[1],
target_mean,
min_samples_leaf,
smoothing
)?.collect()?;
smoothing,
)?
.collect()?;

Ok(encoding_frame.into_struct("target_encoded").into_series())

}
}
5 changes: 1 addition & 4 deletions src/num/woe_iv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ fn iv_output(_: &[Field]) -> PolarsResult<Field> {
/// Get a lazyframe needed to compute WOE.
/// Inputs[0] by default is the discrete bins / categories
/// Inputs[1] by default is the target (0s and 1s)
fn get_woe_frame(
discrete_col:&Series,
target:&Series
) -> PolarsResult<LazyFrame> {
fn get_woe_frame(discrete_col: &Series, target: &Series) -> PolarsResult<LazyFrame> {
// let categories = &inputs[1].cast(&DataType::String)?;
let df = df!(
"values" => discrete_col.cast(&DataType::String)?,
Expand Down
Loading
Loading