In [1]:
import sys

import numpy as np
import pandas as pd

from sklearn.datasets import make_regression


In [34]:
# --- Experiment configuration ---
NUM_FEATURES = 1   # number of input columns to generate
NUM_BINS = 5       # passed to the encoder as `num_bins` in a later cell
NUM_SAMPLES = 10   # number of rows to generate

# Build a small synthetic regression problem; the fixed random_state keeps
# the generated values the same on every run.
X, y = make_regression(
    n_samples=NUM_SAMPLES,
    n_features=NUM_FEATURES,
    noise=0.1,
    random_state=1,
)

# Wrap the arrays in a DataFrame: Feature_1..Feature_N followed by Target.
feature_columns = [f'Feature_{idx + 1}' for idx in range(NUM_FEATURES)]
df = pd.DataFrame(X, columns=feature_columns).assign(Target=y)

# Keep only the feature columns, cast to float32, for the encoder cells below.
df_data = df.drop(columns='Target').astype('float32')

print(df_data.head())

   Feature_1
0  -2.301539
1  -0.761207
2  -0.528172
3   1.744812
4   0.319039


In [35]:
# Sanity check: every feature column should report float32 after the astype('float32') cast above.
df_data.dtypes

Feature_1    float32
dtype: object

In [36]:
%%time
# Import the encoder class used by the fit/transform cells below.
# %%time reports how long this cell (i.e. the import) takes to run.
# NOTE(review): `ple_transformer` is presumably a project-local module — confirm it is importable from the notebook's working directory.
from ple_transformer import PiecewiseLinearEncoderNumpy

CPU times: user 6 µs, sys: 3 µs, total: 9 µs
Wall time: 11.2 µs


In [37]:
%%time
# Create the encoder, passing NUM_BINS as its `num_bins` argument.
transformer = PiecewiseLinearEncoderNumpy(num_bins=NUM_BINS)

# Fit on the float32 feature frame only (df_data excludes the Target column).
# NOTE(review): presumably fit() derives per-feature bin boundaries from the data — confirm in ple_transformer.
transformer.fit(df_data)


CPU times: user 3.12 ms, sys: 0 ns, total: 3.12 ms
Wall time: 2.64 ms


In [38]:

%%time
# Encode the same frame the transformer was fitted on and display the result.
# The displayed result is a float32 array; for this run its shape is (10, 1, 5),
# which matches NUM_SAMPLES x NUM_FEATURES x NUM_BINS.
# NOTE(review): each row reads as leading 1s, at most one fractional value, then 0s —
# confirm this is the intended piecewise-linear encoding.
encoded_data = transformer.transform(df_data)
encoded_data


CPU times: user 2.41 ms, sys: 644 µs, total: 3.05 ms
Wall time: 3.42 ms


array([[[0.        , 0.        , 0.        , 0.        , 0.        ]],

       [[1.        , 0.23802814, 0.        , 0.        , 0.        ]],

       [[1.        , 1.        , 0.06196053, 0.        , 0.        ]],

       [[1.        , 1.        , 1.        , 1.        , 1.        ]],

       [[1.        , 1.        , 1.        , 0.32818043, 0.        ]],

       [[1.        , 0.808551  , 0.        , 0.        , 0.        ]],

       [[1.        , 1.        , 1.        , 1.        , 0.8344371 ]],

       [[1.        , 1.        , 1.        , 0.85393834, 0.        ]],

       [[0.8312498 , 0.        , 0.        , 0.        , 0.        ]],

       [[1.        , 1.        , 0.5786431 , 0.        , 0.        ]]],
      dtype=float32)