Skip to content

Commit

Permalink
fix: add numpy as a possible prefix (#31)
Browse files Browse the repository at this point in the history
  • Loading branch information
premsrii authored Jan 6, 2023
1 parent 85596c2 commit 8fec7d3
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 45 deletions.
16 changes: 10 additions & 6 deletions src/sk_transformers/datetime_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@ class DurationCalculatorTransformer(BaseTransformer):
}
)
transformer = DurationCalculatorTransformer(("foo", "bar"), "days", "foo_bar_duration")
transformer.fit_transform(X)["foo_bar_duration"].to_numpy()
transformer.fit_transform(X)
```
```
array([ 0, 365, -731])
foo bar foo_bar_duration
0 1960-01-01 1960-01-01 0
1 1970-01-01 1971-01-01 365
2 1990-01-01 1988-01-01 -731
```
Args:
Expand Down Expand Up @@ -82,12 +85,13 @@ class TimestampTransformer(BaseTransformer):
X = pd.DataFrame({"foo": ["1960-01-01", "1970-01-01", "1990-01-01"]})
transformer = TimestampTransformer(["foo"])
transformer.fit_transform(X).to_numpy()
transformer.fit_transform(X)
```
```
array([[-3.156192e+08],
[ 0.000000e+00],
[ 6.311520e+08]])
foo
0 -315619200.0
1 0.0
2 631152000.0
```
Args:
Expand Down
63 changes: 34 additions & 29 deletions src/sk_transformers/generic_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,20 @@ class AggregateTransformer(BaseTransformer):
)
transformer = AggregateTransformer([("foo", "bar", ["mean"])])
transformer.fit_transform(X).to_numpy()
transformer.fit_transform(X)
```
```
array([["mr", 46, 52.17...],
["mr", 32, 52.17...],
["ms", 78, 68.75],
["ms", 48, 68.75],
["ms", 93, 68.75],
["mr", 68, 52.17...],
["mr", 53, 52.17...],
["mr", 38, 52.17...],
["mr", 76, 52.17...],
["ms", 56, 68.75]], dtype=object)
foo bar MEAN(foo__bar)
0 mr 46 52.166668
1 mr 32 52.166668
2 ms 78 68.750000
3 ms 48 68.750000
4 ms 93 68.750000
5 mr 68 52.166668
6 mr 53 52.166668
7 mr 38 52.166668
8 mr 76 52.166668
9 ms 56 68.750000
```
Args:
Expand Down Expand Up @@ -148,17 +149,19 @@ class FunctionsTransformer(BaseTransformer):
Example:
```python
import numpy as np
import pandas as pd
from sk_transformers.generic_transformer import FunctionsTransformer
X = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
transformer = FunctionsTransformer([("foo", np.log1p, None), ("bar", np.sqrt, None)])
transformer.fit_transform(X).to_numpy()
transformer.fit_transform(X)
```
```
array([[0.69314718, 2. ],
[1.09861229, 2.23606798],
[1.38629436, 2.44948974]])
foo bar
0 0.693147 2.000000
1 1.098612 2.236068
2 1.386294 2.449490
```
Args:
Expand Down Expand Up @@ -206,12 +209,13 @@ class MapTransformer(BaseTransformer):
X = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
transformer = MapTransformer([("foo", lambda x: x + 1)])
transformer.fit_transform(X).to_numpy()
transformer.fit_transform(X)
```
```
array([[2, 4],
[3, 5],
[4, 6]])
foo bar
0 2 4
1 3 5
2 4 6
```
Args:
Expand Down Expand Up @@ -300,10 +304,10 @@ class NaNTransformer(BaseTransformer):
transformer.fit_transform(X)
```
```
foo bar
0 1.0 a
1 -999.0 -999
2 3.0 c
foo bar
0 1.0 a
1 -999.0 -999
2 3.0 c
```
Args:
Expand Down Expand Up @@ -470,14 +474,15 @@ class ValueReplacerTransformer(BaseTransformer):
]
)
transformer.fit_transform(X).to_numpy()
transformer.fit_transform(X)
```
```
array([['1900-01-01'],
['2022/01/08'],
['1900-01-01'],
['1982-12-7'],
['1900-01-01']], dtype=object)
foo
0 1900-01-01
1 2022/01/08
2 1900-01-01
3 1982-12-7
4 1900-01-01
```
Expand Down
25 changes: 21 additions & 4 deletions src/sk_transformers/number_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@ class MathExpressionTransformer(BaseTransformer):
X = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
transformer = MathExpressionTransformer([("foo", "np.sum", "bar", {"axis": 0})])
transformer.fit_transform(X).to_numpy()
transformer.fit_transform(X)
```
```
array([[1, 4, 5],
[2, 5, 7],
[3, 6, 9]])
foo bar foo_sum_bar
0 1 4 5
1 2 5 7
2 3 6 9
```
Args:
Expand Down Expand Up @@ -71,6 +72,21 @@ def __verify_operation(self, operation: str) -> Tuple[bool, Any]:

return is_np_op, op

def __abbreviate_numpy_in_operation(self, operation: str) -> str:
    """
    Replaces a leading `numpy.` module prefix in an operation string with `np.`.

    Only the dotted module prefix is rewritten. A name that merely begins with
    the letters `numpy` (for example `numpyish`) is returned unchanged instead
    of being mangled into `npish`.

    Args:
        operation (str): The operation as a string, e.g. `numpy.sin` or `add`.

    Returns:
        str: The operation with a leading `numpy.` replaced by `np.`; any other
            string is returned as is.
    """

    long_prefix = "numpy."
    # Match on the trailing dot so that only a real module prefix is abbreviated.
    if operation.startswith(long_prefix):
        return "np." + operation[len(long_prefix):]
    return operation

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
"""
Applies the operation to the column and the value.
Expand All @@ -86,6 +102,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
X = check_ready_to_transform(self, X, [feature[0] for feature in self.features])

for (feature, operation, value, kwargs) in self.features:
operation = self.__abbreviate_numpy_in_operation(operation)
is_np_op, op = self.__verify_operation(operation)

new_column = f"{feature}_{operation}".replace("np.", "")
Expand Down
14 changes: 9 additions & 5 deletions src/sk_transformers/string_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ class IPAddressEncoderTransformer(BaseTransformer):
X = pd.DataFrame({"foo": ["192.168.1.1", "2001:0db8:3c4d:0015:0000:0000:1a2f:1a2b"]})
transformer = IPAddressEncoderTransformer(["foo"])
transformer.fit_transform(X).to_numpy()
transformer.fit_transform(X)
```
```
array([[3.23223578e-01],
[4.25407664e-11]])
foo
0 3.232236e-01
1 4.254077e-11
```
Args:
Expand Down Expand Up @@ -196,10 +197,13 @@ class StringSimilarityTransformer(BaseTransformer):
}
)
transformer = StringSimilarityTransformer(("foo", "bar"))
transformer.fit_transform(X)["foo_bar_similarity"].to_numpy()
transformer.fit_transform(X)
```
```
array([0.75, 1. , 0.25])
foo bar foo_bar_similarity
0 abcdefgh ghabcdef 0.75
1 ijklmnop ijklmnop 1.00
2 qrstuvwx qr000000 0.25
```
Args:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_transformer/test_number_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_math_expression_transformer_in_pipeline(X_numbers) -> None:
("small_numbers", "add", 1, None),
("small_numbers", "mul", "small_numbers", None),
("small_numbers", "np.sum", "small_float_numbers", {"axis": 0}),
("small_numbers", "np.sin", None, None),
("small_numbers", "numpy.sin", None, None),
("small_numbers", "np.sum", 1, None),
("big_numbers", "neg", None, None),
]
Expand Down

0 comments on commit 8fec7d3

Please sign in to comment.