Skip to content

Commit

Permalink
refactor: add init file for easier usage of transformers (#45)
Browse files Browse the repository at this point in the history
  • Loading branch information
chrislemke committed Jan 18, 2023
1 parent a1279b4 commit e1edb18
Show file tree
Hide file tree
Showing 15 changed files with 77 additions and 56 deletions.
6 changes: 3 additions & 3 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ Let's assume you want to use some method from [NumPy's mathematical functions, t
use the [`MathExpressionTransformer`](https://chrislemke.github.io/sk-transformers/number_transformer-reference/#sk-transformers.transformer.number_transformer.MathExpressionTransformer).
```python
import pandas as pd
from sk_transformers.number_transformer import MathExpressionTransformer
from sk_transformers import MathExpressionTransformer

X = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
transformer = MathExpressionTransformer([("foo", "np.sum", "bar", {"axis": 0})])
Expand All @@ -91,8 +91,8 @@ In the next example, we additionally add the [`MapTransformer`](https://chrislem
Together with [scikit-learn's pipelines](https://scikit-learn.org/stable/modules/compose.html#combining-estimators) it would look like this:
```python
import pandas as pd
from sk_transformers.number_transformer import MathExpressionTransformer
from sk_transformers.generic_transformer import MapTransformer
from sk_transformers import MathExpressionTransformer
from sk_transformers import MapTransformer
from sklearn.pipeline import Pipeline

X = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
Expand Down
42 changes: 21 additions & 21 deletions examples/playground.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.datetime_transformer import DurationCalculatorTransformer\n",
"from sk_transformers import DurationCalculatorTransformer\n",
"\n",
"X = pd.DataFrame(\n",
" {\n",
Expand Down Expand Up @@ -85,7 +85,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.datetime_transformer import TimestampTransformer\n",
"from sk_transformers import TimestampTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [\"1960-01-01\", \"1970-01-01\", \"1990-01-01\"]})\n",
"transformer = TimestampTransformer([\"foo\"])\n",
Expand Down Expand Up @@ -123,7 +123,7 @@
"import numpy as np\n",
"import pandas as pd\n",
"from pytorch_widedeep.datasets import load_adult\n",
"from sk_transformers.deep_transformer import ToVecTransformer\n",
"from sk_transformers import ToVecTransformer\n",
"\n",
"df = load_adult(as_frame=True)\n",
"df[\"target\"] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\n",
Expand Down Expand Up @@ -168,7 +168,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.encoder_transformer import MeanEncoderTransformer\n",
"from sk_transformers import MeanEncoderTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [\"a\", \"b\", \"a\", \"c\", \"b\", \"a\", \"c\", \"a\", \"b\", \"c\"]})\n",
"y = pd.Series([1, 0, 1, 0, 1, 0, 1, 0, 1, 0])\n",
Expand Down Expand Up @@ -204,7 +204,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.generic_transformer import AggregateTransformer\n",
"from sk_transformers import AggregateTransformer\n",
"\n",
"X = pd.DataFrame(\n",
" {\n",
Expand Down Expand Up @@ -234,7 +234,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.generic_transformer import ColumnDropperTransformer\n",
"from sk_transformers import ColumnDropperTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [1, 2, 3], \"bar\": [4, 5, 6]})\n",
"transformer = ColumnDropperTransformer([\"foo\"])\n",
Expand All @@ -259,7 +259,7 @@
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sk_transformers.generic_transformer import DtypeTransformer\n",
"from sk_transformers import DtypeTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [1, 2, 3], \"bar\": [\"a\", \"a\", \"b\"]})\n",
"transformer = DtypeTransformer([(\"foo\", np.float32), (\"bar\", \"category\")])\n",
Expand All @@ -285,7 +285,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.generic_transformer import FunctionsTransformer\n",
"from sk_transformers import FunctionsTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [1, 2, 3], \"bar\": [4, 5, 6]})\n",
"transformer = FunctionsTransformer([(\"foo\", np.log1p, None), (\"bar\", np.sqrt, None)])\n",
Expand All @@ -309,7 +309,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.generic_transformer import MapTransformer\n",
"from sk_transformers import MapTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [1, 2, 3], \"bar\": [4, 5, 6]})\n",
"transformer = MapTransformer([(\"foo\", lambda x: x + 1)])\n",
Expand All @@ -335,7 +335,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.generic_transformer import LeftJoinTransformer\n",
"from sk_transformers import LeftJoinTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [\"A\", \"B\", \"C\", \"A\", \"C\"]})\n",
"lookup_df = pd.Series([1, 2, 3], index=[\"A\", \"B\", \"C\"], name=\"values\")\n",
Expand All @@ -359,7 +359,7 @@
"metadata": {},
"outputs": [],
"source": [
"from sk_transformers.generic_transformer import NaNTransformer\n",
"from sk_transformers import NaNTransformer\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
Expand Down Expand Up @@ -389,7 +389,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.generic_transformer import QueryTransformer\n",
"from sk_transformers import QueryTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [1, 8, 3, 6, 5, 4, 7, 2]})\n",
"transformer = QueryTransformer([\"foo > 4\"])\n",
Expand Down Expand Up @@ -418,7 +418,7 @@
"metadata": {},
"outputs": [],
"source": [
"from sk_transformers.generic_transformer import ValueIndicatorTransformer\n",
"from sk_transformers import ValueIndicatorTransformer\n",
"import pandas as pd\n",
"\n",
"X = pd.DataFrame({\"foo\": [1, -999, 3], \"bar\": [\"a\", \"-999\", \"c\"]})\n",
Expand Down Expand Up @@ -446,7 +446,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.generic_transformer import ValueReplacerTransformer\n",
"from sk_transformers import ValueReplacerTransformer\n",
"\n",
"X = pd.DataFrame(\n",
" {\"foo\": [\"0000-01-01\", \"2022/01/08\", \"bar\", \"1982-12-7\", \"28-09-2022\"]}\n",
Expand Down Expand Up @@ -492,7 +492,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.number_transformer import MathExpressionTransformer\n",
"from sk_transformers import MathExpressionTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [1, 2, 3], \"bar\": [4, 5, 6]})\n",
"transformer = MathExpressionTransformer([(\"foo\", \"np.sum\", \"bar\", {\"axis\": 0})])\n",
Expand Down Expand Up @@ -524,7 +524,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.string_transformer import EmailTransformer\n",
"from sk_transformers import EmailTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [\"person-123@test.com\"]})\n",
"transformer = EmailTransformer([\"foo\"])\n",
Expand All @@ -550,7 +550,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.string_transformer import IPAddressEncoderTransformer\n",
"from sk_transformers import IPAddressEncoderTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [\"192.168.1.1\", \"2001:0db8:3c4d:0015:0000:0000:1a2f:1a2b\"]})\n",
"transformer = IPAddressEncoderTransformer([\"foo\"])\n",
Expand All @@ -574,7 +574,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.string_transformer import PhoneTransformer\n",
"from sk_transformers import PhoneTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [\"+49123456789\", \"0044987654321\", \"3167891234\"]})\n",
"transformer = PhoneTransformer([\"foo\"])\n",
Expand All @@ -598,7 +598,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.string_transformer import StringSimilarityTransformer\n",
"from sk_transformers import StringSimilarityTransformer\n",
"\n",
"X = pd.DataFrame(\n",
" {\n",
Expand Down Expand Up @@ -630,7 +630,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"from sk_transformers.string_transformer import StringSlicerTransformer\n",
"from sk_transformers import StringSlicerTransformer\n",
"\n",
"X = pd.DataFrame({\"foo\": [\"abc\", \"def\", \"ghi\"], \"bar\": [\"jkl\", \"mno\", \"pqr\"]})\n",
"transformer = StringSlicerTransformer([(\"foo\", (0, 3, 2)), (\"bar\", (2,))])\n",
Expand Down Expand Up @@ -662,7 +662,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
"version": "3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:25:29) [Clang 14.0.6 ]"
},
"vscode": {
"interpreter": {
Expand Down
26 changes: 26 additions & 0 deletions src/sk_transformers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from sk_transformers.datetime_transformer import (
DurationCalculatorTransformer,
TimestampTransformer,
)
from sk_transformers.deep_transformer import ToVecTransformer
from sk_transformers.encoder_transformer import MeanEncoderTransformer
from sk_transformers.generic_transformer import (
AggregateTransformer,
ColumnDropperTransformer,
DtypeTransformer,
FunctionsTransformer,
LeftJoinTransformer,
MapTransformer,
NaNTransformer,
QueryTransformer,
ValueIndicatorTransformer,
ValueReplacerTransformer,
)
from sk_transformers.number_transformer import MathExpressionTransformer
from sk_transformers.string_transformer import (
EmailTransformer,
IPAddressEncoderTransformer,
PhoneTransformer,
StringSimilarityTransformer,
StringSlicerTransformer,
)
4 changes: 2 additions & 2 deletions src/sk_transformers/datetime_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class DurationCalculatorTransformer(BaseTransformer):
Example:
```python
import pandas as pd
from sk_transformers.datetime_transformer import DurationCalculatorTransformer
from sk_transformers import DurationCalculatorTransformer
X = pd.DataFrame(
{
Expand Down Expand Up @@ -79,7 +79,7 @@ class TimestampTransformer(BaseTransformer):
Example:
```python
import pandas as pd
from sk_transformers.datetime_transformer import TimestampTransformer
from sk_transformers import TimestampTransformer
X = pd.DataFrame({"foo": ["1960-01-01", "1970-01-01", "1990-01-01"]})
transformer = TimestampTransformer(["foo"])
Expand Down
2 changes: 1 addition & 1 deletion src/sk_transformers/deep_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ToVecTransformer(BaseEstimator, TransformerMixin):
import numpy as np
import pandas as pd
from pytorch_widedeep.datasets import load_adult
from sk_transformers.deep_transformer import ToVecTransformer
from sk_transformers import ToVecTransformer
df = load_adult(as_frame=True)
df["target"] = (df["income"].apply(lambda x: ">50K" in x)).astype(int)
Expand Down
2 changes: 1 addition & 1 deletion src/sk_transformers/encoder_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class MeanEncoderTransformer(BaseEstimator, TransformerMixin):
Example:
```python
import pandas as pd
from sk_transformers.encoder_transformer import MeanEncoderTransformer
from sk_transformers import MeanEncoderTransformer
X = pd.DataFrame({"foo": ["a", "b", "a", "c", "b", "a", "c", "a", "b", "c"]})
y = pd.Series([1, 0, 1, 0, 1, 0, 1, 0, 1, 0])
Expand Down
24 changes: 11 additions & 13 deletions src/sk_transformers/generic_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class DtypeTransformer(BaseTransformer):
```python
import numpy as np
import pandas as pd
from sk_transformers.generic_transformer import DtypeTransformer
from sk_transformers import DtypeTransformer
X = pd.DataFrame({"foo": [1, 2, 3], "bar": ["a", "a", "b"]})
transformer = DtypeTransformer([("foo", np.float32), ("bar", "category")])
Expand Down Expand Up @@ -69,7 +69,7 @@ class AggregateTransformer(BaseTransformer):
Example:
```python
import pandas as pd
from sk_transformers.generic_transformer import AggregateTransformer
from sk_transformers import AggregateTransformer
X = pd.DataFrame(
{
Expand Down Expand Up @@ -161,7 +161,7 @@ class FunctionsTransformer(BaseTransformer):
```python
import numpy as np
import pandas as pd
from sk_transformers.generic_transformer import FunctionsTransformer
from sk_transformers import FunctionsTransformer
X = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
transformer = FunctionsTransformer([("foo", np.log1p, None), ("bar", np.sqrt, None)])
Expand Down Expand Up @@ -216,7 +216,7 @@ class MapTransformer(BaseTransformer):
Example:
```python
import pandas as pd
from sk_transformers.generic_transformer import MapTransformer
from sk_transformers import MapTransformer
X = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
transformer = MapTransformer([("foo", lambda x: x + 1)])
Expand Down Expand Up @@ -263,7 +263,7 @@ class ColumnDropperTransformer(BaseTransformer):
Example:
```python
import pandas as pd
from sk_transformers.generic_transformer import ColumnDropperTransformer
from sk_transformers import ColumnDropperTransformer
X = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
transformer = ColumnDropperTransformer(["foo"])
Expand Down Expand Up @@ -304,7 +304,7 @@ class NaNTransformer(BaseTransformer):
Example:
```python
from sk_transformers.generic_transformer import NaNTransformer
from sk_transformers import NaNTransformer
import pandas as pd
import numpy as np
Expand Down Expand Up @@ -357,7 +357,7 @@ class ValueIndicatorTransformer(BaseTransformer):
Example:
```python
from sk_transformers.generic_transformer import ValueIndicatorTransformer
from sk_transformers import ValueIndicatorTransformer
import pandas as pd
X = pd.DataFrame({"foo": [1, -999, 3], "bar": ["a", "-999", "c"]})
Expand Down Expand Up @@ -410,14 +410,12 @@ class QueryTransformer(BaseTransformer):
"""Applies a list of queries to a dataframe. If it operates on a dataset
used for supervised learning this transformer should be applied on the
dataframe containing `X` and `y`, so that removing rows by queries also
removes the corresponding `y` value. Read more about queries
[here](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html).
removes the corresponding `y` value.
Example:
```python
import pandas as pd
from sk_transformers.generic_transformer import QueryTransformer
from sk_transformers import QueryTransformer
X = pd.DataFrame({"foo": [1, 8, 3, 6, 5, 4, 7, 2]})
transformer = QueryTransformer(["foo > 4"])
Expand Down Expand Up @@ -468,7 +466,7 @@ class ValueReplacerTransformer(BaseTransformer):
Example:
```python
import pandas as pd
from sk_transformers.generic_transformer import ValueReplacerTransformer
from sk_transformers import ValueReplacerTransformer
X = pd.DataFrame(
{"foo": ["0000-01-01", "2022/01/08", "bar", "1982-12-7", "28-09-2022"]}
Expand Down Expand Up @@ -554,7 +552,7 @@ class LeftJoinTransformer(BaseTransformer):
Example:
```python
import pandas as pd
from sk_transformers.generic_transformer import LeftJoinTransformer
from sk_transformers import LeftJoinTransformer
X = pd.DataFrame({"foo": ["A", "B", "C", "A", "C"]})
lookup_df = pd.Series([1, 2, 3], index=["A", "B", "C"], name="values")
Expand Down
2 changes: 1 addition & 1 deletion src/sk_transformers/number_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class MathExpressionTransformer(BaseTransformer):
Example:
```python
import pandas as pd
from sk_transformers.number_transformer import MathExpressionTransformer
from sk_transformers import MathExpressionTransformer
X = pd.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]})
transformer = MathExpressionTransformer([("foo", "np.sum", "bar", {"axis": 0})])
Expand Down
Loading

0 comments on commit e1edb18

Please sign in to comment.