Skip to content

Commit

Permalink
maintain: add more pre-commit hooks (#40)
Browse files Browse the repository at this point in the history
  • Loading branch information
chrislemke authored and premsrii committed Jan 16, 2023
1 parent 02e7a60 commit b716c44
Show file tree
Hide file tree
Showing 10 changed files with 162 additions and 123 deletions.
47 changes: 43 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,32 +31,66 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-xml
- id: mixed-line-ending
args: ["--fix=lf"]
- id: check-added-large-files
- id: check-ast
- id: check-builtin-literals
- id: check-case-conflict
- id: check-docstring-first
- id: check-merge-conflict
- id: check-toml
- id: debug-statements
- id: fix-byte-order-marker
- id: forbid-new-submodules
- id: forbid-submodules
- id: detect-private-key
- id: no-commit-to-branch
args: ["--branch=main", "--branch=develop"]

- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0
hooks:
- id: pyupgrade
- id: python-use-type-annotations
- id: python-check-blanket-noqa
- id: python-check-mock-methods
- id: python-no-eval
- id: python-no-log-warn
- id: python-use-type-annotations
- id: text-unicode-replacement-char

- repo: https://github.com/PyCQA/docformatter
rev: v1.5.1
hooks:
- id: docformatter

- repo: https://github.com/nbQA-dev/nbQA
rev: 1.6.1
hooks:
- id: nbqa-check-ast
- id: nbqa-mypy
args: ["--config-file=pyproject.toml"]
- id: nbqa-pyupgrade

- repo: https://github.com/bwhmather/ssort
rev: v0.11.6
hooks:
- id: ssort

- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
hooks:
- id: pyupgrade

- repo: https://github.com/psf/black
rev: 22.12.0
hooks:
- id: black
args: ["--config=pyproject.toml"]
- id: black-jupyter
args: ["--config=pyproject.toml"]
files: \.ipynb$

- repo: https://github.com/PyCQA/isort
rev: 5.11.4
Expand All @@ -65,7 +99,7 @@ repos:
args: ["--profile=black"]

- repo: https://github.com/PyCQA/pylint
rev: v2.15.9
rev: v2.15.10
hooks:
- id: pylint
args: ["--rcfile=pyproject.toml"]
Expand All @@ -86,6 +120,11 @@ repos:
args:
- "-r"

- repo: https://github.com/kynan/nbstripout
rev: 0.6.1
hooks:
- id: nbstripout

- repo: https://github.com/python-poetry/poetry
rev: 1.3.0
hooks:
Expand Down
13 changes: 8 additions & 5 deletions src/sk_transformers/base_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@


class BaseTransformer(BaseEstimator, TransformerMixin):
"""
Base class for all custom transformers. This class inherits from BaseEstimator and TransformerMixin.
Its main purpose is to provide an implementation of the `fit` method that does nothing except setting the `self.fitted_` to `True`.
Since most custom transformers do not need to implement a fit method, this class
can be used as a base class for all transformers not needing a `fit` method.
"""Base class for all custom transformers.
This class inherits from BaseEstimator and TransformerMixin. Its
main purpose is to provide an implementation of the `fit` method
that does nothing except set `self.fitted_` to `True`. Since
most custom transformers do not need to implement a fit method, this
class can be used as a base class for all transformers not needing a
`fit` method.
"""

def __init__(self) -> None:
Expand Down
13 changes: 5 additions & 8 deletions src/sk_transformers/datetime_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@


class DurationCalculatorTransformer(BaseTransformer):
"""
Calculates the duration between two given dates.
"""Calculates the duration between two given dates.
Example:
```python
Expand Down Expand Up @@ -50,8 +49,7 @@ def __init__(
self.new_column_name = new_column_name

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
"""
Transform method that calculates the duration between two dates.
"""Transform method that calculates the duration between two dates.
Args:
X (pandas.DataFrame): The input DataFrame.
Expand All @@ -75,8 +73,8 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:


class TimestampTransformer(BaseTransformer):
"""
Transforms a date column with a specified format into a timestamp column.
"""Transforms a date column with a specified format into a timestamp
column.
Example:
```python
Expand Down Expand Up @@ -109,8 +107,7 @@ def __init__(
self.date_format = date_format

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
"""
Transforms columns from the provided dataframe.
"""Transforms columns from the provided dataframe.
Args:
X (pandas.DataFrame): Dataframe with columns to transform.
Expand Down
17 changes: 10 additions & 7 deletions src/sk_transformers/deep_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@


class ToVecTransformer(BaseEstimator, TransformerMixin):
"""
This transformer trains an [FT-Transformer](https://paperswithcode.com/method/ft-transformer)
using the [pytorch-widedeep package](https://github.com/jrzaurin/pytorch-widedeep) and extracts the embeddings
"""This transformer trains an
[FT-Transformer](https://paperswithcode.com/method/ft-transformer) using the
[pytorch-widedeep package](https://github.com/jrzaurin/pytorch-widedeep)
and extracts the embeddings
from its embedding layer. The output shape of the transformer is (number of rows,(`input_dim` * number of columns)).
Please refer to [this example](https://pytorch-widedeep.readthedocs.io/en/latest/examples/09_extracting_embeddings.html)
for pytorch_widedeep example on how to extract embeddings.
Expand Down Expand Up @@ -99,8 +102,8 @@ def __init__(
self.tab_vec_: Optional[Tab2Vec] = None

def fit(self, X: pd.DataFrame, y: NDArray) -> "ToVecTransformer":
"""
Fits the `ToVecTransformer`. The `TabPreprocessor` is fitted and the `FTTransformer` is trained.
"""Fits the `ToVecTransformer`. The `TabPreprocessor` is fitted and the
`FTTransformer` is trained.
Args:
X (pd.DataFrame): The input data.
Expand Down Expand Up @@ -151,8 +154,8 @@ def fit(self, X: pd.DataFrame, y: NDArray) -> "ToVecTransformer":
return self

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
"""
Transforms the input data and returns the embeddings.
"""Transforms the input data and returns the embeddings.
The output shape is (number of rows,(`input_dim` * number of columns)).
Args:
Expand Down
10 changes: 4 additions & 6 deletions src/sk_transformers/encoder_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@


class MeanEncoderTransformer(BaseEstimator, TransformerMixin):
"""
Scikit-learn API for the [feature-engine MeanEncoder](https://feature-engine.readthedocs.io/en/latest/api_doc/encoding/MeanEncoder.html).
"""Scikit-learn API for the
[feature-engine MeanEncoder](https://feature-engine.readthedocs.io/en/latest/api_doc/encoding/MeanEncoder.html).
Example:
```python
Expand Down Expand Up @@ -44,8 +44,7 @@ def __init__(self, fill_na_value: Union[int, float] = -999) -> None:
self.fill_na_value = fill_na_value

def fit(self, X: pd.DataFrame, y: pd.Series) -> "MeanEncoderTransformer":
"""
Fit the MeanEncoder to the data.
"""Fit the MeanEncoder to the data.
Args:
X (pandas.DataFrame): DataFrame to fit the MeanEncoder to.
Expand All @@ -58,8 +57,7 @@ def fit(self, X: pd.DataFrame, y: pd.Series) -> "MeanEncoderTransformer":
return self

def transform(self, X: pd.DataFrame) -> pd.DataFrame:
"""
Transform the data using the fitted MeanEncoder.
"""Transform the data using the fitted MeanEncoder.
Args:
X (pandas.DataFrame): DataFrame to transform.
Expand Down
Loading

0 comments on commit b716c44

Please sign in to comment.