# Logodds

In [None]:
def logodds(x, target="smoking", eps=0.001):
    """Return the smoothed log-odds of a binary target within ``x``.

    The value is ``log((p_event + eps) / (p_non_event + eps))`` where
    ``p_event`` is the sum of the target column divided by the number of
    rows and ``p_non_event`` is the share of rows whose target equals 0.

    Parameters
    ----------
    x : pandas.DataFrame (or any mapping of column name -> array-like)
        Data holding the target column; typically one group produced by
        ``DataFrame.groupby(...).apply(logodds)``.
    target : str, default "smoking"
        Name of the target column inside ``x``.
    eps : float, default 0.001
        Additive smoothing that keeps the ratio finite when a group
        contains only events or only non-events.

    Returns
    -------
    float or int
        The smoothed log-odds, or ``0`` when ``x`` has no rows.
    """
    n_rows = len(x)
    if n_rows == 0:
        # Empty groups carry no evidence either way, so report neutral odds.
        return 0

    # Vectorised sums instead of the Python-level ``sum`` builtin, which
    # would iterate the column element by element.
    values = np.asarray(x[target])
    event_dist = values.sum() / n_rows
    non_event_dist = (values == 0).sum() / n_rows

    return np.log((event_dist + eps) / (non_event_dist + eps))

## WoE Transformer

In [None]:
class WOETransformer(BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin):
    """Replace every feature value with the smoothed log-odds of the target.

    During ``fit`` each column of ``X`` is grouped by its distinct values and,
    for every group, ``log((p_event + eps) / (p_non_event + eps))`` is stored,
    where ``p_event`` is the group's target sum divided by the group size and
    ``p_non_event`` is the share of the group's rows whose target equals 0.
    ``transform`` then looks each value up in the learned per-column mapping.

    Parameters
    ----------
    eps : float, default 0.001
        Additive smoothing that keeps the log finite for groups containing
        only events or only non-events.

    Attributes
    ----------
    woe_mapping_ : dict
        ``{column: {value: log-odds}}`` learned by the most recent ``fit``.
    feature_names_in_ : list
        Column names of the frame passed to ``fit``, in fit order.
    n_features_ : int
        Number of columns seen in ``fit``.
    """

    def __init__(self, eps: float = 0.001):
        # Kept in ``__init__`` for backward compatibility (callers may read
        # the attribute before fitting); ``fit`` always rebuilds it.
        self.woe_mapping_ = {}
        self.eps = eps

    def __sklearn_clone__(self):
        # NOTE(review): returning ``self`` makes ``sklearn.base.clone`` hand
        # back this very instance instead of an unfitted copy, so anything
        # that clones (cross-validation, grid search, ...) will refit and
        # share this one object. Presumably intentional -- confirm.
        return self

    def _smoothed_logodds(self, target):
        """Return the smoothed log-odds of one group's target values.

        Empty groups yield ``0``.
        """
        n_rows = len(target)
        if n_rows == 0:
            return 0
        # Vectorised sums instead of iterating the Series with builtin ``sum``.
        values = np.asarray(target)
        event_dist = values.sum() / n_rows
        non_event_dist = (values == 0).sum() / n_rows
        return np.log((event_dist + self.eps) / (non_event_dist + self.eps))

    def fit(self, X, y=None):
        """Learn the value -> log-odds mapping for every column of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Features to encode; every column is treated as categorical.
        y : pandas.Series or array-like
            Target, cast to ``int``. A Series is aligned with ``X`` on its
            index; any other array-like is paired with ``X`` positionally.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            Propagated from ``check_X_y`` when ``X`` / ``y`` fail validation
            (including ``y is None``).
        """
        # Validation only: the converted arrays are discarded because the
        # fit needs the original column labels.
        check_X_y(X, y, accept_sparse=False)

        if isinstance(y, pd.Series):
            target = y.astype(int)
        else:
            target = pd.Series(np.asarray(y).ravel(), index=X.index).astype(int)

        self.feature_names_in_ = X.columns.tolist()
        self.n_features_ = X.shape[1]

        # Build a fresh mapping on every fit so a refit on different columns
        # never keeps stale entries. Grouping the target Series directly
        # (rather than concatenating it into ``X`` under a fixed name) avoids
        # a clash with a feature column that happens to be called "target".
        self.woe_mapping_ = {
            c: target.groupby(X[c]).apply(self._smoothed_logodds).to_dict()
            for c in self.feature_names_in_
        }

        self._n_features_out = len(self.woe_mapping_)
        self.is_fitted_ = True

        return self

    def transform(self, X):
        """Encode ``X`` with the mappings learned in ``fit``.

        The input frame is left untouched. Columns are emitted in the order
        seen during ``fit`` so the output lines up with
        ``get_feature_names_out``. Values that were not present during
        ``fit`` have no mapping entry and come out as ``NaN``.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing the columns seen in ``fit``.

        Returns
        -------
        numpy.ndarray
            Float array of shape ``(len(X), n_features_)``.

        Raises
        ------
        ValueError
            If the number of columns differs from what was seen in ``fit``.
        KeyError
            If a column seen in ``fit`` is missing from ``X``.
        """
        check_is_fitted(self, "is_fitted_")
        if X.shape[1] != self.n_features_:
            raise ValueError('Shape of input is different from what was seen '
                             'in `fit`')

        # Assemble a new frame instead of assigning into ``X`` so the
        # caller's data is never overwritten.
        encoded = pd.DataFrame(
            {
                c: X[c].map(self.woe_mapping_[c]).astype(float)
                for c in self.feature_names_in_
            },
            index=X.index,
        )

        return encoded.values

    def get_feature_names_out(self, input_features=None):
        """Return the output column names: the fit-time names, unchanged.

        ``input_features`` is accepted for API compatibility and ignored.
        """
        return list(self.feature_names_in_)

# Sklearn Custom Classes

## Pipeline

In [None]:
class FitExposedPipeline(Pipeline):
    """
    Pipeline that surfaces the ``coef_`` and ``feature_importances_``
    attributes of its final estimator as its own read-only properties.
    """

    @property
    def coef_(self):
        """``coef_`` of the final estimator in the pipeline."""
        last_step = self._final_estimator
        return last_step.coef_

    @property
    def feature_importances_(self):
        """``feature_importances_`` of the final estimator in the pipeline."""
        last_step = self._final_estimator
        return last_step.feature_importances_