In [None]:
from ＿future＿ import annotations
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
import pandas as pd
import numpy as np


class OrdinalEncoder(object):
    """Encode a fixed set of DataFrame columns with one ``LabelEncoder`` each.

    Attributes:
        columns: Names of the columns this encoder handles.
        encoders: Mapping of column name to the ``LabelEncoder`` used for it.
    """

    def __init__(self, columns: list):
        self.columns = columns
        self.encoders = {column: LabelEncoder() for column in self.columns}

    def fit(self, data: pd.DataFrame) -> "OrdinalEncoder":
        """Fit each column's encoder on ``data``.

        Columns listed in ``self.columns`` but absent from ``data`` are
        reported with a printed warning and left unfitted.

        Returns:
            ``self``, so calls can be chained.
        """
        for column in self.columns:
            if column not in data:
                print(f'Warning: {column} not in data')
                continue
            # Only the learned classes are needed here; the encoded values
            # are produced later by ``transform``.
            self.encoders[column].fit(data[column])
        return self

    def transform(self, data: pd.DataFrame) -> pd.DataFrame:
        """Return a DataFrame of encoded columns sharing ``data``'s index.

        A column that was skipped during ``fit`` (its encoder has no
        ``classes_``) and is also absent from ``data`` is omitted from the
        result, so ``fit_transform`` works on the same data ``fit`` accepted.

        Raises:
            TypeError: If an encoder raises ``TypeError``; the message is
                extended with the offending column name.
            KeyError: If a fitted column is missing from ``data``.
        """
        # Building on data.index up front keeps the index correct even when
        # no column ends up being encoded.
        transformed = pd.DataFrame(index=data.index)
        for column in self.columns:
            encoder = self.encoders[column]
            if column not in data and not hasattr(encoder, 'classes_'):
                # Already reported by fit(); nothing to encode.
                continue
            try:
                transformed[column] = encoder.transform(data[column])
            except TypeError as e:
                raise TypeError(f'{e} while processing {column}') from e
        return transformed

    def fit_transform(self, data: pd.DataFrame) -> pd.DataFrame:
        """Fit on ``data`` and return its encoded form."""
        self.fit(data)
        return self.transform(data)

    @property
    def state_dict(self) -> dict:
        """Mapping ``{column: {original value: encoded index}}``.

        Only columns whose encoder has been fitted are included.
        """
        state_dict = {}
        for column in self.columns:
            encoder = self.encoders[column]
            if not hasattr(encoder, 'classes_'):
                # Column was skipped during fit(); it has no mapping yet.
                continue
            state_dict[column] = {
                class_: idx for idx, class_ in enumerate(encoder.classes_)
            }
        return state_dict

In [None]:
class TargetEncoder(object):
    """Smoothed mean-target encoding of a single categorical Series.

    Each category is replaced by a blend of its own target mean and the
    global target mean (the prior). The blend weight is a sigmoid of the
    category's sample count, controlled by ``min_samples_leaf`` and
    ``smoothing``.

    Attributes set by ``fit``:
        prior: Global mean of the target.
        averages: DataFrame indexed by category with one column (named after
            the target) holding the smoothed encoding.
        y_name: Name of the target Series used in ``fit``.
    """

    def __init__(self, min_samples_leaf=1, smoothing=1, noise_level=0):
        self.min_samples_leaf = min_samples_leaf
        self.smoothing = smoothing
        self.noise_level = noise_level

    def fit(self, x, y):
        """Learn the smoothed per-category target means.

        Args:
            x: Named categorical Series.
            y: Named target Series (its name must differ from ``x``'s).

        Returns:
            ``self``, so calls can be chained.
        """
        temp = pd.concat([x, y], axis=1)
        stats = temp.groupby(x.name)[y.name].agg(['mean', 'count'])
        # Sigmoid weight: near 0 for rare categories, near 1 for common ones.
        weight = 1 / (1 + np.exp(-(stats['count'] - self.min_samples_leaf) / self.smoothing))
        self.prior = y.mean()
        encoded = self.prior * (1 - weight) + stats['mean'] * weight
        # Build the frame directly rather than dropping 'mean'/'count', so a
        # target that happens to be named 'mean' or 'count' is not lost.
        self.averages = encoded.to_frame(y.name)
        self.y_name = y.name
        return self

    def transform(self, x: pd.Series) -> pd.Series:
        """Encode ``x`` with the means learned in ``fit``.

        Categories not seen during ``fit`` receive the prior. The result
        keeps ``x``'s index and is named ``<x.name>_mean``.
        """
        learned = self.averages[self.y_name]
        # Private column names make the merge independent of how the caller
        # named x at fit time or now.
        lookup = pd.DataFrame({'_key': learned.index, '_average': learned.to_numpy()})
        merged = pd.merge(x.to_frame('_key'), lookup, on='_key', how='left')
        ft_x = merged['_average'].fillna(self.prior).rename(f'{x.name}_mean')
        # The merge resets the index; restore the caller's.
        ft_x.index = x.index

        return self.add_noise(ft_x)

    def fit_transform(self, data, y=None):
        """Fit on ``(data, y)`` and return the encoding of ``data``.

        Raises:
            TypeError: If ``y`` is not supplied; target encoding cannot be
                fitted without a target.
        """
        if y is None:
            raise TypeError('fit_transform() requires the target series y')
        self.fit(data, y)
        return self.transform(data)

    def add_noise(self, s):
        """Scale each value of ``s`` by ``1 + noise_level * N(0, 1)``."""
        # One independent Gaussian draw per row; with noise_level == 0 the
        # values are returned unchanged.
        return s * (1 + self.noise_level * np.random.randn(len(s)))

    @staticmethod
    def target_encode(x, y, x_test, min_samples_leaf=1, smoothing=1, noise_level=0):
        """Fit on ``(x, y)`` and encode both ``x`` and ``x_test`` in one call.

        Returns:
            Tuple ``(encoded x, encoded x_test)``.
        """
        encoder = TargetEncoder(min_samples_leaf=min_samples_leaf,
                                smoothing=smoothing,
                                noise_level=noise_level)
        encoder.fit(x, y)
        return encoder.transform(x), encoder.transform(x_test)