In [None]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator,TransformerMixin

In [None]:
#categorical missing value imputer
class CategoricalImputer(BaseEstimator,TransformerMixin):
  """Replace missing values in the selected columns with the string 'missing'.

  Parameters
  ----------
  variables : str or list of str, optional
    Column name(s) to impute. Any value that is not a list is wrapped in a
    one-element list.
  """

  def __init__(self,variables=None):
    if not isinstance(variables,list):
      self.variables=[variables]
    else:
      self.variables=variables

  def fit(self,X,y=None):
    """Learn nothing; return the transformer unchanged."""
    return self

  def transform(self,X):
    """Return a copy of X with missing values in `variables` set to 'missing'.

    The input frame is not modified.
    """
    X=X.copy()
    for feat in self.variables:
      # Assign the filled column back explicitly. The original call passed a
      # misspelled `inlace` keyword, which raises TypeError; reassigning also
      # avoids relying on in-place mutation of a column selection.
      X[feat]=X[feat].fillna('missing')
    return X

In [None]:
#Numerical missing value imputer
class NumericalImputer(BaseEstimator,TransformerMixin):
  """Fill missing values in the selected columns with each column's mode.

  Parameters
  ----------
  variables : str or list of str, optional
    Column name(s) to impute. Any value that is not a list is wrapped in a
    one-element list.

  Attributes
  ----------
  imputer_dict : dict
    Set by `fit`; maps each column name to the fill value learned for it.
  """

  def __init__(self,variables=None):
    if not isinstance(variables,list):
      self.variables=[variables]
    else:
      self.variables=variables

  def fit(self,X,y=None):
    """Store the most frequent value of every configured column of X."""
    self.imputer_dict={}
    for feat in self.variables:
      # Store per feature. The original rebound `imputer_dict` itself to the
      # scalar, so the lookup by column name in `transform` could not work.
      # mode() may return several values on ties; the first one is used.
      self.imputer_dict[feat]=X[feat].mode()[0]
    return self

  def transform(self,X):
    """Return a copy of X with missing values replaced by the learned modes.

    The input frame is not modified.
    """
    X=X.copy()
    for feat in self.variables:
      # fillna returns a new Series; it must be assigned back, otherwise the
      # imputation is silently discarded.
      X[feat]=X[feat].fillna(self.imputer_dict[feat])
    return X

In [None]:
#Temporal Variable Calculator
class TemporalVariableEstimator(BaseEstimator,TransformerMixin):
  """Express the selected columns relative to a reference column.

  Each configured column is replaced by its own value minus the value of
  `ref_variable` on the same row.

  Parameters
  ----------
  variables : str or list of str, optional
    Column name(s) to rewrite. Any value that is not a list is wrapped in a
    one-element list.
  ref_variable : str, optional
    Name of the column that is subtracted from each configured column.
  """

  def __init__(self,variables=None,ref_variable=None):
    self.variables=variables if isinstance(variables,list) else [variables]
    self.ref_variable=ref_variable

  def fit(self,X,y=None):
    """Learn nothing; return the transformer unchanged."""
    return self

  def transform(self,X):
    """Return a copy of X where every configured column has `ref_variable` subtracted."""
    result=X.copy()
    for column in self.variables:
      # The reference column is re-read on every pass, in list order.
      difference=result[column]-result[self.ref_variable]
      result[column]=difference
    return result

In [None]:
#frequent label categorical encoder
class RareLabelCategoricalEncoder(BaseEstimator,TransformerMixin):
  """Collapse infrequent labels of the selected columns into the label 'Rare'.

  Parameters
  ----------
  variables : str or list of str, optional
    Column name(s) to encode. Any value that is not a list is wrapped in a
    one-element list.
  tol : float, default 0.05
    Minimum share of rows (count / len(X)) a label needs in the data passed
    to `fit` to be kept as is.

  Attributes
  ----------
  encoder_dict : dict
    Set by `fit`; maps each column name to the list of its frequent labels.
  """

  def __init__(self,variables=None,tol=0.05):
    self.tol=tol
    if not isinstance(variables,list):
      self.variables=[variables]
    else:
      self.variables=variables

  def fit(self,X,y=None):
    """Record, for every configured column, the labels whose share is >= tol."""
    self.encoder_dict={}
    for feat in self.variables:
      # Share of all rows per label. Plain true division replaces the
      # `np.float` alias, which was removed from NumPy.
      freq=X[feat].value_counts()/len(X)
      #frequent labels
      self.encoder_dict[feat]=list(freq[freq>=self.tol].index)
    # Return only after every variable has been processed; returning inside
    # the loop would leave all but the first variable unfitted.
    return self

  def transform(self,X):
    """Return a copy of X where labels not recorded as frequent become 'Rare'.

    The input frame is not modified.
    """
    X=X.copy()
    for feat in self.variables:
      X[feat]=np.where(X[feat].isin(self.encoder_dict[feat]),X[feat],'Rare')
    return X


In [None]:
#strings to number 
class CategoricalEncoder(BaseEstimator,TransformerMixin):
  """Encode category labels as integers ordered by mean target value.

  Parameters
  ----------
  variables : str or list of str, optional
    Column name(s) to encode. Any value that is not a list is wrapped in a
    one-element list.

  Attributes
  ----------
  encoder_dict : dict
    Set by `fit`; maps each column name to a {label: integer} dictionary.
  """

  def __init__(self,variables=None):
    if not isinstance(variables,list):
      self.variables=[variables]
    else:
      self.variables=variables

  def fit(self,X,y=None):
    """Learn the label-to-integer mapping of every configured column.

    Labels are ranked by the mean of y within each label, ascending, and
    numbered from 0.

    Raises
    ------
    ValueError
      If y is not provided; the ordering cannot be computed without it.
    """
    if y is None:
      # pd.concat silently drops None, which would otherwise surface as an
      # unrelated column-length mismatch on the next statement.
      raise ValueError('CategoricalEncoder.fit requires a target y')
    temp=pd.concat([X,y],axis=1)
    temp.columns=list(X.columns)+['target']
    self.encoder_dict={}
    for var in self.variables:
      t=temp.groupby(var)['target'].mean().sort_values(ascending=True).index
      self.encoder_dict[var]={k:i for i,k in enumerate(t,0)}
    return self

  def transform(self,X):
    """Return a copy of X with the configured columns replaced by their codes.

    Raises
    ------
    ValueError
      If any configured column contains NaN after mapping, e.g. because a
      label was not present in the data passed to `fit`.
    """
    X=X.copy()
    for var in self.variables:
      # Use this column's own mapping; mapping with the outer dictionary
      # (keyed by column names) would turn every value into NaN.
      X[var]=X[var].map(self.encoder_dict[var])

    # Check that the transformer did not produce NaN values
    has_nan=X[self.variables].isnull().any()
    if has_nan.any():
      offending=list(has_nan.index[has_nan])
      raise ValueError(
        'CategoricalEncoder produced NaN for variable(s): {}'.format(offending))
    return X

In [None]:
#Logarithmic transformer 
class LogTransformer(BaseEstimator,TransformerMixin):
  """Apply the natural logarithm to the selected columns.

  Parameters
  ----------
  variables : str or list of str, optional
    Column name(s) to transform. Any value that is not a list is wrapped in
    a one-element list.
  """

  def __init__(self,variables=None):
    # isinstance takes (object, type); the swapped order raised TypeError
    # for ordinary inputs.
    if not isinstance(variables,list):
      self.variables=[variables]
    else:
      self.variables=variables

  def fit(self,X,y=None):
    """Learn nothing; return the transformer unchanged."""
    return self

  def transform(self,X):
    """Return a copy of X with np.log applied to every configured column.

    Raises
    ------
    ValueError
      If any configured column holds a value that is not strictly positive
      (zero, negative, or NaN, since NaN > 0 is False).
    """
    X=X.copy()
    strictly_positive=(X[self.variables]>0).all()
    if not strictly_positive.all():
      offending=list(strictly_positive.index[~strictly_positive])
      raise ValueError(
        'LogTransformer requires strictly positive values; '
        'offending variable(s): {}'.format(offending))

    for feat in self.variables:
      X[feat]=np.log(X[feat])
    return X

  # Backward-compatible alias: the method was originally named `transformer`,
  # which TransformerMixin.fit_transform cannot find.
  transformer=transform


In [None]:
## Now we have to simply calculate cat numerical and categorigal value and pass in it using sklearn pipeline and result came in front of you