# In [2]:
from sklearn.pipeline import TransformerMixin
from scipy.spatial.distance import cdist
import numpy as np

class OutlierExtractor(TransformerMixin):
    """Drop rows whose absolute z-score exceeds a threshold in any column.

    ``fit`` learns the mean and (population, ``ddof=0``) standard deviation
    of each selected column. ``transform`` scores the frame it is given
    against those statistics and returns a copy without the rows where at
    least one selected column lies more than ``threshold`` standard
    deviations from the fitted mean.

    Parameters
    ----------
    threshold : float, default=3
        Maximum allowed absolute z-score; rows strictly above it are dropped.
    columns : iterable of column labels, optional
        Columns to inspect. ``None`` means every column of the frame passed
        to ``fit``.

    Attributes
    ----------
    columns_ : list
        Columns resolved during ``fit``.
    means_, stds_ : dict
        Per-column mean and standard deviation learned in ``fit``, computed
        over finite values only.
    outliers_zscore : dict
        Per-column absolute z-scores of the frame passed to ``fit``.

    Notes
    -----
    ``transform`` only receives ``X``, so a separately held target vector is
    not filtered alongside it.
    """

    def __init__(self, threshold=3, columns=None):
        self.threshold = threshold
        self.columns = columns

    @staticmethod
    def _abs_zscore(values, mean, std):
        """Return ``|values - mean| / std``, or all-NaN when ``std`` is unusable."""
        values = np.asarray(values, dtype=float)
        # A zero or NaN spread makes the z-score undefined; NaN scores never
        # compare greater than the threshold, so such columns flag nothing.
        if not std > 0:
            return np.full(values.shape, np.nan)
        return np.abs(values - mean) / std

    def fit(self, X, y=None):
        """Learn per-column mean and standard deviation from ``X``.

        Parameters
        ----------
        X : DataFrame-like
            Must support ``X[column]`` and, when ``columns`` is ``None``,
            expose ``X.columns``.
        y : ignored
            Accepted for API compatibility.

        Returns
        -------
        self
        """
        # Resolve into a fitted attribute rather than overwriting the
        # constructor argument, so refitting on another frame re-resolves.
        selected = X.columns if self.columns is None else self.columns
        self.columns_ = list(selected)

        self.means_ = {}
        self.stds_ = {}
        self.outliers_zscore = {}

        for column in self.columns_:
            values = np.asarray(X[column], dtype=float)
            # Ignore NaN/inf when estimating location and spread so a single
            # missing value does not disable detection for the whole column.
            finite = values[np.isfinite(values)]
            if finite.size:
                mean, std = finite.mean(), finite.std()
            else:
                mean, std = np.nan, np.nan

            self.means_[column] = mean
            self.stds_[column] = std
            self.outliers_zscore[column] = self._abs_zscore(values, mean, std)

        return self

    def transform(self, X):
        """Return a copy of ``X`` without the rows flagged as outliers.

        Rows are selected with a positional boolean mask, so the result does
        not depend on the index labels of ``X``.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called.
        """
        if not hasattr(self, "columns_"):
            raise AttributeError(
                "OutlierExtractor is not fitted yet; call fit() before transform()."
            )

        is_outlier = np.zeros(len(X), dtype=bool)
        for column in self.columns_:
            scores = self._abs_zscore(
                X[column], self.means_[column], self.stds_[column]
            )
            is_outlier |= scores > self.threshold

        return X.loc[~is_outlier]