In [7]:
import pandas as pd
import numpy as np
import math
from sys import float_info

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import mean_absolute_error

class Binner(BaseEstimator,TransformerMixin):
    '''
    Binner for a single continuous feature.

    Only the first column of a 2-D input is binned (a 1-D input is used as
    is). A value receives the index of the first edge that is strictly
    greater than it, or the number of edges when no edge is greater.

    Parameters
    ----------
    strategy : list, tuple, 1-D ndarray or None
        Explicit bin edges. When None, ``fit`` called with ``y`` searches for
        a quantile count and learns the edges from the data.

    Attributes
    ----------
    best_q_ : int
        Quantile count chosen by ``fit`` (set only when the search runs;
        0 when no usable binning was found).
    bin_edges_ : ndarray
        Interior quantile edges for ``best_q_`` (set only when the search
        finds a usable binning).
    '''
    def __init__(self,strategy=None):
        self.strategy=strategy

    @staticmethod
    def _first_column(X):
        '''Return the feature to bin as a 1-D float array.'''
        arr = np.asarray(X, dtype=float)
        return arr if arr.ndim == 1 else arr[:, 0]

    def _resolve_edges(self):
        '''Return the edges to bin with: explicit ones first, then learned ones.'''
        if isinstance(self.strategy, (list, tuple, np.ndarray)):
            return np.asarray(self.strategy, dtype=float).ravel()
        learned = getattr(self, 'bin_edges_', None)
        if learned is None:
            raise ValueError(
                'Binner has no bin edges: pass a list of edges as `strategy` '
                'or call fit(X, y) first so they can be learned.')
        return learned

    def fit(self, X, y=None):
        '''
        Learn quantile bin edges when no explicit strategy was given.

        For every q in 2..99 the feature is cut into q quantile bins and the
        mean absolute error between the bin codes and ``y`` is computed; the
        q with the smallest error wins (the smallest q on ties). Nothing is
        learned when ``strategy`` is set or ``y`` is None.

        Returns
        -------
        self
        '''
        if self.strategy is None and y is not None:
            values = self._first_column(X)
            target = np.asarray(y, dtype=float).ravel()

            min_err = float_info.max
            best_q = 0
            best_edges = None
            for q in range(2, 100):
                try:
                    # duplicates='drop' merges repeated quantile edges instead
                    # of raising, which otherwise happens as soon as q exceeds
                    # the number of distinct values.
                    codes, edges = pd.qcut(values, q, labels=False,
                                           retbins=True, duplicates='drop')
                except ValueError:
                    continue
                if len(edges) < 3:
                    # Fewer than two bins left: nothing to compare.
                    continue
                err = mean_absolute_error(codes, target)
                if err < min_err:
                    min_err = err
                    best_q = q
                    best_edges = edges

            self.best_q_ = best_q
            if best_edges is not None:
                # The outer edges are the data min/max; only the interior
                # edges separate bins.
                self.bin_edges_ = np.asarray(best_edges[1:-1], dtype=float)
            print('best q: ' + str(best_q))
        return self

    def transform(self, X, y=None):
        '''
        Map each value of the first column of ``X`` to its bin index.

        Returns
        -------
        ndarray of shape (n_samples, 1), integer dtype

        Raises
        ------
        ValueError
            If there are neither explicit nor learned bin edges.
        '''
        edges = self._resolve_edges()
        values = self._first_column(X)

        # Default: no edge is greater than the value -> last bin.
        codes = np.full((len(values), 1), len(edges), dtype=int)
        if len(edges) and len(values):
            below = values[:, None] < edges[None, :]
            found = below.any(axis=1)
            # argmax on a boolean row gives the first True, i.e. the first
            # edge strictly greater than the value.
            codes[found, 0] = below.argmax(axis=1)[found]
        return codes

    def fit_transform(self, X, y=None):
        '''Fit on ``X`` (forwarding ``y`` to the search) and return the bin indices.'''
        self.fit(X, y)
        return self.transform(X)
    
import unittest as ut

from sklearn.pipeline import Pipeline

class Test(ut.TestCase):
    '''Unit tests for Binner configured with explicit bin edges.'''

    def testRange(self):
        '''Ages 3, 17 and 66 fall into bins 0, 1 and 6 for edges 10, 20, ..., 100.'''
        edges = list(np.linspace(10, 100, 10))
        ages = pd.DataFrame({'age': [3., 17., 66.]})

        binned = Binner(strategy=edges).transform(ages[['age']].values)

        expected = np.array([[0], [1], [6]])
        self.assertTrue(np.array_equal(expected, binned))
        
                
if __name__ == '__main__':
    # argv carries a placeholder program name plus -v for verbose output, so
    # unittest does not read the process's real command line.
    # exit=False stops unittest from calling sys.exit() after the run.
    ut.main(
        exit=False,
        argv=['ignored', '-v'],
    )

testRange (__main__.Test) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.016s

OK


In [6]:
# Scratch check: display the largest finite float, the value Binner.fit uses
# as its initial error bound.
from sys import float_info
float_info.max

1.7976931348623157e+308