In [2]:
from sklearn.ensemble import RandomTreesEmbedding
import numpy as np

from sklearn.base import BaseEstimator
from scipy.optimize import brenth
from functools import reduce
from functools import partial

import matplotlib.pyplot as plt

In [18]:
################## THIS IS THE BASIC QUANTREGTREE ALGO ######################
from sklearn.base import BaseEstimator
from functools import reduce
class quantregTree(BaseEstimator):
    """Quantile regressor built on the leaves of a ``RandomTreesEmbedding``.

    ``fit`` stores, for every leaf column of the fitted embedding, the sorted
    training targets whose samples fall into that leaf.  ``predict`` pools the
    stored targets of all leaves a sample lands in and returns the ``alpha``
    order statistic of that pooled sample.

    Parameters
    ----------
    alpha : float
        Quantile level; the prediction is the element at position
        ``floor(alpha * n_pooled)`` of the sorted pooled targets (capped at the
        last element).  Expected to lie in ``[0, 1]``.
    n_trees : int
        Number of trees, forwarded as ``n_estimators``.
    max_depth : int, default=3
        Forwarded to ``RandomTreesEmbedding``.
    min_samples_leaf : int, default=3
        Forwarded to ``RandomTreesEmbedding``.
    """
    def __init__(self,alpha,n_trees,max_depth=3,min_samples_leaf=3):
        self.alpha = alpha
        self.n_trees = n_trees
        self.rte = None
        self.leafs = dict()
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf

    def fit(self,X,y):
        """Fit the embedding and record the sorted targets of every leaf.

        Returns ``self``.
        """
        self.rte = RandomTreesEmbedding(n_estimators=self.n_trees,
                                        max_depth=self.max_depth,
                                        min_samples_leaf=self.min_samples_leaf)
        embedded = self.rte.fit_transform(X,y)
        # nonzero() yields (row indices, column indices); a column is one leaf.
        rows, leafs = embedded.nonzero()
        targets = np.asarray(y)
        # Start from an empty table so a refit does not keep leaves that
        # belonged to a previously fitted embedding.
        self.leafs = dict()
        for leaf in np.unique(leafs):
            self.leafs[leaf] = np.sort(targets[rows[leafs==leaf]])

        return self

    def predict(self,X):
        """Return a list with one ``alpha``-quantile estimate per row of ``X``."""
        _, leafs = self.rte.transform(X).nonzero()

        def quantile(n_row):
            # Assumes every sample contributes exactly ``n_trees`` non-zero
            # columns and that nonzero() lists them grouped by row.
            leaf_list = leafs[n_row*self.n_trees:(n_row+1)*self.n_trees]
            pooled = np.sort(np.concatenate([self.leafs[leaf] for leaf in leaf_list]))
            # floor(alpha * n) equals n when alpha == 1.0, which would index
            # one past the end; cap it at the largest pooled value.
            position = min(int(np.floor(self.alpha*float(len(pooled)))), len(pooled)-1)
            return pooled[position]

        return [quantile(n) for n in range(X.shape[0])]

In [31]:
################## THIS IS AN ESTIMATOR OF THE CDF USING A RANDOM FOREST ######################
from sklearn.base import BaseEstimator
from functools import reduce
class quantregTree(BaseEstimator):
    """Conditional CDF estimator built on the leaves of a ``RandomTreesEmbedding``.

    ``fit`` stores, for every leaf column of the fitted embedding, the fraction
    of that leaf's training targets lying strictly below each point of
    ``y_values``.  ``predict`` averages those per-leaf curves over the leaves a
    sample lands in.

    Parameters
    ----------
    y_values : array-like
        Grid of target values at which the CDF is evaluated.
    n_trees : int
        Number of trees, forwarded as ``n_estimators``.
    max_depth : int, default=3
        Forwarded to ``RandomTreesEmbedding``.
    min_samples_leaf : int, default=3
        Forwarded to ``RandomTreesEmbedding``.
    """
    def __init__(self,y_values,n_trees,max_depth=3,min_samples_leaf=3):
        self.n_trees = n_trees
        self.rte = None
        self.leafs = dict()
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf

        self.y_values = y_values

    def fit(self,X,y):
        """Fit the embedding and tabulate the empirical CDF of every leaf.

        Returns ``self``.
        """
        self.rte = RandomTreesEmbedding(n_estimators=self.n_trees,
                                        max_depth=self.max_depth,
                                        min_samples_leaf=self.min_samples_leaf)
        embedded = self.rte.fit_transform(X,y)
        # nonzero() yields (row indices, column indices); a column is one leaf.
        rows, leafs = embedded.nonzero()
        # Accept lists and other array-likes, not only ndarrays.
        targets = np.asarray(y)
        grid = np.asarray(self.y_values)
        # Start from an empty table so a refit does not keep leaves that
        # belonged to a previously fitted embedding.
        self.leafs = dict()
        for leaf in np.unique(leafs):
            values = targets[rows[leafs==leaf]]
            # Fraction of the leaf's targets strictly below each grid point.
            self.leafs[leaf] = np.mean(values[:, None] < grid, axis=0)

        return self

    def predict(self,X):
        """Return a list with one CDF curve (array over ``y_values``) per row of ``X``."""
        _, leafs = self.rte.transform(X).nonzero()

        def quantile(n_row):
            # Assumes every sample contributes exactly ``n_trees`` non-zero
            # columns and that nonzero() lists them grouped by row.
            leaf_list = leafs[n_row*self.n_trees:(n_row+1)*self.n_trees]
            # Sum the curve of *every* tree's leaf before averaging; a reduce()
            # without an initial value would seed the sum with the first index
            # instead of the first leaf's curve.
            return np.sum([self.leafs[leaf] for leaf in leaf_list], axis=0)/self.n_trees

        return [quantile(n) for n in range(X.shape[0])]

In [32]:
import time
# Synthetic benchmark data: N samples with n standard-normal features and an
# independent standard-normal target.
SEED = 0
N = 10000
n = 10
# Seed the global generator so a fresh kernel reproduces the same X and y.
np.random.seed(SEED)
X = np.random.randn(N,n)
y = np.random.randn(N)

In [33]:
%%time
n_y = 100
qt = quantregTree(np.linspace(0,1,100),100,max_depth=3)
qt.fit(X,y)

Wall time: 1.63 s


In [15]:
%%time
n_y = 100
qt = quantregTree(np.linspace(0,1,100),100,max_depth=3)
qt.fit(X,y)

Wall time: 1.97 s


In [None]:
%%time
_ = qt.predict(X)

In [34]:
# Inspect the table built by fit: leaf column index -> array of per-leaf CDF
# values, one entry per point of the y grid.
qt.leafs

{0: array([ 0.33333333,  0.33333333,  0.33333333,  0.33333333,  0.33333333,
         0.33333333,  0.33333333,  0.33333333,  0.33333333,  0.33333333,
         0.33333333,  0.33333333,  0.33333333,  0.33333333,  0.33333333,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.5       ,  0.5       ,
         0.5       ,  0.5       ,  0.5       ,  0.66666667,  0.66666667,
         0.66666667,  0.66666667,  0.66666667,  0.66666667,  0.66666667,
         0.66666667,  0.66666667,  0.66666667,  0.66666667,  0.66666667,
         0.66666667,  0.66666667,  0.66666667,  0.66666667,  0.66666667,
         0.66666667,  0.66666667,  0.66666667,  0.66666667,  0.66666667,
         0.66666667,  0.66666667,  0.66666667,  

In [16]:
# Small integer array used for a quick index-array lookup check.
a = np.array((0, 5, 7, 1, 29))

In [17]:
# Pick positions 1 and 4 with an integer index array.
a[np.array((1, 4))]

array([ 5, 29])