In [9]:
from sklearn.base import BaseEstimator, clone
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import r2_score
import numpy as np
import pandas as pd

In [2]:
np.random.seed(357823)

In [10]:
class SequentialRegressor(BaseEstimator):
    def __init__(self, estimator, n_estimators,
                sample_size, lr, hyperparameters):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.sample_size = sample_size
        self.lr = lr
        self.hyperparameters = hyperparameters

    ## Dos arrays de entrada, objetivo dimensión (rows,)
    def fit(self, attributes, objective):
        rows = attributes.shape[0]
        prediction = np.zeros(rows)
        self.models = []

        for i in range(self.n_estimators):
            remainder_i = objective - prediction
            n_samples_i = int(self.sample_size * rows)
            selected = np.random.choice(rows, size=n_samples_i, replace=False)
            samples_i = attributes[selected]
            objective_i = remainder_i[selected]
            model = clone(self.estimator)
            model.set_params(**self.hyperparameters)
            model.fit(samples_i, objective_i)
            self.models.append(model)
            prediction_i = model.predict(attributes)
            prediction = prediction + self.lr * prediction_i
        return self.models

    ## Array de entrada
    def predict(self, attributes):
        rows = attributes.shape[0]
        predictions = np.zeros(rows)
        for model in self.models:
            predictions += self.lr * model.predict(attributes)
        return predictions

In [20]:
houses = pd.read_csv("house_prices.csv")
houses.head()

Unnamed: 0,GarageCars,Condition2,YearBuilt,GarageYrBlt,LandContour,LowQualFinSF,HouseStyle,GarageType,MSSubClass,WoodDeckSF,...,MiscVal,BsmtExposure,OpenPorchSF,ExterCond,Fireplaces,FullBath,BsmtQual,MiscFeature,PoolQC,SalePrice
0,2,Norm,1962,1977.0,Lvl,0,1Story,Detchd,20,0,...,0,none,0,TA,0,1,TA,none,none,132000
1,0,Norm,1914,0.0,Lvl,0,2.5Unf,none,75,0,...,0,none,291,TA,1,2,TA,none,none,128000
2,2,Norm,1999,1999.0,Lvl,0,1Story,Attchd,20,0,...,0,Av,35,TA,0,2,Gd,none,none,192000
3,1,Norm,1948,1948.0,Bnk,0,2Story,Attchd,20,103,...,0,none,0,Gd,0,3,TA,none,none,225000
4,2,Norm,1950,1950.0,Lvl,0,1Story,Detchd,20,0,...,0,none,29,TA,0,1,none,none,none,109900


In [21]:
atributos = houses.iloc[:,:-1]
objetivo = houses.iloc[:,-1:]
atributos_discretos = atributos.select_dtypes(include=['object', 'string']).columns
encoder = OrdinalEncoder()
encoder.fit(atributos[atributos_discretos])
atributos[atributos_discretos] = encoder.transform(atributos[atributos_discretos])

In [22]:
(atributos_entrenamiento, atributos_prueba,
 objetivo_entrenamiento, objetivo_prueba) = train_test_split(atributos, objetivo,test_size=.2)

In [57]:
hyperparameters = {
    'max_depth' : 10
}
decisionTree = SequentialRegressor(DecisionTreeRegressor(), 4, 0.75, 0.5, hyperparameters)

In [58]:
print(decisionTree.fit(np.array(atributos_entrenamiento), np.array(objetivo_entrenamiento).ravel()))
predictions = decisionTree.predict(np.array(atributos_prueba))
print(predictions)

[DecisionTreeRegressor(max_depth=10), DecisionTreeRegressor(max_depth=10), DecisionTreeRegressor(max_depth=10), DecisionTreeRegressor(max_depth=10)]
[152299.26673568 130648.98140208 135514.05359791 189480.13766232
 231255.51417014 207725.10751389  85001.61771251 200940.68656654
 184387.4520336  107581.35215434 114417.49854206 122890.99301728
 286024.5745953   92655.49696944 264704.96745245 127680.58542487
 136198.55632274 102560.99301728 118746.77283665 206773.20654781
 199061.40085225 138905.48758561 299214.21298391 100563.48359485
 105996.36735019 169498.503824   205328.65075119 151848.6460724
 157316.17760175 123733.55752565 126086.35854058 176575.42175323
 102090.80963683 152075.03391447 113440.94176722 137395.21818352
 304237.62238403 120350.32797781 176972.02334999 200189.08974567
 146964.09469798 120544.10406837 135635.76609107 235816.79812488
 116799.31458744  94291.41880805 178105.49176134 361466.75
 122967.2005028  126579.42580225 128734.10802317 111646.7073605
 130648.981402

In [59]:
score = r2_score(np.array(objetivo_prueba), predictions)
print(score)

0.7651507926477501


In [26]:
comparar_tree = DecisionTreeRegressor(max_depth=10)
comparar_tree.fit(np.array(atributos_entrenamiento), np.array(objetivo_entrenamiento).ravel())
comparar_predictions = comparar_tree.predict(np.array(atributos_prueba))
score2 = r2_score(np.array(objetivo_prueba), comparar_predictions)
print(score2)

0.6267862030458484
