In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import ensemble
from sklearn.metrics import mean_absolute_error

df = pd.read_csv('../data/Melbourne_housing_FULL.csv')

# Stripping non-important features
df.columns = df.columns.str.strip()
scrubbed = ['Address','Method','SellerG','Date','Postcode','Lattitude','Longtitude','Regionname','Propertycount']
for title in scrubbed:
    del df[title]

# Remove rows with emptiness
df.dropna(axis=0,how='any',thresh=None,subset=None,inplace=True)

# One-hot encoding
df = pd.get_dummies(df,columns=['Suburb','CouncilArea','Type'])

# Set independent and dependent vars
X = df.drop('Price', axis=1)
y = df['Price']

# Shuffle and split training/testing data
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,shuffle=True)

# Create model
model = ensemble.GradientBoostingRegressor(
                                            n_estimators=150,
                                            learning_rate=0.1,
                                            max_depth=30,
                                            min_samples_split=4,
                                            min_samples_leaf=6,
                                            max_features=0.6,
                                            loss='huber'
                                          )
# Train model
model.fit(X_train, y_train)

GradientBoostingRegressor(loss='huber', max_depth=30, max_features=0.6,
                          min_samples_leaf=6, min_samples_split=4,
                          n_estimators=150)

In [2]:
# First test
mae_train = mean_absolute_error(y_train,model.predict(X_train))
print("TRAIN MAE: %.2f" % mae_train)

mae_test = mean_absolute_error(y_test,model.predict(X_test))
print("TEST MAE: %.2f" % mae_test)

TRAIN MAE: 26381.94
TEST MAE: 169482.66


In [5]:
# Lower depth and increase tree count
model = ensemble.GradientBoostingRegressor(
                                            n_estimators=250,
                                            learning_rate=0.1,
                                            max_depth=5,
                                            min_samples_split=4,
                                            min_samples_leaf=6,
                                            max_features=0.6,
                                            loss='huber'
                                          )
model.fit(X_train, y_train)

GradientBoostingRegressor(loss='huber', max_depth=5, max_features=0.6,
                          min_samples_leaf=6, min_samples_split=4,
                          n_estimators=250)

In [6]:
# Test new model
mae_train = mean_absolute_error(y_train,model.predict(X_train))
print("TRAIN MAE: %.2f" % mae_train)

mae_test = mean_absolute_error(y_test,model.predict(X_test))
print("TEST MAE: %.2f" % mae_test)

TRAIN MAE: 120194.96
TEST MAE: 163573.82


In [11]:
# Implement grid search
from sklearn.model_selection import GridSearchCV

model = ensemble.GradientBoostingRegressor()

hyperparams = {
    'n_estimators':[200,300],
    'max_depth':[4,6],
    'min_samples_split':[3,4],
    'min_samples_leaf':[5,6],
    'learning_rate':[0.01,0.02],
    'max_features':[0.8,0.9],
    'loss':['ls','lad','huber']
}

grid = GridSearchCV(model, hyperparams, n_jobs=4, verbose=10)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 192 candidates, totalling 960 fits
[CV 2/5; 1/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 2/5; 1/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.684 total time=   5.4s
[CV 2/5; 2/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 2/5; 2/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.743 total time=   7.5s
[CV 1/5; 3/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 1/5; 3/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.696 total time=   5.3s
[CV 

[CV 4/5; 21/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200[CV 3/5; 1/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 3/5; 1/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.680 total time=   5.5s
[CV 3/5; 2/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 3/5; 2/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.748 total time=   7.5s
[CV 2/5; 3/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 2/5; 3/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5

[CV 5/5; 21/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200[CV 4/5; 1/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 4/5; 1/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.671 total time=   5.4s
[CV 1/5; 2/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 1/5; 2/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.752 total time=   7.5s
[CV 5/5; 2/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 5/5; 2/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5

[CV 1/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 5/5; 21/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.760 total time=   7.9s
[CV 4/5; 22/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 4/5; 22/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.774 total time=  12.2s
[CV 3/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 3/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.746 total time=   8.4s
[CV 2/5; 24/192] START learning_rate=0.01, loss=ls, max_depth

[CV 1/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.552 total time=   6.6s
[CV 1/5; 22/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.797 total time=  11.9s
[CV 5/5; 22/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 5/5; 22/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.801 total time=  12.2s
[CV 5/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 5/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.759 total time=   8.2s
[CV 4/5; 24/192] START learni

[CV 3/5; 42/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.683 total time=   9.8s
[CV 4/5; 21/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.727 total time=   7.9s
[CV 3/5; 22/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 3/5; 22/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.791 total time=  12.1s
[CV 2/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 2/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.742 total time=   8.4s
[CV 1/5; 24/192] START learni

[CV 4/5; 42/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.575 total time=   9.8s
[CV 1/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.758 total time=   8.2s
[CV 4/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.727 total time=   8.2s
[CV 3/5; 24/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 3/5; 24/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.792 total time=  13.2s
[CV 2/5; 25/192] START learni

[CV 5/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.569 total time=   7.0s
[CV 2/5; 43/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 2/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.554 total time=   6.7s
[CV 1/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 1/5; 44/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.629 total time=  10.3s
[CV 5/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 44/192] END learning_rate=0.01, loss=lad, max_

[CV 2/5; 64/192] START learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 4/5; 43/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 4/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.515 total time=   6.8s
[CV 3/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 3/5; 44/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.685 total time=  10.4s
[CV 2/5; 45/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 2/5; 45/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_

[CV 5/5; 63/192] START learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 4/5; 44/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.578 total time=  10.4s
[CV 3/5; 45/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 3/5; 45/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.619 total time=   7.4s
[CV 2/5; 46/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 2/5; 46/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_

[CV 2/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 3/5; 43/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 3/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.618 total time=   6.9s
[CV 2/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 2/5; 44/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.614 total time=  10.4s
[CV 1/5; 45/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 1/5; 45/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, mi

[CV 5/5; 64/192] START learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 2/5; 64/192] END learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.701 total time=  14.6s
[CV 1/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 1/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.639 total time=   6.2s
[CV 4/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 4/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.584 total time=   6.2s
[CV 3/5; 66/192] START learning_rate=0.01, loss

[CV 1/5; 84/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 63/192] END learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.634 total time=   9.8s
[CV 4/5; 64/192] START learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 4/5; 64/192] END learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.673 total time=  14.6s
[CV 3/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 3/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.669 total time=   6.2s
[CV 2/5; 66/192] START learning_rate=0.01, loss=h

[CV 3/5; 84/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 2/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.619 total time=   6.3s
[CV 5/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 5/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.644 total time=   6.3s
[CV 4/5; 66/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 4/5; 66/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.668 total time=   9.4s
[CV 3/5; 67/192] START learning_rate=0.01, 

[CV 2/5; 86/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 5/5; 64/192] END learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.715 total time=  14.9s
[CV 1/5; 66/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 1/5; 66/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.708 total time=   9.2s
[CV 5/5; 66/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 5/5; 66/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.717 total time=   9.5s
[CV 1/5; 68/192] START learning_rate=0.01, lo

[CV 3/5; 86/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 1/5; 84/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.775 total time=  14.0s
[CV 5/5; 84/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 84/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.784 total time=  13.9s
[CV 5/5; 85/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 5/5; 85/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.723 total time=   9.7s
[CV 4/5; 86/192] START learning_rate=0.01, 

[CV 3/5; 104/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 3/5; 84/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.785 total time=  13.9s
[CV 2/5; 85/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 2/5; 85/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.695 total time=   9.4s
[CV 1/5; 86/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 1/5; 86/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.775 total time=  14.2s
[CV 5/5; 86/192] START learning_rate=0.01, lo

[CV 1/5; 105/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 2/5; 86/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.762 total time=  14.2s
[CV 1/5; 87/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 1/5; 87/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.718 total time=   9.1s
[CV 4/5; 87/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 87/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.670 total time=   9.7s
[CV 3/5; 88/192] START learning_rate=0.01, lo

[CV 1/5; 107/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 3/5; 86/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.783 total time=  14.0s
[CV 2/5; 87/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 2/5; 87/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.695 total time=   9.4s
[CV 1/5; 88/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 1/5; 88/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.775 total time=  14.3s
[CV 5/5; 88/192] START learning_rate=0.01, lo

[CV 1/5; 108/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 1/5; 105/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.778 total time=   5.2s
[CV 5/5; 105/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 5/5; 105/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.786 total time=   5.5s
[CV 4/5; 106/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 4/5; 106/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.787 total time=   8.2s
[CV 3/5; 107/192] START learning_rate=0.02, loss=ls, ma

[CV 5/5; 125/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.815 total time=   8.4s
[CV 3/5; 104/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.796 total time=   7.0s
[CV 2/5; 105/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 2/5; 105/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.771 total time=   5.1s
[CV 1/5; 106/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 1/5; 106/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.802 total time=   8.4s
[CV 5/5; 106/192] START 

[CV 3/5; 125/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.807 total time=   7.6s
[CV 1/5; 107/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.777 total time=   5.4s
[CV 4/5; 107/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 4/5; 107/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.759 total time=   5.3s
[CV 3/5; 108/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 3/5; 108/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.796 total time=   7.8s
[CV 2/5; 109/192] START 

[CV 2/5; 127/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.799 total time=   9.4s
[CV 1/5; 108/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.806 total time=   7.8s
[CV 5/5; 108/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 108/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.805 total time=   7.7s
[CV 1/5; 110/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 1/5; 110/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.803 total time=   7.7s
[CV 5/5; 110/192] START 

[CV 3/5; 129/192] END learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.713 total time=   5.7s
[CV 4/5; 126/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 4/5; 126/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.804 total time=  13.3s
[CV 3/5; 127/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 3/5; 127/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.804 total time=   9.2s
[CV 2/5; 128/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 2/5; 128/192] END learning_rate=0.02, loss=ls, max

[CV 1/5; 146/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.788 total time=  14.0s
[CV 2/5; 126/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 2/5; 126/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.819 total time=  12.8s
[CV 1/5; 127/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 1/5; 127/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.810 total time=   9.3s
[CV 4/5; 127/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 127/192] END learning_rate=0.02, loss=ls, max

[CV 4/5; 146/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.753 total time=  14.1s
[CV 1/5; 128/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 1/5; 128/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.827 total time=  12.2s
[CV 5/5; 128/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 5/5; 128/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.827 total time=  11.2s
[CV 5/5; 129/192] START learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 5/5; 129/192] END learning_rate=0.02, loss=lad, m

[CV 1/5; 148/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.777 total time=  16.4s
[CV 2/5; 130/192] START learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 2/5; 130/192] END learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.700 total time=   8.5s
[CV 1/5; 131/192] START learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 1/5; 131/192] END learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.660 total time=   6.0s
[CV 4/5; 131/192] START learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 4/5; 131/192] END learning_rate=0.02, loss=la

[CV 1/5; 150/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.787 total time=  15.4s
[CV 3/5; 147/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 3/5; 147/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.779 total time=  10.5s
[CV 2/5; 148/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 2/5; 148/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.766 total time=  16.0s
[CV 1/5; 149/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 1/5; 149/192] END learning_rate=0.02, loss=la

[CV 1/5; 167/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.740 total time=   6.4s
[CV 5/5; 146/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 5/5; 146/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.789 total time=  14.5s
[CV 5/5; 147/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 5/5; 147/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.753 total time=  11.7s
[CV 4/5; 148/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 4/5; 148/192] END learning_rate=0.02, loss=

[CV 3/5; 166/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.786 total time=   9.5s
[CV 5/5; 148/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 148/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.779 total time=  15.3s
[CV 5/5; 149/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 5/5; 149/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.759 total time=  10.7s
[CV 4/5; 150/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 4/5; 150/192] END learning_rate=0.02, loss=

[CV 4/5; 168/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.748 total time=   9.1s
[CV 5/5; 150/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 5/5; 150/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.789 total time=  14.4s
[CV 5/5; 151/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 5/5; 151/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.754 total time=   9.6s
[CV 4/5; 152/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 4/5; 152/192] END learning_rate=0.02, loss=

[CV 1/5; 172/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.771 total time=  11.3sTRAIN MAE: 137940.92
TEST MAE: 173835.95

[CV 4/5; 167/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 167/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.710 total time=   6.3s
[CV 3/5; 168/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 3/5; 168/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.786 total time=   9.3s
[CV 2/5; 169/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=


[CV 2/5; 167/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 2/5; 167/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.726 total time=   6.4s
[CV 1/5; 168/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 1/5; 168/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.777 total time=   9.2s
[CV 5/5; 168/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 5/5; 168/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.784 total time=  10.1s
[CV 1/5; 170/192] START learning_rat


[CV 3/5; 169/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 3/5; 169/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.757 total time=   7.9s
[CV 2/5; 170/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 2/5; 170/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.760 total time=  10.9s
[CV 1/5; 171/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 1/5; 171/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.741 total time=   7.2s
[CV 4/5; 171/192] START learning_rat


[CV 5/5; 172/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 172/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.784 total time=  10.8s
[CV 5/5; 173/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 5/5; 173/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.752 total time=   7.0s
[CV 4/5; 174/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 4/5; 174/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.752 total time=   9.7s
[CV 3/5; 175/192] START learning_rat

In [15]:
grid.best_params_
mae_train = mean_absolute_error(y_train,grid.predict(X_train))
print("TRAIN MAE: %.2f" % mae_train)

mae_test = mean_absolute_error(y_test,grid.predict(X_test))
print("TEST MAE: %.2f" % mae_test)

TRAIN MAE: 137940.92
TEST MAE: 173835.95


In [17]:
# Attempting to use different hyperparameters from gridsearch

model = ensemble.GradientBoostingRegressor(
    n_estimators=300,
    max_depth=6,
    min_samples_split=4,
    min_samples_leaf=6,
    learning_rate=0.02,
    max_features=0.9,
    loss='huber'
)

model.fit(X_train, y_train)
mae_train = mean_absolute_error(y_train,model.predict(X_train))
print("TRAIN MAE: %.2f" % mae_train)

mae_test = mean_absolute_error(y_test,model.predict(X_test))
print("TEST MAE: %.2f" % mae_test)

TRAIN MAE: 137777.10
TEST MAE: 174353.58


In [18]:
# Attempt grid search with randomize seed
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,shuffle=True, random_state=42)

model = ensemble.GradientBoostingRegressor()

hyperparams = {
    'n_estimators':[200,250, 300],
    'max_depth':[4,5,6],
    'min_samples_split':[3,4,5],
    'min_samples_leaf':[5,6,7],
    'learning_rate':[0.01,0.02],
    'max_features':[0.6,0.7,0.8,0.9],
    'loss':['ls','lad','huber']
}

grid = GridSearchCV(model, hyperparams, n_jobs=4, verbose=10)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 1944 candidates, totalling 9720 fits


KeyboardInterrupt: 

In [4]:
#Moving back to specific hyperparameters for the purpose of attempting single prediction
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import ensemble
from sklearn.metrics import mean_absolute_error

df = pd.read_csv('../data/Melbourne_housing_FULL.csv')

# Stripping non-important features
df.columns = df.columns.str.strip()
scrubbed = ['Address','Method','SellerG','Date','Postcode','Lattitude','Longtitude','Regionname','Propertycount']
for title in scrubbed:
    del df[title]

# Remove rows with emptiness
df.dropna(axis=0,how='any',thresh=None,subset=None,inplace=True)

# One-hot encoding
df = pd.get_dummies(df,columns=['Suburb','CouncilArea','Type'])

# Set independent and dependent vars
X = df.drop('Price', axis=1)
y = df['Price']

model = ensemble.GradientBoostingRegressor(
                                            n_estimators=250,
                                            learning_rate=0.1,
                                            max_depth=5,
                                            min_samples_split=4,
                                            min_samples_leaf=6,
                                            max_features=0.6,
                                            loss='huber'
                                          )

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,shuffle=True, random_state=42)

model.fit(X_train, y_train)

NameError: name 'df' is not defined