In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import ensemble
from sklearn.metrics import mean_absolute_error

df = pd.read_csv('../data/Melbourne_housing_FULL.csv')

# Stripping non-important features
df.columns = df.columns.str.strip()
scrubbed = ['Address','Method','SellerG','Date','Postcode','Lattitude','Longtitude','Regionname','Propertycount']
for title in scrubbed:
    del df[title]

# Remove rows with emptiness
df.dropna(axis=0,how='any',thresh=None,subset=None,inplace=True)

# One-hot encoding
df = pd.get_dummies(df,columns=['Suburb','CouncilArea','Type'])

# Set independent and dependent vars
X = df.drop('Price', axis=1)
y = df['Price']

# Shuffle and split training/testing data
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,shuffle=True)

# Create model
model = ensemble.GradientBoostingRegressor(
                                            n_estimators=150,
                                            learning_rate=0.1,
                                            max_depth=30,
                                            min_samples_split=4,
                                            min_samples_leaf=6,
                                            max_features=0.6,
                                            loss='huber'
                                          )
# Train model
model.fit(X_train, y_train)

GradientBoostingRegressor(loss='huber', max_depth=30, max_features=0.6,
                          min_samples_leaf=6, min_samples_split=4,
                          n_estimators=150)

In [2]:
# First test
mae_train = mean_absolute_error(y_train,model.predict(X_train))
print("TRAIN MAE: %.2f" % mae_train)

mae_test = mean_absolute_error(y_test,model.predict(X_test))
print("TEST MAE: %.2f" % mae_test)

TRAIN MAE: 26381.94
TEST MAE: 169482.66


In [5]:
# Lower depth and increase tree count
model = ensemble.GradientBoostingRegressor(
                                            n_estimators=250,
                                            learning_rate=0.1,
                                            max_depth=5,
                                            min_samples_split=4,
                                            min_samples_leaf=6,
                                            max_features=0.6,
                                            loss='huber'
                                          )
model.fit(X_train, y_train)

GradientBoostingRegressor(loss='huber', max_depth=5, max_features=0.6,
                          min_samples_leaf=6, min_samples_split=4,
                          n_estimators=250)

In [6]:
# Test new model
mae_train = mean_absolute_error(y_train,model.predict(X_train))
print("TRAIN MAE: %.2f" % mae_train)

mae_test = mean_absolute_error(y_test,model.predict(X_test))
print("TEST MAE: %.2f" % mae_test)

TRAIN MAE: 120194.96
TEST MAE: 163573.82


In [11]:
# Implement grid search
from sklearn.model_selection import GridSearchCV

model = ensemble.GradientBoostingRegressor()

hyperparams = {
    'n_estimators':[200,300],
    'max_depth':[4,6],
    'min_samples_split':[3,4],
    'min_samples_leaf':[5,6],
    'learning_rate':[0.01,0.02],
    'max_features':[0.8,0.9],
    'loss':['ls','lad','huber']
}

grid = GridSearchCV(model, hyperparams, n_jobs=4, verbose=10)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 192 candidates, totalling 960 fits
[CV 2/5; 1/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 2/5; 1/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.684 total time=   5.4s
[CV 2/5; 2/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 2/5; 2/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.743 total time=   7.5s
[CV 1/5; 3/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 1/5; 3/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.696 total time=   5.3s
[CV 

[CV 4/5; 21/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200[CV 3/5; 1/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 3/5; 1/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.680 total time=   5.5s
[CV 3/5; 2/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 3/5; 2/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.748 total time=   7.5s
[CV 2/5; 3/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 2/5; 3/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5

[CV 5/5; 21/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200[CV 4/5; 1/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 4/5; 1/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.671 total time=   5.4s
[CV 1/5; 2/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 1/5; 2/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.752 total time=   7.5s
[CV 5/5; 2/192] START learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 5/5; 2/192] END learning_rate=0.01, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=5

[CV 1/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 5/5; 21/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.760 total time=   7.9s
[CV 4/5; 22/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 4/5; 22/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.774 total time=  12.2s
[CV 3/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 3/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.746 total time=   8.4s
[CV 2/5; 24/192] START learning_rate=0.01, loss=ls, max_depth

[CV 1/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.552 total time=   6.6s
[CV 1/5; 22/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.797 total time=  11.9s
[CV 5/5; 22/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 5/5; 22/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.801 total time=  12.2s
[CV 5/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 5/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.759 total time=   8.2s
[CV 4/5; 24/192] START learni

[CV 3/5; 42/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.683 total time=   9.8s
[CV 4/5; 21/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.727 total time=   7.9s
[CV 3/5; 22/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 3/5; 22/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.791 total time=  12.1s
[CV 2/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 2/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.742 total time=   8.4s
[CV 1/5; 24/192] START learni

[CV 4/5; 42/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.575 total time=   9.8s
[CV 1/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.758 total time=   8.2s
[CV 4/5; 23/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 23/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.727 total time=   8.2s
[CV 3/5; 24/192] START learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 3/5; 24/192] END learning_rate=0.01, loss=ls, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.792 total time=  13.2s
[CV 2/5; 25/192] START learni

[CV 5/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.569 total time=   7.0s
[CV 2/5; 43/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 2/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.554 total time=   6.7s
[CV 1/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 1/5; 44/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.629 total time=  10.3s
[CV 5/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 44/192] END learning_rate=0.01, loss=lad, max_

[CV 2/5; 64/192] START learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 4/5; 43/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 4/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.515 total time=   6.8s
[CV 3/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 3/5; 44/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.685 total time=  10.4s
[CV 2/5; 45/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 2/5; 45/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_

[CV 5/5; 63/192] START learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 4/5; 44/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.578 total time=  10.4s
[CV 3/5; 45/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 3/5; 45/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.619 total time=   7.4s
[CV 2/5; 46/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 2/5; 46/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_

[CV 2/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 3/5; 43/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 3/5; 43/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.618 total time=   6.9s
[CV 2/5; 44/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 2/5; 44/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.614 total time=  10.4s
[CV 1/5; 45/192] START learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 1/5; 45/192] END learning_rate=0.01, loss=lad, max_depth=4, max_features=0.9, mi

[CV 5/5; 64/192] START learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 2/5; 64/192] END learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.701 total time=  14.6s
[CV 1/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 1/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.639 total time=   6.2s
[CV 4/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 4/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.584 total time=   6.2s
[CV 3/5; 66/192] START learning_rate=0.01, loss

[CV 1/5; 84/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 63/192] END learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.634 total time=   9.8s
[CV 4/5; 64/192] START learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 4/5; 64/192] END learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.673 total time=  14.6s
[CV 3/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 3/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.669 total time=   6.2s
[CV 2/5; 66/192] START learning_rate=0.01, loss=h

[CV 3/5; 84/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 2/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.619 total time=   6.3s
[CV 5/5; 65/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 5/5; 65/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.644 total time=   6.3s
[CV 4/5; 66/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 4/5; 66/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.668 total time=   9.4s
[CV 3/5; 67/192] START learning_rate=0.01, 

[CV 2/5; 86/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 5/5; 64/192] END learning_rate=0.01, loss=lad, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.715 total time=  14.9s
[CV 1/5; 66/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 1/5; 66/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.708 total time=   9.2s
[CV 5/5; 66/192] START learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 5/5; 66/192] END learning_rate=0.01, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.717 total time=   9.5s
[CV 1/5; 68/192] START learning_rate=0.01, lo

[CV 3/5; 86/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 1/5; 84/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.775 total time=  14.0s
[CV 5/5; 84/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 84/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.784 total time=  13.9s
[CV 5/5; 85/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 5/5; 85/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.723 total time=   9.7s
[CV 4/5; 86/192] START learning_rate=0.01, 

[CV 3/5; 104/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 3/5; 84/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.785 total time=  13.9s
[CV 2/5; 85/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 2/5; 85/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.695 total time=   9.4s
[CV 1/5; 86/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 1/5; 86/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.775 total time=  14.2s
[CV 5/5; 86/192] START learning_rate=0.01, lo

[CV 1/5; 105/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 2/5; 86/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.762 total time=  14.2s
[CV 1/5; 87/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 1/5; 87/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.718 total time=   9.1s
[CV 4/5; 87/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 87/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.670 total time=   9.7s
[CV 3/5; 88/192] START learning_rate=0.01, lo

[CV 1/5; 107/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 3/5; 86/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.783 total time=  14.0s
[CV 2/5; 87/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 2/5; 87/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.695 total time=   9.4s
[CV 1/5; 88/192] START learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 1/5; 88/192] END learning_rate=0.01, loss=huber, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.775 total time=  14.3s
[CV 5/5; 88/192] START learning_rate=0.01, lo

[CV 1/5; 108/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 1/5; 105/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.778 total time=   5.2s
[CV 5/5; 105/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 5/5; 105/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.786 total time=   5.5s
[CV 4/5; 106/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 4/5; 106/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.787 total time=   8.2s
[CV 3/5; 107/192] START learning_rate=0.02, loss=ls, ma

[CV 5/5; 125/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.815 total time=   8.4s
[CV 3/5; 104/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.796 total time=   7.0s
[CV 2/5; 105/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 2/5; 105/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.771 total time=   5.1s
[CV 1/5; 106/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 1/5; 106/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.802 total time=   8.4s
[CV 5/5; 106/192] START 

[CV 3/5; 125/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.807 total time=   7.6s
[CV 1/5; 107/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.777 total time=   5.4s
[CV 4/5; 107/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 4/5; 107/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.759 total time=   5.3s
[CV 3/5; 108/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 3/5; 108/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.796 total time=   7.8s
[CV 2/5; 109/192] START 

[CV 2/5; 127/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.799 total time=   9.4s
[CV 1/5; 108/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.806 total time=   7.8s
[CV 5/5; 108/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 108/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.805 total time=   7.7s
[CV 1/5; 110/192] START learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 1/5; 110/192] END learning_rate=0.02, loss=ls, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.803 total time=   7.7s
[CV 5/5; 110/192] START 

[CV 3/5; 129/192] END learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.713 total time=   5.7s
[CV 4/5; 126/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 4/5; 126/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.804 total time=  13.3s
[CV 3/5; 127/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 3/5; 127/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.804 total time=   9.2s
[CV 2/5; 128/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 2/5; 128/192] END learning_rate=0.02, loss=ls, max

[CV 1/5; 146/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.788 total time=  14.0s
[CV 2/5; 126/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 2/5; 126/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.819 total time=  12.8s
[CV 1/5; 127/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 1/5; 127/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.810 total time=   9.3s
[CV 4/5; 127/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 127/192] END learning_rate=0.02, loss=ls, max

[CV 4/5; 146/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.753 total time=  14.1s
[CV 1/5; 128/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 1/5; 128/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.827 total time=  12.2s
[CV 5/5; 128/192] START learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 5/5; 128/192] END learning_rate=0.02, loss=ls, max_depth=6, max_features=0.9, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.827 total time=  11.2s
[CV 5/5; 129/192] START learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 5/5; 129/192] END learning_rate=0.02, loss=lad, m

[CV 1/5; 148/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.777 total time=  16.4s
[CV 2/5; 130/192] START learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 2/5; 130/192] END learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.700 total time=   8.5s
[CV 1/5; 131/192] START learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 1/5; 131/192] END learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.660 total time=   6.0s
[CV 4/5; 131/192] START learning_rate=0.02, loss=lad, max_depth=4, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 4/5; 131/192] END learning_rate=0.02, loss=la

[CV 1/5; 150/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.787 total time=  15.4s
[CV 3/5; 147/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 3/5; 147/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.779 total time=  10.5s
[CV 2/5; 148/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 2/5; 148/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.766 total time=  16.0s
[CV 1/5; 149/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 1/5; 149/192] END learning_rate=0.02, loss=la

[CV 1/5; 167/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.740 total time=   6.4s
[CV 5/5; 146/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 5/5; 146/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.789 total time=  14.5s
[CV 5/5; 147/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 5/5; 147/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.753 total time=  11.7s
[CV 4/5; 148/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 4/5; 148/192] END learning_rate=0.02, loss=

[CV 3/5; 166/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.786 total time=   9.5s
[CV 5/5; 148/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 148/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.779 total time=  15.3s
[CV 5/5; 149/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 5/5; 149/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.759 total time=  10.7s
[CV 4/5; 150/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 4/5; 150/192] END learning_rate=0.02, loss=

[CV 4/5; 168/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.748 total time=   9.1s
[CV 5/5; 150/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 5/5; 150/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.789 total time=  14.4s
[CV 5/5; 151/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 5/5; 151/192] END learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.754 total time=   9.6s
[CV 4/5; 152/192] START learning_rate=0.02, loss=lad, max_depth=6, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 4/5; 152/192] END learning_rate=0.02, loss=

[CV 1/5; 172/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.771 total time=  11.3sTRAIN MAE: 137940.92
TEST MAE: 173835.95

[CV 4/5; 167/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 4/5; 167/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.710 total time=   6.3s
[CV 3/5; 168/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 3/5; 168/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.786 total time=   9.3s
[CV 2/5; 169/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=


[CV 2/5; 167/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200
[CV 2/5; 167/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=200;, score=0.726 total time=   6.4s
[CV 1/5; 168/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 1/5; 168/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.777 total time=   9.2s
[CV 5/5; 168/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300
[CV 5/5; 168/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.8, min_samples_leaf=6, min_samples_split=4, n_estimators=300;, score=0.784 total time=  10.1s
[CV 1/5; 170/192] START learning_rat


[CV 3/5; 169/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200
[CV 3/5; 169/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=200;, score=0.757 total time=   7.9s
[CV 2/5; 170/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300
[CV 2/5; 170/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=3, n_estimators=300;, score=0.760 total time=  10.9s
[CV 1/5; 171/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200
[CV 1/5; 171/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=200;, score=0.741 total time=   7.2s
[CV 4/5; 171/192] START learning_rat


[CV 5/5; 172/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300
[CV 5/5; 172/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=5, min_samples_split=4, n_estimators=300;, score=0.784 total time=  10.8s
[CV 5/5; 173/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200
[CV 5/5; 173/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=200;, score=0.752 total time=   7.0s
[CV 4/5; 174/192] START learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300
[CV 4/5; 174/192] END learning_rate=0.02, loss=huber, max_depth=4, max_features=0.9, min_samples_leaf=6, min_samples_split=3, n_estimators=300;, score=0.752 total time=   9.7s
[CV 3/5; 175/192] START learning_rat

In [15]:
grid.best_params_
mae_train = mean_absolute_error(y_train,grid.predict(X_train))
print("TRAIN MAE: %.2f" % mae_train)

mae_test = mean_absolute_error(y_test,grid.predict(X_test))
print("TEST MAE: %.2f" % mae_test)

TRAIN MAE: 137940.92
TEST MAE: 173835.95


In [17]:
# Attempting to use different hyperparameters from gridsearch

model = ensemble.GradientBoostingRegressor(
    n_estimators=300,
    max_depth=6,
    min_samples_split=4,
    min_samples_leaf=6,
    learning_rate=0.02,
    max_features=0.9,
    loss='huber'
)

model.fit(X_train, y_train)
mae_train = mean_absolute_error(y_train,model.predict(X_train))
print("TRAIN MAE: %.2f" % mae_train)

mae_test = mean_absolute_error(y_test,model.predict(X_test))
print("TEST MAE: %.2f" % mae_test)

TRAIN MAE: 137777.10
TEST MAE: 174353.58


In [18]:
# Attempt grid search with randomize seed
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,shuffle=True, random_state=42)

model = ensemble.GradientBoostingRegressor()

hyperparams = {
    'n_estimators':[200,250, 300],
    'max_depth':[4,5,6],
    'min_samples_split':[3,4,5],
    'min_samples_leaf':[5,6,7],
    'learning_rate':[0.01,0.02],
    'max_features':[0.6,0.7,0.8,0.9],
    'loss':['ls','lad','huber']
}

grid = GridSearchCV(model, hyperparams, n_jobs=4, verbose=10)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 1944 candidates, totalling 9720 fits


KeyboardInterrupt: 

In [5]:
#Moving back to specific hyperparameters for the purpose of attempting single prediction
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import ensemble
from sklearn.metrics import mean_absolute_error

df = pd.read_csv('../data/Melbourne_housing_FULL.csv')

# Stripping non-important features
df.columns = df.columns.str.strip()
scrubbed = ['Address','Method','SellerG','Date','Postcode','Lattitude','Longtitude','Regionname','Propertycount']
for title in scrubbed:
    del df[title]

# Remove rows with emptiness
df.dropna(axis=0,how='any',thresh=None,subset=None,inplace=True)

# One-hot encoding
df = pd.get_dummies(df,columns=['Suburb','CouncilArea','Type'])

# Set independent and dependent vars
X = df.drop('Price', axis=1)
y = df['Price']

model = ensemble.GradientBoostingRegressor(
                                            n_estimators=250,
                                            learning_rate=0.1,
                                            max_depth=5,
                                            min_samples_split=4,
                                            min_samples_leaf=6,
                                            max_features=0.6,
                                            loss='huber'
                                          )

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,shuffle=True, random_state=42)

model.fit(X_train, y_train)

GradientBoostingRegressor(loss='huber', max_depth=5, max_features=0.6,
                          min_samples_leaf=6, min_samples_split=4,
                          n_estimators=250)

In [7]:
print("house_ex = [")
for var in df.columns.tolist():
    print("\t0," + " #" + var)
print("]")

house_ex = [
	0, #Rooms
	0, #Price
	0, #Distance
	0, #Bedroom2
	0, #Bathroom
	0, #Car
	0, #Landsize
	0, #BuildingArea
	0, #YearBuilt
	0, #Suburb_Abbotsford
	0, #Suburb_Aberfeldie
	0, #Suburb_Airport West
	0, #Suburb_Albanvale
	0, #Suburb_Albert Park
	0, #Suburb_Albion
	0, #Suburb_Alphington
	0, #Suburb_Altona
	0, #Suburb_Altona Meadows
	0, #Suburb_Altona North
	0, #Suburb_Ardeer
	0, #Suburb_Armadale
	0, #Suburb_Ascot Vale
	0, #Suburb_Ashburton
	0, #Suburb_Ashwood
	0, #Suburb_Aspendale
	0, #Suburb_Aspendale Gardens
	0, #Suburb_Attwood
	0, #Suburb_Avondale Heights
	0, #Suburb_Bacchus Marsh
	0, #Suburb_Balaclava
	0, #Suburb_Balwyn
	0, #Suburb_Balwyn North
	0, #Suburb_Bayswater
	0, #Suburb_Bayswater North
	0, #Suburb_Beaconsfield
	0, #Suburb_Beaconsfield Upper
	0, #Suburb_Beaumaris
	0, #Suburb_Bellfield
	0, #Suburb_Bentleigh
	0, #Suburb_Bentleigh East
	0, #Suburb_Berwick
	0, #Suburb_Black Rock
	0, #Suburb_Blackburn
	0, #Suburb_Blackburn North
	0, #Suburb_Blackburn South
	0, #Suburb_Bonbeac

In [10]:
house_ex = [
	3, #Rooms
	4.5, #Distance
	3, #Bedroom2
	1, #Bathroom
	1, #Car
	176, #Landsize
	300, #BuildingArea
	1980, #YearBuilt
	1, #Suburb_Abbotsford
	0, #Suburb_Aberfeldie
	0, #Suburb_Airport West
	0, #Suburb_Albanvale
	0, #Suburb_Albert Park
	0, #Suburb_Albion
	0, #Suburb_Alphington
	0, #Suburb_Altona
	0, #Suburb_Altona Meadows
	0, #Suburb_Altona North
	0, #Suburb_Ardeer
	0, #Suburb_Armadale
	0, #Suburb_Ascot Vale
	0, #Suburb_Ashburton
	0, #Suburb_Ashwood
	0, #Suburb_Aspendale
	0, #Suburb_Aspendale Gardens
	0, #Suburb_Attwood
	0, #Suburb_Avondale Heights
	0, #Suburb_Bacchus Marsh
	0, #Suburb_Balaclava
	0, #Suburb_Balwyn
	0, #Suburb_Balwyn North
	0, #Suburb_Bayswater
	0, #Suburb_Bayswater North
	0, #Suburb_Beaconsfield
	0, #Suburb_Beaconsfield Upper
	0, #Suburb_Beaumaris
	0, #Suburb_Bellfield
	0, #Suburb_Bentleigh
	0, #Suburb_Bentleigh East
	0, #Suburb_Berwick
	0, #Suburb_Black Rock
	0, #Suburb_Blackburn
	0, #Suburb_Blackburn North
	0, #Suburb_Blackburn South
	0, #Suburb_Bonbeach
	0, #Suburb_Boronia
	0, #Suburb_Botanic Ridge
	0, #Suburb_Box Hill
	0, #Suburb_Braybrook
	0, #Suburb_Briar Hill
	0, #Suburb_Brighton
	0, #Suburb_Brighton East
	0, #Suburb_Broadmeadows
	0, #Suburb_Brookfield
	0, #Suburb_Brooklyn
	0, #Suburb_Brunswick
	0, #Suburb_Brunswick East
	0, #Suburb_Brunswick West
	0, #Suburb_Bulleen
	0, #Suburb_Bullengarook
	0, #Suburb_Bundoora
	0, #Suburb_Burnley
	0, #Suburb_Burnside
	0, #Suburb_Burnside Heights
	0, #Suburb_Burwood
	0, #Suburb_Burwood East
	0, #Suburb_Cairnlea
	0, #Suburb_Camberwell
	0, #Suburb_Campbellfield
	0, #Suburb_Canterbury
	0, #Suburb_Carlton
	0, #Suburb_Carlton North
	0, #Suburb_Carnegie
	0, #Suburb_Caroline Springs
	0, #Suburb_Carrum
	0, #Suburb_Carrum Downs
	0, #Suburb_Caulfield
	0, #Suburb_Caulfield East
	0, #Suburb_Caulfield North
	0, #Suburb_Caulfield South
	0, #Suburb_Chadstone
	0, #Suburb_Chelsea
	0, #Suburb_Chelsea Heights
	0, #Suburb_Cheltenham
	0, #Suburb_Chirnside Park
	0, #Suburb_Clarinda
	0, #Suburb_Clayton
	0, #Suburb_Clayton South
	0, #Suburb_Clifton Hill
	0, #Suburb_Coburg
	0, #Suburb_Coburg North
	0, #Suburb_Collingwood
	0, #Suburb_Coolaroo
	0, #Suburb_Craigieburn
	0, #Suburb_Cranbourne
	0, #Suburb_Cranbourne North
	0, #Suburb_Cremorne
	0, #Suburb_Croydon
	0, #Suburb_Croydon Hills
	0, #Suburb_Croydon North
	0, #Suburb_Croydon South
	0, #Suburb_Dallas
	0, #Suburb_Dandenong
	0, #Suburb_Dandenong North
	0, #Suburb_Deepdene
	0, #Suburb_Deer Park
	0, #Suburb_Delahey
	0, #Suburb_Derrimut
	0, #Suburb_Diamond Creek
	0, #Suburb_Diggers Rest
	0, #Suburb_Dingley Village
	0, #Suburb_Doncaster
	0, #Suburb_Doncaster East
	0, #Suburb_Donvale
	0, #Suburb_Doreen
	0, #Suburb_Doveton
	0, #Suburb_Eaglemont
	0, #Suburb_East Melbourne
	0, #Suburb_Edithvale
	0, #Suburb_Elsternwick
	0, #Suburb_Eltham
	0, #Suburb_Eltham North
	0, #Suburb_Elwood
	0, #Suburb_Emerald
	0, #Suburb_Endeavour Hills
	0, #Suburb_Epping
	0, #Suburb_Essendon
	0, #Suburb_Essendon North
	0, #Suburb_Essendon West
	0, #Suburb_Fairfield
	0, #Suburb_Fawkner
	0, #Suburb_Ferntree Gully
	0, #Suburb_Fitzroy
	0, #Suburb_Fitzroy North
	0, #Suburb_Flemington
	0, #Suburb_Footscray
	0, #Suburb_Forest Hill
	0, #Suburb_Frankston
	0, #Suburb_Frankston North
	0, #Suburb_Frankston South
	0, #Suburb_Gardenvale
	0, #Suburb_Gisborne
	0, #Suburb_Gisborne South
	0, #Suburb_Gladstone Park
	0, #Suburb_Glen Huntly
	0, #Suburb_Glen Iris
	0, #Suburb_Glen Waverley
	0, #Suburb_Glenroy
	0, #Suburb_Gowanbrae
	0, #Suburb_Greensborough
	0, #Suburb_Greenvale
	0, #Suburb_Hadfield
	0, #Suburb_Hallam
	0, #Suburb_Hampton
	0, #Suburb_Hampton East
	0, #Suburb_Hampton Park
	0, #Suburb_Hawthorn
	0, #Suburb_Hawthorn East
	0, #Suburb_Healesville
	0, #Suburb_Heathmont
	0, #Suburb_Heidelberg
	0, #Suburb_Heidelberg Heights
	0, #Suburb_Heidelberg West
	0, #Suburb_Highett
	0, #Suburb_Hillside
	0, #Suburb_Hoppers Crossing
	0, #Suburb_Hughesdale
	0, #Suburb_Huntingdale
	0, #Suburb_Hurstbridge
	0, #Suburb_Ivanhoe
	0, #Suburb_Ivanhoe East
	0, #Suburb_Jacana
	0, #Suburb_Kealba
	0, #Suburb_Keilor
	0, #Suburb_Keilor Downs
	0, #Suburb_Keilor East
	0, #Suburb_Keilor Lodge
	0, #Suburb_Keilor Park
	0, #Suburb_Kensington
	0, #Suburb_Kew
	0, #Suburb_Kew East
	0, #Suburb_Keysborough
	0, #Suburb_Kilsyth
	0, #Suburb_Kings Park
	0, #Suburb_Kingsbury
	0, #Suburb_Kingsville
	0, #Suburb_Knoxfield
	0, #Suburb_Kooyong
	0, #Suburb_Kurunjang
	0, #Suburb_Lalor
	0, #Suburb_Langwarrin
	0, #Suburb_Lower Plenty
	0, #Suburb_Lysterfield
	0, #Suburb_Maidstone
	0, #Suburb_Malvern
	0, #Suburb_Malvern East
	0, #Suburb_Maribyrnong
	0, #Suburb_McKinnon
	0, #Suburb_Meadow Heights
	0, #Suburb_Melbourne
	0, #Suburb_Melton
	0, #Suburb_Melton South
	0, #Suburb_Melton West
	0, #Suburb_Mentone
	0, #Suburb_Mernda
	0, #Suburb_Mickleham
	0, #Suburb_Middle Park
	0, #Suburb_Mill Park
	0, #Suburb_Mitcham
	0, #Suburb_Mont Albert
	0, #Suburb_Montmorency
	0, #Suburb_Montrose
	0, #Suburb_Moonee Ponds
	0, #Suburb_Moorabbin
	0, #Suburb_Mooroolbark
	0, #Suburb_Mordialloc
	0, #Suburb_Mount Evelyn
	0, #Suburb_Mount Waverley
	0, #Suburb_Mulgrave
	0, #Suburb_Murrumbeena
	0, #Suburb_Narre Warren
	0, #Suburb_Newport
	0, #Suburb_Niddrie
	0, #Suburb_Noble Park
	0, #Suburb_North Melbourne
	0, #Suburb_North Warrandyte
	0, #Suburb_Northcote
	0, #Suburb_Notting Hill
	0, #Suburb_Nunawading
	0, #Suburb_Oak Park
	0, #Suburb_Oakleigh
	0, #Suburb_Oakleigh East
	0, #Suburb_Oakleigh South
	0, #Suburb_Officer
	0, #Suburb_Ormond
	0, #Suburb_Pakenham
	0, #Suburb_Parkdale
	0, #Suburb_Parkville
	0, #Suburb_Pascoe Vale
	0, #Suburb_Patterson Lakes
	0, #Suburb_Plumpton
	0, #Suburb_Point Cook
	0, #Suburb_Port Melbourne
	0, #Suburb_Prahran
	0, #Suburb_Preston
	0, #Suburb_Princes Hill
	0, #Suburb_Research
	0, #Suburb_Reservoir
	0, #Suburb_Richmond
	0, #Suburb_Riddells Creek
	0, #Suburb_Ringwood
	0, #Suburb_Ringwood East
	0, #Suburb_Ringwood North
	0, #Suburb_Ripponlea
	0, #Suburb_Rosanna
	0, #Suburb_Rowville
	0, #Suburb_Roxburgh Park
	0, #Suburb_Sandhurst
	0, #Suburb_Sandringham
	0, #Suburb_Scoresby
	0, #Suburb_Seabrook
	0, #Suburb_Seaford
	0, #Suburb_Seaholme
	0, #Suburb_Seddon
	0, #Suburb_Skye
	0, #Suburb_South Kingsville
	0, #Suburb_South Melbourne
	0, #Suburb_South Morang
	0, #Suburb_South Yarra
	0, #Suburb_Southbank
	0, #Suburb_Spotswood
	0, #Suburb_Springvale
	0, #Suburb_Springvale South
	0, #Suburb_St Albans
	0, #Suburb_St Helena
	0, #Suburb_St Kilda
	0, #Suburb_Strathmore
	0, #Suburb_Strathmore Heights
	0, #Suburb_Sunbury
	0, #Suburb_Sunshine
	0, #Suburb_Sunshine North
	0, #Suburb_Sunshine West
	0, #Suburb_Surrey Hills
	0, #Suburb_Sydenham
	0, #Suburb_Tarneit
	0, #Suburb_Taylors Hill
	0, #Suburb_Taylors Lakes
	0, #Suburb_Templestowe
	0, #Suburb_Templestowe Lower
	0, #Suburb_The Basin
	0, #Suburb_Thomastown
	0, #Suburb_Thornbury
	0, #Suburb_Toorak
	0, #Suburb_Travancore
	0, #Suburb_Truganina
	0, #Suburb_Tullamarine
	0, #Suburb_Upwey
	0, #Suburb_Vermont
	0, #Suburb_Vermont South
	0, #Suburb_Viewbank
	0, #Suburb_Wallan
	0, #Suburb_Wantirna
	0, #Suburb_Wantirna South
	0, #Suburb_Warrandyte
	0, #Suburb_Waterways
	0, #Suburb_Watsonia
	0, #Suburb_Watsonia North
	0, #Suburb_Wattle Glen
	0, #Suburb_Werribee
	0, #Suburb_West Footscray
	0, #Suburb_West Melbourne
	0, #Suburb_Westmeadows
	0, #Suburb_Wheelers Hill
	0, #Suburb_Whittlesea
	0, #Suburb_Williams Landing
	0, #Suburb_Williamstown
	0, #Suburb_Williamstown North
	0, #Suburb_Windsor
	0, #Suburb_Wollert
	0, #Suburb_Wyndham Vale
	0, #Suburb_Yallambie
	0, #Suburb_Yarra Glen
	0, #Suburb_Yarraville
	0, #CouncilArea_Banyule City Council
	0, #CouncilArea_Bayside City Council
	0, #CouncilArea_Boroondara City Council
	0, #CouncilArea_Brimbank City Council
	0, #CouncilArea_Cardinia Shire Council
	0, #CouncilArea_Casey City Council
	0, #CouncilArea_Darebin City Council
	0, #CouncilArea_Frankston City Council
	0, #CouncilArea_Glen Eira City Council
	0, #CouncilArea_Greater Dandenong City Council
	0, #CouncilArea_Hobsons Bay City Council
	0, #CouncilArea_Hume City Council
	0, #CouncilArea_Kingston City Council
	0, #CouncilArea_Knox City Council
	0, #CouncilArea_Macedon Ranges Shire Council
	0, #CouncilArea_Manningham City Council
	0, #CouncilArea_Maribyrnong City Council
	0, #CouncilArea_Maroondah City Council
	0, #CouncilArea_Melbourne City Council
	0, #CouncilArea_Melton City Council
	0, #CouncilArea_Mitchell Shire Council
	0, #CouncilArea_Monash City Council
	0, #CouncilArea_Moonee Valley City Council
	0, #CouncilArea_Moorabool Shire Council
	0, #CouncilArea_Moreland City Council
	0, #CouncilArea_Nillumbik Shire Council
	0, #CouncilArea_Port Phillip City Council
	0, #CouncilArea_Stonnington City Council
	0, #CouncilArea_Whitehorse City Council
	0, #CouncilArea_Whittlesea City Council
	0, #CouncilArea_Wyndham City Council
	1, #CouncilArea_Yarra City Council
	0, #CouncilArea_Yarra Ranges Shire Council
	1, #Type_h
	0, #Type_t
	0, #Type_u
]

model_prediction = model.predict([house_ex])
model_prediction

array([1376647.16348312])