# Adaboost Regression house price prediction

### Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

### Load Data

In [2]:
df = pd.read_csv('jiffs_house_price_dataset.csv')

In [3]:
df.head()

Unnamed: 0,land_size_sqm,house_size_sqm,no_of_rooms,no_of_bathrooms,large_living_room,parking_space,front_garden,swimming_pool,distance_to_school_km,wall_fence,house_age_or_renovated,water_front,distance_to_supermarket_km,property_value
0,201,177,3,1,0,1,1,0,3.3,1,10,0,6.8,167611
1,196,182,4,3,1,1,0,1,1.2,1,11,0,4.1,186945
2,198,182,4,4,1,1,0,1,5.9,0,20,0,2.1,154628
3,178,166,2,3,0,1,0,0,5.9,0,5,0,0.7,133099
4,183,165,3,1,1,1,0,0,3.8,1,8,0,0.7,158481


### Split Data

In [4]:
X = df.drop('property_value', axis=1)
y = df['property_value']

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

### Try Adaboost with Default Parameters

In [7]:
from sklearn.ensemble import AdaBoostRegressor

In [8]:
ab_model = AdaBoostRegressor()

In [9]:
ab_model.get_params()

{'base_estimator': None,
 'learning_rate': 1.0,
 'loss': 'linear',
 'n_estimators': 50,
 'random_state': None}

In [10]:
ab_model.fit(X_train, y_train)
ab_pred = ab_model.predict(X_test)
train_pred = ab_model.predict(X_train) #To check the training accuracy

In [13]:
from sklearn import metrics

print('MAE:', round(metrics.mean_absolute_error(y_test, ab_pred),2))
print('MSE:', round(metrics.mean_squared_error(y_test, ab_pred),2))
print('RMSE:', round(np.sqrt(metrics.mean_squared_error(y_test, ab_pred)),2))

MAE: 12281.56
MSE: 241706568.4
RMSE: 15546.92


In [21]:
index_position = 1
np_y_test = np.array(y_test)
actual = np_y_test[index_position]/1
pred = round(ab_pred[index_position],2)
diff = round((np_y_test[index_position]/1)-(ab_pred[index_position]),2)
perc = round(diff/actual*100,2)
print('Actual is: ' + str(actual))
print('Prediction is: ' + str(pred))
print('Difference is: ' + str(diff))
print('Error Percentage is: ' + str(perc)+'%')

Actual is: 248955.0
Prediction is: 259280.27
Difference is: -10325.27
Error Percentage is: -4.15%


### Grid Search

In [35]:
params = {
    'n_estimators':[1,10,20,30,40,50,60,80,100,150],
    'learning_rate':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]
    }

In [36]:
from sklearn.model_selection import GridSearchCV

In [37]:
grid = GridSearchCV(AdaBoostRegressor(),params,refit=True,verbose=2)
grid.fit(X_train,y_train)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
[CV] learning_rate=0.1, n_estimators=1 ...............................
[CV] ................ learning_rate=0.1, n_estimators=1, total=   0.0s
[CV] learning_rate=0.1, n_estimators=1 ...............................
[CV] ................ learning_rate=0.1, n_estimators=1, total=   0.0s
[CV] learning_rate=0.1, n_estimators=1 ...............................
[CV] ................ learning_rate=0.1, n_estimators=1, total=   0.0s
[CV] learning_rate=0.1, n_estimators=1 ...............................
[CV] ................ learning_rate=0.1, n_estimators=1, total=   0.0s
[CV] learning_rate=0.1, n_estimators=1 ...............................
[CV] ................ learning_rate=0.1, n_estimators=1, total=   0.0s
[CV] learning_rate=0.1, n_estimators=10 ..............................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] ............... learning_rate=0.1, n_estimators=10, total=   0.1s
[CV] learning_rate=0.1, n_estimators=10 ..............................
[CV] ............... learning_rate=0.1, n_estimators=10, total=   0.1s
[CV] learning_rate=0.1, n_estimators=10 ..............................
[CV] ............... learning_rate=0.1, n_estimators=10, total=   0.1s
[CV] learning_rate=0.1, n_estimators=10 ..............................
[CV] ............... learning_rate=0.1, n_estimators=10, total=   0.1s
[CV] learning_rate=0.1, n_estimators=10 ..............................
[CV] ............... learning_rate=0.1, n_estimators=10, total=   0.1s
[CV] learning_rate=0.1, n_estimators=20 ..............................
[CV] ............... learning_rate=0.1, n_estimators=20, total=   0.2s
[CV] learning_rate=0.1, n_estimators=20 ..............................
[CV] ............... learning_rate=0.1, n_estimators=20, total=   0.2s
[CV] learning_rate=0.1, n_estimators=20 ..............................
[CV] .

[CV] ............... learning_rate=0.2, n_estimators=20, total=   0.2s
[CV] learning_rate=0.2, n_estimators=20 ..............................
[CV] ............... learning_rate=0.2, n_estimators=20, total=   0.2s
[CV] learning_rate=0.2, n_estimators=30 ..............................
[CV] ............... learning_rate=0.2, n_estimators=30, total=   0.3s
[CV] learning_rate=0.2, n_estimators=30 ..............................
[CV] ............... learning_rate=0.2, n_estimators=30, total=   0.3s
[CV] learning_rate=0.2, n_estimators=30 ..............................
[CV] ............... learning_rate=0.2, n_estimators=30, total=   0.3s
[CV] learning_rate=0.2, n_estimators=30 ..............................
[CV] ............... learning_rate=0.2, n_estimators=30, total=   0.3s
[CV] learning_rate=0.2, n_estimators=30 ..............................
[CV] ............... learning_rate=0.2, n_estimators=30, total=   0.3s
[CV] learning_rate=0.2, n_estimators=40 ..............................
[CV] .

[CV] ............... learning_rate=0.3, n_estimators=40, total=   0.4s
[CV] learning_rate=0.3, n_estimators=40 ..............................
[CV] ............... learning_rate=0.3, n_estimators=40, total=   0.4s
[CV] learning_rate=0.3, n_estimators=40 ..............................
[CV] ............... learning_rate=0.3, n_estimators=40, total=   0.4s
[CV] learning_rate=0.3, n_estimators=40 ..............................
[CV] ............... learning_rate=0.3, n_estimators=40, total=   0.4s
[CV] learning_rate=0.3, n_estimators=50 ..............................
[CV] ............... learning_rate=0.3, n_estimators=50, total=   0.5s
[CV] learning_rate=0.3, n_estimators=50 ..............................
[CV] ............... learning_rate=0.3, n_estimators=50, total=   0.5s
[CV] learning_rate=0.3, n_estimators=50 ..............................
[CV] ............... learning_rate=0.3, n_estimators=50, total=   0.5s
[CV] learning_rate=0.3, n_estimators=50 ..............................
[CV] .

[CV] ............... learning_rate=0.4, n_estimators=50, total=   0.5s
[CV] learning_rate=0.4, n_estimators=60 ..............................
[CV] ............... learning_rate=0.4, n_estimators=60, total=   0.6s
[CV] learning_rate=0.4, n_estimators=60 ..............................
[CV] ............... learning_rate=0.4, n_estimators=60, total=   0.5s
[CV] learning_rate=0.4, n_estimators=60 ..............................
[CV] ............... learning_rate=0.4, n_estimators=60, total=   0.6s
[CV] learning_rate=0.4, n_estimators=60 ..............................
[CV] ............... learning_rate=0.4, n_estimators=60, total=   0.6s
[CV] learning_rate=0.4, n_estimators=60 ..............................
[CV] ............... learning_rate=0.4, n_estimators=60, total=   0.6s
[CV] learning_rate=0.4, n_estimators=80 ..............................
[CV] ............... learning_rate=0.4, n_estimators=80, total=   0.8s
[CV] learning_rate=0.4, n_estimators=80 ..............................
[CV] .

[CV] ............... learning_rate=0.5, n_estimators=80, total=   0.8s
[CV] learning_rate=0.5, n_estimators=80 ..............................
[CV] ............... learning_rate=0.5, n_estimators=80, total=   0.8s
[CV] learning_rate=0.5, n_estimators=80 ..............................
[CV] ............... learning_rate=0.5, n_estimators=80, total=   0.8s
[CV] learning_rate=0.5, n_estimators=100 .............................
[CV] .............. learning_rate=0.5, n_estimators=100, total=   0.9s
[CV] learning_rate=0.5, n_estimators=100 .............................
[CV] .............. learning_rate=0.5, n_estimators=100, total=   0.9s
[CV] learning_rate=0.5, n_estimators=100 .............................
[CV] .............. learning_rate=0.5, n_estimators=100, total=   0.9s
[CV] learning_rate=0.5, n_estimators=100 .............................
[CV] .............. learning_rate=0.5, n_estimators=100, total=   0.9s
[CV] learning_rate=0.5, n_estimators=100 .............................
[CV] .

[CV] .............. learning_rate=0.6, n_estimators=150, total=   1.3s
[CV] learning_rate=0.6, n_estimators=150 .............................
[CV] .............. learning_rate=0.6, n_estimators=150, total=   1.3s
[CV] learning_rate=0.6, n_estimators=150 .............................
[CV] .............. learning_rate=0.6, n_estimators=150, total=   1.3s
[CV] learning_rate=0.6, n_estimators=150 .............................
[CV] .............. learning_rate=0.6, n_estimators=150, total=   1.3s
[CV] learning_rate=0.6, n_estimators=150 .............................
[CV] .............. learning_rate=0.6, n_estimators=150, total=   1.3s
[CV] learning_rate=0.7, n_estimators=1 ...............................
[CV] ................ learning_rate=0.7, n_estimators=1, total=   0.0s
[CV] learning_rate=0.7, n_estimators=1 ...............................
[CV] ................ learning_rate=0.7, n_estimators=1, total=   0.0s
[CV] learning_rate=0.7, n_estimators=1 ...............................
[CV] .

[CV] ............... learning_rate=0.8, n_estimators=10, total=   0.1s
[CV] learning_rate=0.8, n_estimators=10 ..............................
[CV] ............... learning_rate=0.8, n_estimators=10, total=   0.1s
[CV] learning_rate=0.8, n_estimators=10 ..............................
[CV] ............... learning_rate=0.8, n_estimators=10, total=   0.1s
[CV] learning_rate=0.8, n_estimators=10 ..............................
[CV] ............... learning_rate=0.8, n_estimators=10, total=   0.1s
[CV] learning_rate=0.8, n_estimators=10 ..............................
[CV] ............... learning_rate=0.8, n_estimators=10, total=   0.1s
[CV] learning_rate=0.8, n_estimators=20 ..............................
[CV] ............... learning_rate=0.8, n_estimators=20, total=   0.2s
[CV] learning_rate=0.8, n_estimators=20 ..............................
[CV] ............... learning_rate=0.8, n_estimators=20, total=   0.2s
[CV] learning_rate=0.8, n_estimators=20 ..............................
[CV] .

[CV] ............... learning_rate=0.9, n_estimators=20, total=   0.2s
[CV] learning_rate=0.9, n_estimators=20 ..............................
[CV] ............... learning_rate=0.9, n_estimators=20, total=   0.2s
[CV] learning_rate=0.9, n_estimators=30 ..............................
[CV] ............... learning_rate=0.9, n_estimators=30, total=   0.3s
[CV] learning_rate=0.9, n_estimators=30 ..............................
[CV] ............... learning_rate=0.9, n_estimators=30, total=   0.3s
[CV] learning_rate=0.9, n_estimators=30 ..............................
[CV] ............... learning_rate=0.9, n_estimators=30, total=   0.3s
[CV] learning_rate=0.9, n_estimators=30 ..............................
[CV] ............... learning_rate=0.9, n_estimators=30, total=   0.3s
[CV] learning_rate=0.9, n_estimators=30 ..............................
[CV] ............... learning_rate=0.9, n_estimators=30, total=   0.3s
[CV] learning_rate=0.9, n_estimators=40 ..............................
[CV] .

[CV] ................. learning_rate=1, n_estimators=40, total=   0.4s
[CV] learning_rate=1, n_estimators=40 ................................
[CV] ................. learning_rate=1, n_estimators=40, total=   0.4s
[CV] learning_rate=1, n_estimators=40 ................................
[CV] ................. learning_rate=1, n_estimators=40, total=   0.4s
[CV] learning_rate=1, n_estimators=40 ................................
[CV] ................. learning_rate=1, n_estimators=40, total=   0.4s
[CV] learning_rate=1, n_estimators=50 ................................
[CV] ................. learning_rate=1, n_estimators=50, total=   0.4s
[CV] learning_rate=1, n_estimators=50 ................................
[CV] ................. learning_rate=1, n_estimators=50, total=   0.5s
[CV] learning_rate=1, n_estimators=50 ................................
[CV] ................. learning_rate=1, n_estimators=50, total=   0.4s
[CV] learning_rate=1, n_estimators=50 ................................
[CV] .

[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:  4.3min finished


GridSearchCV(estimator=AdaBoostRegressor(),
             param_grid={'learning_rate': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
                                           0.8, 0.9, 1],
                         'n_estimators': [1, 10, 20, 30, 40, 50, 60, 80, 100,
                                          150]},
             verbose=2)

In [40]:
grid_predictions = grid.predict(X_test)

In [41]:
from sklearn import metrics

print('MAE:', metrics.mean_absolute_error(y_test, grid_predictions))
print('MSE:', metrics.mean_squared_error(y_test, grid_predictions))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, grid_predictions)))

MAE: 12130.388716179172
MSE: 232116281.50102377
RMSE: 15235.362860825591


In [42]:
index_position = 0
np_y_test = np.array(y_test)
actual = np_y_test[index_position]/1
pred = round(grid_predictions[index_position],2)
diff = round((np_y_test[index_position]/1)-(grid_predictions[index_position]),2)
perc = round(diff/actual*100,2)
print('Actual is: ' + str(actual))
print('Prediction is: ' + str(pred))
print('Difference is: ' + str(diff))
print('Error Percentage is: ' + str(perc)+'%')

Actual is: 81558.0
Prediction is: 106057.53
Difference is: -24499.53
Error Percentage is: -30.04%
