In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('train_validation.csv',low_memory=False)
x_train = df.drop(['price_log'], axis=1)
y_train = df['price_log']

In [6]:
df_test = pd.read_csv('test.csv',low_memory=False)
x_test = df_test.drop(['price_log'], axis=1)
y_test = df_test['price_log']

In [9]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
score_objective = "neg_mean_squared_error"
from sklearn.metrics import mean_squared_error

### Gradient Boosting Regressor

In [10]:
gbr = GradientBoostingRegressor(random_state=0)
grid_values_gbr = {'n_estimators': [20,30,40,50,60,70,80,90,100], 'learning_rate' : [0.1], 'max_depth' : [1,2,3,4,5,6,7,8,9,10]}
grid_gbr = GridSearchCV(gbr, param_grid = grid_values_gbr, scoring = score_objective, cv=5, n_jobs=16)
grid_gbr.fit(x_train, y_train)
print('Grid best parameter for GradientBoostingRegressor (max. {}): '.format(score_objective), grid_gbr.best_params_)
print('Grid best score ({}): '.format(score_objective), -grid_gbr.best_score_)

Grid best parameter for GradientBoostingRegressor (max. neg_mean_squared_error):  {'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 100}
Grid best score (neg_mean_squared_error):  0.12893121473309382


In [15]:
params_gbr = {'n_estimators': 100, 'max_depth': 7, 'learning_rate': 0.1}
clf_gbr = GradientBoostingRegressor(**params_gbr)
clf_gbr.fit(x_train,y_train)
mse_gbr = mean_squared_error(y_test, clf_gbr.predict(x_test))
print("MSE of Gradient Boosting Regressor: %.4f" % mse_gbr)

MSE of Gradient Boosting Regressor: 0.1193


### SVR

In [17]:
svr = SVR()
grid_values_svr = {'degree': [3,4,5,6], 'cache_size' : [100,200,300,400]}
grid_svr = GridSearchCV(svr, param_grid = grid_values_svr, scoring = score_objective, cv=5, n_jobs=16)
grid_svr.fit(x_train, y_train)
print('Grid best parameter SVR (max. {}): '.format(score_objective), grid_svr.best_params_)
print('Grid best score ({}): '.format(score_objective), -grid_svr.best_score_)



Grid best parameter SVR (max. neg_mean_squared_error):  {'cache_size': 100, 'degree': 3}
Grid best score (neg_mean_squared_error):  0.146267839604804


In [12]:
params_svr = {'degree': 3, 'cache_size': 100}
clf_svr = SVR(**params_svr)
clf_svr.fit(x_train,y_train)
mse_svr = mean_squared_error(y_test, clf_svr.predict(x_test))
print("MSE of SVR: %.4f" % mse_svr)



MSE of SVR: 0.1271


### AdaBoostRegressor

In [22]:
abr = AdaBoostRegressor(random_state=0)
grid_values_abr = {'n_estimators': [30,40,50,60,70,80,90], 'learning_rate' : [0.1,0.5,1.0,1.5,2.0]}
grid_abr = GridSearchCV(abr, param_grid = grid_values_abr, scoring = score_objective, cv=5, n_jobs=16)
grid_abr.fit(x_train, y_train)
print('Grid best parameter AdaBoostRegressor (max. {}): '.format(score_objective), grid_abr.best_params_)
print('Grid best score ({}): '.format(score_objective), -grid_abr.best_score_)

Grid best parameter AdaBoostRegressor (max. neg_mean_squared_error):  {'learning_rate': 0.1, 'n_estimators': 70}
Grid best score (neg_mean_squared_error):  0.19478043350748722


In [13]:
params_abr = {'n_estimators': 70, 'learning_rate': 0.1}
clf_abr = GradientBoostingRegressor(**params_abr)
clf_abr.fit(x_train,y_train)
mse_abr = mean_squared_error(y_test, clf_abr.predict(x_test))
print("MSE of Ada Boost Regressor: %.4f" % mse_abr)

MSE of Ada Boost Regressor: 0.1314


### Random Forest Regressor

In [18]:
rfr = RandomForestRegressor(random_state=0)
grid_values_rfr = {'n_estimators': [20,30,40,50,60,70,80,90,100], 'max_features' : [1,2,3,4,5,6,7,8,9,10]}
grid_rfr = GridSearchCV(rfr, param_grid = grid_values_rfr, scoring = score_objective, cv=5, n_jobs=16)
grid_rfr.fit(x_train, y_train)
print('Grid best parameter RandomForestRegressor (max. {}): '.format(score_objective), grid_rfr.best_params_)
print('Grid best score ({}): '.format(score_objective), -grid_rfr.best_score_)

Grid best parameter RandomForestRegressor (max. neg_mean_squared_error):  {'max_features': 10, 'n_estimators': 100}
Grid best score (neg_mean_squared_error):  0.13618102929841894


In [14]:
params_rfr = {'n_estimators': 100, 'max_features': 10}
clf_rfr = GradientBoostingRegressor(**params_rfr)
clf_rfr.fit(x_train,y_train)
mse_rfr = mean_squared_error(y_test, clf_rfr.predict(x_test))
print("MSE of Random Forest Regressor: %.4f" % mse_rfr)

MSE of Random Forest Regressor: 0.1296
