In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the preprocessed train/test tables and drop the 'Id' column from both.
train_df = pd.read_csv('../preprocessing_data/preprocessed_train.csv').drop(columns='Id')
test_df = pd.read_csv('../preprocessing_data/preprocessed_test.csv').drop(columns='Id')

# Feature matrix: every remaining training column except the target.
X = np.array(train_df.drop(columns=['SalePrice']).values)
# Target is transformed with log1p, i.e. log(1 + SalePrice).
y = np.log1p(np.array(train_df['SalePrice'].values))
# Test-set features, taken as-is from the test table.
X_test = np.array(test_df.values)

In [3]:
from bayes_opt import BayesianOptimization
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import get_scorer, mean_squared_error
import time

In [4]:
def rmse(y_true, y_pred):
    """Return the square root of ``mean_squared_error(y_true, y_pred)``.

    Parameters
    ----------
    y_true : reference target values.
    y_pred : predicted target values.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

In [5]:
# Hold out 20% of the training rows as a validation set; the fixed
# random_state keeps the split the same across runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
def lasso_cl_bo(alpha, max_iter):
    """Objective for the optimizer: negated validation RMSE of a Lasso fit.

    A Lasso with the given ``alpha`` and ``max_iter`` (rounded to the nearest
    whole number) is fitted on the notebook-level ``X_train``/``y_train`` and
    scored on ``X_valid``/``y_valid``.

    Returns
    -------
    The RMSE on the validation split with its sign flipped, so that a
    maximizer drives the error down.
    """
    model = Lasso(alpha=alpha, max_iter=round(max_iter), random_state=123)
    model.fit(X_train, y_train)

    valid_pred = model.predict(X_valid)
    return -rmse(y_valid, valid_pred)

# Run Bayesian Optimization over the Lasso hyperparameters and time it.
start = time.time()

# Search bounds as (lower, upper) per hyperparameter.
# NOTE(review): the lower bound for alpha is 0; scikit-learn advises against
# fitting Lasso with alpha=0 -- confirm this bound is intended.
params_lasso = dict(
    alpha=(0, 0.01),
    max_iter=(1000, 3000),
)

lasso_bo = BayesianOptimization(
    f=lasso_cl_bo,
    pbounds=params_lasso,
    random_state=111,
)
# 20 random initial probes followed by 4 guided iterations.
lasso_bo.maximize(init_points=20, n_iter=4)

print('It takes %s minutes' % ((time.time() - start)/60))

|   iter    |  target   |   alpha   | max_iter  |
-------------------------------------------------
| [0m1        [0m | [0m-0.1273  [0m | [0m0.006122 [0m | [0m1.338e+03[0m |
| [95m2        [0m | [95m-0.1248  [0m | [95m0.004361 [0m | [95m2.539e+03[0m |
| [95m3        [0m | [95m-0.1234  [0m | [95m0.002953 [0m | [95m1.298e+03[0m |
| [95m4        [0m | [95m-0.1204  [0m | [95m0.0002248[0m | [95m1.84e+03 [0m |
| [0m5        [0m | [0m-0.1223  [0m | [0m0.002387 [0m | [0m1.675e+03[0m |
| [0m6        [0m | [0m-0.1321  [0m | [0m0.009907 [0m | [0m1.475e+03[0m |
| [95m7        [0m | [95m-0.1178  [0m | [95m0.0008119[0m | [95m2.339e+03[0m |
| [0m8        [0m | [0m-0.1275  [0m | [0m0.006212 [0m | [0m1.549e+03[0m |
| [0m9        [0m | [0m-0.1252  [0m | [0m0.004662 [0m | [0m1.237e+03[0m |
| [0m10       [0m | [0m-0.1179  [0m | [0m0.0007396[0m | [0m2.802e+03[0m |
| [0m11       [0m | [0m-0.1296  [0m | [0m0.00794  [0m | [

In [9]:
# Refit with the best hyperparameters recorded by the optimizer rather than
# values transcribed (and rounded) by hand from the printed log, so this cell
# stays in sync when the search above is re-run.
best_params = lasso_bo.max['params']
lasso = Lasso(
    alpha=best_params['alpha'],
    # The objective rounds max_iter before fitting; do the same here.
    max_iter=int(round(best_params['max_iter'])),
    # Same seed as the objective function, for consistency with the search.
    random_state=123,
)
lasso.fit(X_train, y_train)

# Validation RMSE of the refit model (displayed as the cell output).
rmse(y_valid, lasso.predict(X_valid))

0.11778937954176705

In [10]:
# Load the reference values from cheat.csv (presumably the true test-set
# SalePrice -- TODO confirm), drop 'Id', and flatten to a 1-D array.
ans = pd.read_csv('../submission/cheat.csv').drop(columns='Id')
ans = np.array(ans.values).ravel()

# Predictions are compared against log1p of the reference values, matching
# the log1p transform applied to the training target.
y_pred = lasso.predict(X_test)
print(f"Error: {rmse(np.log1p(ans), y_pred)}")

Error: 0.12630942885112117
