In [3]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error 

In [4]:
X_train = np.load( 'data/X_train_2.npy',allow_pickle=True)
y_train = np.load( 'data/y_train_2.npy',allow_pickle=True)
X_val = np.load('data/X_val_2.npy',allow_pickle=True)
y_val = np.load('data/y_val_2.npy',allow_pickle=True)
X_train.shape, X_val.shape, y_train.shape, y_val.shape

((76500, 166), (19125, 166), (76500,), (19125,))

In [5]:
model = Lasso(alpha=1.0)
model.fit(X_train, y_train)

train_predict = model.predict(X_train)
val_predict = model.predict(X_val)

train_rmse = np.sqrt(mean_squared_error(y_train, train_predict))
val_rmse = np.sqrt(mean_squared_error(y_val, val_predict))

train_rmse, val_rmse

(0.4800419868611222, 0.4839273913661117)

In [6]:
import optuna
import logging
import sys
import os

optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))

study_name = "lasso-study_ver1"

storage_name = "sqlite:///{}/{}.db".format(os.getcwd(), study_name) 






[32m[I 2022-03-27 22:48:46,610][0m A new study created in RDB with name: lasso-study_ver1[0m


A new study created in RDB with name: lasso-study_ver1


In [26]:
optuna.delete_study(study_name, storage_name)

In [27]:
study = optuna.create_study(
study_name=study_name,
storage=storage_name,
# load_if_exists=True,
direction="minimize",
)

A new study created in RDB with name: lasso-study_ver1


[32m[I 2022-03-27 23:02:07,066][0m A new study created in RDB with name: lasso-study_ver1[0m


In [28]:

def objective(trial):

    alpha = trial.suggest_float("alpha", 1e-5, 1)

    print(alpha)
    lasso_model = Lasso(
        alpha = alpha,)
        # normalize = True,)
        # random_state = 25)
    
    lasso_model.fit(X_train, y_train)

    train_predict = lasso_model.predict(X_train)
    val_predict = lasso_model.predict(X_val)

    train_rmse = np.sqrt(mean_squared_error(y_train, train_predict))
    val_rmse = np.sqrt(mean_squared_error(y_val, val_predict))

    trial.set_user_attr('train_rmse',train_rmse)
    trial.set_user_attr('val_rmse',val_rmse)

    return val_rmse


In [29]:
study.optimize(objective, n_trials = 20, show_progress_bar=True)

trials_df = study.trials_dataframe()

trials_df.to_excel(study_name + '.xlsx') 

  self._init_valid()
  0%|          | 0/20 [00:00<?, ?it/s]

0.7304518289965524
Trial 0 finished with value: 0.483902883766806 and parameters: {'alpha': 0.7304518289965524}. Best is trial 0 with value: 0.483902883766806.


  5%|▌         | 1/20 [00:01<00:22,  1.16s/it]

[32m[I 2022-03-27 23:02:17,761][0m Trial 0 finished with value: 0.483902883766806 and parameters: {'alpha': 0.7304518289965524}. Best is trial 0 with value: 0.483902883766806.[0m
0.016967781738731553
Trial 1 finished with value: 0.4834522105536552 and parameters: {'alpha': 0.016967781738731553}. Best is trial 1 with value: 0.4834522105536552.


  model = cd_fast.enet_coordinate_descent(
 10%|█         | 2/20 [00:17<03:00, 10.02s/it]

[32m[I 2022-03-27 23:02:33,984][0m Trial 1 finished with value: 0.4834522105536552 and parameters: {'alpha': 0.016967781738731553}. Best is trial 1 with value: 0.4834522105536552.[0m
0.983692271353257
Trial 2 finished with value: 0.4839256959457846 and parameters: {'alpha': 0.983692271353257}. Best is trial 1 with value: 0.4834522105536552.


 15%|█▌        | 3/20 [00:18<01:41,  5.97s/it]

[32m[I 2022-03-27 23:02:35,129][0m Trial 2 finished with value: 0.4839256959457846 and parameters: {'alpha': 0.983692271353257}. Best is trial 1 with value: 0.4834522105536552.[0m
0.8623741480287739
Trial 3 finished with value: 0.4839139431355538 and parameters: {'alpha': 0.8623741480287739}. Best is trial 1 with value: 0.4834522105536552.


 20%|██        | 4/20 [00:19<01:04,  4.06s/it]

[32m[I 2022-03-27 23:02:36,254][0m Trial 3 finished with value: 0.4839139431355538 and parameters: {'alpha': 0.8623741480287739}. Best is trial 1 with value: 0.4834522105536552.[0m
0.7910886801038345
Trial 4 finished with value: 0.4839077444553385 and parameters: {'alpha': 0.7910886801038345}. Best is trial 1 with value: 0.4834522105536552.


 25%|██▌       | 5/20 [00:20<00:45,  3.01s/it]

[32m[I 2022-03-27 23:02:37,405][0m Trial 4 finished with value: 0.4839077444553385 and parameters: {'alpha': 0.7910886801038345}. Best is trial 1 with value: 0.4834522105536552.[0m
0.009948850395688318
Trial 5 finished with value: 0.48336895232903687 and parameters: {'alpha': 0.009948850395688318}. Best is trial 5 with value: 0.48336895232903687.


  model = cd_fast.enet_coordinate_descent(
 30%|███       | 6/20 [00:36<01:43,  7.38s/it]

[32m[I 2022-03-27 23:02:53,260][0m Trial 5 finished with value: 0.48336895232903687 and parameters: {'alpha': 0.009948850395688318}. Best is trial 5 with value: 0.48336895232903687.[0m
0.46862517556642214
Trial 6 finished with value: 0.48388624464108226 and parameters: {'alpha': 0.46862517556642214}. Best is trial 5 with value: 0.48336895232903687.


 35%|███▌      | 7/20 [00:37<01:09,  5.33s/it]

[32m[I 2022-03-27 23:02:54,385][0m Trial 6 finished with value: 0.48388624464108226 and parameters: {'alpha': 0.46862517556642214}. Best is trial 5 with value: 0.48336895232903687.[0m
0.9980159026766993
Trial 7 finished with value: 0.4839271836273059 and parameters: {'alpha': 0.9980159026766993}. Best is trial 5 with value: 0.48336895232903687.


 40%|████      | 8/20 [00:38<00:47,  4.00s/it]

[32m[I 2022-03-27 23:02:55,524][0m Trial 7 finished with value: 0.4839271836273059 and parameters: {'alpha': 0.9980159026766993}. Best is trial 5 with value: 0.48336895232903687.[0m
0.5060891047325613
Trial 8 finished with value: 0.48388819250482495 and parameters: {'alpha': 0.5060891047325613}. Best is trial 5 with value: 0.48336895232903687.


 45%|████▌     | 9/20 [00:40<00:34,  3.11s/it]

[32m[I 2022-03-27 23:02:56,668][0m Trial 8 finished with value: 0.48388819250482495 and parameters: {'alpha': 0.5060891047325613}. Best is trial 5 with value: 0.48336895232903687.[0m
0.8281025180890736
Trial 9 finished with value: 0.48391089768233286 and parameters: {'alpha': 0.8281025180890736}. Best is trial 5 with value: 0.48336895232903687.


 50%|█████     | 10/20 [00:41<00:24,  2.50s/it]

[32m[I 2022-03-27 23:02:57,801][0m Trial 9 finished with value: 0.48391089768233286 and parameters: {'alpha': 0.8281025180890736}. Best is trial 5 with value: 0.48336895232903687.[0m
0.05035309922010134
Trial 10 finished with value: 0.48362228061286644 and parameters: {'alpha': 0.05035309922010134}. Best is trial 5 with value: 0.48336895232903687.


 55%|█████▌    | 11/20 [00:47<00:31,  3.55s/it]

[32m[I 2022-03-27 23:03:03,731][0m Trial 10 finished with value: 0.48362228061286644 and parameters: {'alpha': 0.05035309922010134}. Best is trial 5 with value: 0.48336895232903687.[0m
0.008169850539650775


  model = cd_fast.enet_coordinate_descent(


Trial 11 finished with value: 0.48334494874374323 and parameters: {'alpha': 0.008169850539650775}. Best is trial 11 with value: 0.48334494874374323.


 60%|██████    | 12/20 [01:02<00:57,  7.24s/it]

[32m[I 2022-03-27 23:03:19,426][0m Trial 11 finished with value: 0.48334494874374323 and parameters: {'alpha': 0.008169850539650775}. Best is trial 11 with value: 0.48334494874374323.[0m
0.19866665378255227
Trial 12 finished with value: 0.48387556695615686 and parameters: {'alpha': 0.19866665378255227}. Best is trial 11 with value: 0.48334494874374323.


 65%|██████▌   | 13/20 [01:04<00:38,  5.43s/it]

[32m[I 2022-03-27 23:03:20,701][0m Trial 12 finished with value: 0.48387556695615686 and parameters: {'alpha': 0.19866665378255227}. Best is trial 11 with value: 0.48334494874374323.[0m
0.24266902321190403
Trial 13 finished with value: 0.4838771012177973 and parameters: {'alpha': 0.24266902321190403}. Best is trial 11 with value: 0.48334494874374323.


 70%|███████   | 14/20 [01:05<00:25,  4.19s/it]

[32m[I 2022-03-27 23:03:22,010][0m Trial 13 finished with value: 0.4838771012177973 and parameters: {'alpha': 0.24266902321190403}. Best is trial 11 with value: 0.48334494874374323.[0m
0.1971791487268369
Trial 14 finished with value: 0.48387551865296996 and parameters: {'alpha': 0.1971791487268369}. Best is trial 11 with value: 0.48334494874374323.


 75%|███████▌  | 15/20 [01:06<00:16,  3.32s/it]

[32m[I 2022-03-27 23:03:23,310][0m Trial 14 finished with value: 0.48387551865296996 and parameters: {'alpha': 0.1971791487268369}. Best is trial 11 with value: 0.48334494874374323.[0m
0.37951133017781324
Trial 15 finished with value: 0.48388229186109905 and parameters: {'alpha': 0.37951133017781324}. Best is trial 11 with value: 0.48334494874374323.


 80%|████████  | 16/20 [01:07<00:10,  2.65s/it]

[32m[I 2022-03-27 23:03:24,405][0m Trial 15 finished with value: 0.48388229186109905 and parameters: {'alpha': 0.37951133017781324}. Best is trial 11 with value: 0.48334494874374323.[0m
0.11088475269508813
Trial 16 finished with value: 0.48387570735311536 and parameters: {'alpha': 0.11088475269508813}. Best is trial 11 with value: 0.48334494874374323.


 85%|████████▌ | 17/20 [01:12<00:09,  3.26s/it]

[32m[I 2022-03-27 23:03:29,083][0m Trial 16 finished with value: 0.48387570735311536 and parameters: {'alpha': 0.11088475269508813}. Best is trial 11 with value: 0.48334494874374323.[0m
0.6268609065783093
Trial 17 finished with value: 0.4838954561722889 and parameters: {'alpha': 0.6268609065783093}. Best is trial 11 with value: 0.48334494874374323.


 90%|█████████ | 18/20 [01:13<00:05,  2.62s/it]

[32m[I 2022-03-27 23:03:30,221][0m Trial 17 finished with value: 0.4838954561722889 and parameters: {'alpha': 0.6268609065783093}. Best is trial 11 with value: 0.48334494874374323.[0m
0.33964515386732164
Trial 18 finished with value: 0.4838808554846049 and parameters: {'alpha': 0.33964515386732164}. Best is trial 11 with value: 0.48334494874374323.


 95%|█████████▌| 19/20 [01:14<00:02,  2.17s/it]

[32m[I 2022-03-27 23:03:31,338][0m Trial 18 finished with value: 0.4838808554846049 and parameters: {'alpha': 0.33964515386732164}. Best is trial 11 with value: 0.48334494874374323.[0m
0.0967151963936671
Trial 19 finished with value: 0.4838703995714432 and parameters: {'alpha': 0.0967151963936671}. Best is trial 11 with value: 0.48334494874374323.


100%|██████████| 20/20 [01:19<00:00,  3.98s/it]

[32m[I 2022-03-27 23:03:36,253][0m Trial 19 finished with value: 0.4838703995714432 and parameters: {'alpha': 0.0967151963936671}. Best is trial 11 with value: 0.48334494874374323.[0m





In [30]:
trials_df

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_alpha,user_attrs_train_rmse,user_attrs_val_rmse,state
0,0,0.483903,2022-03-27 23:02:16.613908,2022-03-27 23:02:17.729245,0 days 00:00:01.115337,0.730452,0.480017,0.483903,COMPLETE
1,1,0.483452,2022-03-27 23:02:17.767321,2022-03-27 23:02:33.954436,0 days 00:00:16.187115,0.016968,0.479763,0.483452,COMPLETE
2,2,0.483926,2022-03-27 23:02:33.990030,2022-03-27 23:02:35.100936,0 days 00:00:01.110906,0.983692,0.48004,0.483926,COMPLETE
3,3,0.483914,2022-03-27 23:02:35.134511,2022-03-27 23:02:36.227784,0 days 00:00:01.093273,0.862374,0.480028,0.483914,COMPLETE
4,4,0.483908,2022-03-27 23:02:36.258593,2022-03-27 23:02:37.375907,0 days 00:00:01.117314,0.791089,0.480022,0.483908,COMPLETE
5,5,0.483369,2022-03-27 23:02:37.412066,2022-03-27 23:02:53.237445,0 days 00:00:15.825379,0.009949,0.479653,0.483369,COMPLETE
6,6,0.483886,2022-03-27 23:02:53.266452,2022-03-27 23:02:54.358355,0 days 00:00:01.091903,0.468625,0.48,0.483886,COMPLETE
7,7,0.483927,2022-03-27 23:02:54.391102,2022-03-27 23:02:55.493149,0 days 00:00:01.102047,0.998016,0.480042,0.483927,COMPLETE
8,8,0.483888,2022-03-27 23:02:55.529674,2022-03-27 23:02:56.634525,0 days 00:00:01.104851,0.506089,0.480002,0.483888,COMPLETE
9,9,0.483911,2022-03-27 23:02:56.673202,2022-03-27 23:02:57.775763,0 days 00:00:01.102561,0.828103,0.480025,0.483911,COMPLETE
