In [89]:
import os
import numpy as np

from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
#!{sys.executable} -m pip install bayesian_optimization
from bayes_opt import BayesianOptimization

# SETTINGS

np.random.seed(124)
np.set_printoptions(precision=14)

test_size = 0.1


In [90]:
def get_data():

    ## train & test paths
    data_dir = "./data/"

    ### csv w/ layout: "y,x1,x2,x3,x4,x5"
    train_file = "train.csv"
    data_path = data_dir + train_file
    
    data_type = np.double # np dtype object for values in X and Y

    X = np.genfromtxt(data_path,
                        dtype = data_type,
                        delimiter = ",",
                        skip_header = 1,
                        usecols = np.arange(2, 5 + 2))

    y = np.genfromtxt(data_path,
                        dtype = data_type,
                        delimiter = ",",
                        skip_header = 1,
                        usecols = np.arange(1, 1 + 1))

    return X, y

In [91]:
def save_submission(submission):

    ## prediction submission paths
    sub_dir = "./submission/"
    sub_file = "submission.csv"
    sub_path = sub_dir + sub_file


    if not os.path.isdir(sub_dir):
        os.mkdir(sub_dir)


    ## submission csv header
    sub_header = ""
    sub_format = "%.13f"
    np.savetxt(sub_path,
               submission,
               fmt = sub_format,
               delimiter = ',',
               header = sub_header,
               comments = '')

In [92]:

def get_phi(X):
    phi_1_5 = lambda x: x
    phi_6_10 = lambda x: np.square(x)
    phi_11_15 = lambda x: np.exp(x)
    phi_16_20 = lambda x: np.cos(x)
    phi_21 = lambda x: np.array([1])

    phi = []
    for x in X:
        phi_x = np.concatenate([phi_(x) for phi_ in [phi_1_5, phi_6_10, phi_11_15, phi_16_20, phi_21]])
        # print(phi_x)
        phi.append(phi_x)

    return phi

In [93]:
X, y = get_data()
phi = get_phi(X)
print(phi[5])

X_train, X_test, y_train, y_test = train_test_split(phi, y, test_size = test_size, random_state=124)



[-0.05             -0.16              0.               -0.39
 -0.07              0.0025            0.0256            0.
  0.1521            0.0049            0.95122942450071  0.85214378896621
  1.                0.67705687449816  0.93239381990595  0.99875026039497
  0.98722728337563  1.                0.92490905985731  0.99755100025328
  1.              ]


In [94]:

def get_params_ridge(params):
    # params["solver"] = 'lsqr' 
    params["fit_intercept"] = False
    return params


def bo_tune_ridge(alpha):

    params = get_params_ridge({"alpha": alpha})
    mdl = Ridge(alpha=alpha, fit_intercept=False)

    print(f"Params: alpha={alpha}")
    scores = cross_val_score(mdl, X_train, y_train, cv=10, n_jobs=5, scoring="neg_root_mean_squared_error")
    print(scores, scores.mean())
    # print("recall", recall)

    return scores.mean()

parameter_range_ridge = {
    "alpha": (0, 10)
}

ridge = Ridge, bo_tune_ridge, parameter_range_ridge, get_params_ridge


def get_params_elastic(params):
    # params["solver"] = 'lsqr' 
    params["fit_intercept"] = False
    return params

def bo_tune_elastic(alpha, l1_ratio):

    mdl = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=False)

    print(f"Params: alpha={alpha}")
    scores = cross_val_score(mdl, X_train, y_train, cv=10, n_jobs=5, scoring="neg_root_mean_squared_error")
    
    print(scores, scores.mean())
    # print("recall", recall)

    return scores.mean()

parameter_range_elastic = {
    "l1_ratio": (0, 1),
    "alpha": (0, 10)
}

elastic = ElasticNet, bo_tune_elastic, parameter_range_elastic, get_params_elastic


def get_params_lasso(params):
    # params["solver"] = 'lsqr' 
    params["fit_intercept"] = False
    return params

def bo_tune_lasso(alpha):

    mdl = Lasso(alpha=alpha, fit_intercept=False)

    print(f"Params: alpha={alpha}")
    scores = cross_val_score(mdl, X_train, y_train, cv=10, n_jobs=5, scoring="neg_root_mean_squared_error")
    
    print(scores, scores.mean())
    # print("recall", recall)

    return scores.mean()

parameter_range_lasso = {
    "alpha": (0, 10)
}

lasso = Lasso, bo_tune_lasso, parameter_range_lasso, get_params_lasso

# def bo_tune_lin_reg():

#     mdl = LinearRegression()

#     print(f"Params: None")
#     scores = cross_val_score(mdl, X_train, y_train, cv=10, n_jobs=5, scoring="neg_root_mean_squared_error")
#     print(scores, scores.mean())
#     # print("recall", recall)

#     return scores.mean()

# parameter_range_lin_reg = {
# }

# lin_reg = LinearRegression, bo_tune_lin_reg, parameter_range_lin_reg

to_test = [elastic, ridge, lasso] # [ridge, lasso]

In [95]:
best_mdls = []
# best_mdls.append(LinearRegression().fit(X_train, y_train))
for mdl, tune_func, param_ranges, to_params in to_test:
  bo = BayesianOptimization(tune_func, param_ranges)
    #performing Bayesian optimization for 5 iterations with 8 steps of random exploration with an #acquisition function of expected improvement
  bo.maximize(n_iter=30, init_points=20, acq='ei')

  #Extracting the best parameters
  params = bo.max['params']
  print(params)

  best_score = bo.max["target"]
  print("best score ", best_score)
  ##  best alpha = 5.2
  # params = {"alpha": 5.2}
  params = to_params(params)
  model = mdl(**params).fit(X_train, y_train)
  
  best_mdls.append(model)



|   iter    |  target   |   alpha   | l1_ratio  |
-------------------------------------------------
Params: alpha=6.494677988223141
[-5.1065276139152  -4.93733751878385 -5.19207011815992 -4.82924196110759
 -5.55754708477611 -4.73393897844647 -5.29301123339129 -4.96515067603681
 -4.50819159937018 -4.80692947409641] -4.992994625808385
| [0m 1       [0m | [0m-4.993   [0m | [0m 6.495   [0m | [0m 0.6216  [0m |
Params: alpha=6.211436875021333
[-4.86036302473819 -4.69950788055602 -4.95115238637324 -4.58221204743548
 -5.31335090338755 -4.50042186554946 -5.0497917236573  -4.72587379786294
 -4.27387319492674 -4.56993412398947] -4.752648094847638
| [95m 2       [0m | [95m-4.753   [0m | [95m 6.211   [0m | [95m 0.5966  [0m |
Params: alpha=6.887279570219631
[-4.27656957874551 -4.14425857274597 -4.38310491332497 -4.00030106764434
 -4.7324559144425  -3.95740576721139 -4.47522895429794 -4.16751896308662
 -3.73571429369697 -4.01696657111403] -4.188952459631024
| [95m 3       [0m | [95

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-2.14948800793354 -2.23712343586307 -2.32499247208659 -1.89332100935667
 -2.58335664027109 -2.22176015272511 -2.37846460505768 -2.24233810260829
 -2.02965894351646 -2.14116646206831] -2.2201669831486806
| [0m 23      [0m | [0m-2.22    [0m | [0m 2.034   [0m | [0m 0.0     [0m |
Params: alpha=1.4019242216112016
[-2.3936886398311  -2.40370082346321 -2.53166079918402 -2.0995292291593
 -2.83022554958272 -2.36490301106977 -2.61092592436755 -2.42719166543871
 -2.14152836026344 -2.29905283129594] -2.4102406833655747
| [0m 24      [0m | [0m-2.41    [0m | [0m 1.402   [0m | [0m 0.9964  [0m |
Params: alpha=0.3995519983089668


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.88051611648014 -2.03279414950347 -2.03973883958416 -1.62404622157811
 -2.22772118144162 -2.12469356140427 -2.07177067178218 -2.05348415564995
 -1.96588432191039 -1.97891157416472] -1.9999560793499036
| [0m 25      [0m | [0m-2.0     [0m | [0m 0.3996  [0m | [0m 0.9998  [0m |
Params: alpha=0.581716979863486


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.8338927565661  -2.03185775024626 -2.0170761123247  -1.63001690000243
 -2.19954647507419 -2.11766035327641 -2.04468380194537 -2.02868037204106
 -1.97067617325026 -1.9684470560875 ] -1.9842537750814286
| [0m 26      [0m | [0m-1.984   [0m | [0m 0.5817  [0m | [0m 0.0     [0m |
Params: alpha=0.015514293835371396
[-1.78330134089426 -1.98702322740138 -1.95043908591773 -1.58031721998038
 -2.05419751312883 -2.13694019295482 -1.94470785619921 -2.00789976936451
 -2.04850240404164 -1.98633833546505] -1.9479666945347822
| [95m 27      [0m | [95m-1.948   [0m | [95m 0.01551 [0m | [95m 0.9982  [0m |
Params: alpha=0.0


  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_f

[-1.86581449607262 -1.986292790553   -1.9693874488443  -1.58469210171663
 -2.06906949694605 -2.14638061837157 -1.94434304328524 -1.98788854327767
 -2.06929858508909 -2.0312791826428 ] -1.9654446306798974
| [0m 28      [0m | [0m-1.965   [0m | [0m 0.0     [0m | [0m 0.285   [0m |
Params: alpha=1.4463796916723823


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-2.00722698514328 -2.13476980143421 -2.18794672739044 -1.76555987834262
 -2.42266139208915 -2.15524745768689 -2.23296314333485 -2.13687893029638
 -1.97727623199289 -2.04907592189464] -2.106960646960534
| [0m 29      [0m | [0m-2.107   [0m | [0m 1.446   [0m | [0m 0.0     [0m |
Params: alpha=0.00014462823211047748


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.84958162276175 -1.98701823216601 -1.96682679996745 -1.58126008305762
 -2.06026046100369 -2.14214355340949 -1.9465963943619  -1.98624889607782
 -2.06749773056026 -2.01132219437567] -1.959875596774166
| [0m 30      [0m | [0m-1.96    [0m | [0m 0.000144[0m | [0m 0.01028 [0m |
Params: alpha=0.14213833131054288


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.80045782600505 -2.00161291912716 -1.97437845057631 -1.58669984952893
 -2.10473931051343 -2.12338924025282 -1.98018597828598 -2.0079519312681
 -2.00691271337726 -1.9678211931713 ] -1.955414941210633
| [0m 31      [0m | [0m-1.955   [0m | [0m 0.1421  [0m | [0m 0.9465  [0m |
Params: alpha=0.024261731571713074


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.78250289006406 -1.99448967407999 -1.95331923060747 -1.58060600984002
 -2.05950746795598 -2.13930415944429 -1.94799680252577 -2.0036253270281
 -2.03926606731876 -1.97753767867346] -1.94781553075379
| [95m 32      [0m | [95m-1.948   [0m | [95m 0.02426 [0m | [95m 0.864   [0m |
Params: alpha=2.8210090543117716
[-2.35140075967552 -2.39572283557826 -2.51885212624975 -2.08436144078793
 -2.79959881552896 -2.34191037235964 -2.58069694438602 -2.40434417425136
 -2.13753504883215 -2.29022640674113] -2.390464892439072
| [0m 33      [0m | [0m-2.39    [0m | [0m 2.821   [0m | [0m 0.002933[0m |
Params: alpha=0.3822472076516126
[-1.8369425938145  -2.02982867294296 -2.02000037275897 -1.62576188476488
 -2.19685605805188 -2.11839338226464 -2.04356521984236 -2.0271117464184
 -1.96924166689189 -1.97293816419399] -1.9840639761944463
| [0m 34      [0m | [0m-1.984   [0m | [0m 0.3822  [0m | [0m 0.6788  [0m |
Params: alpha=0.0029578553345355196


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.80502485986532 -1.99071518155478 -1.95854645989693 -1.58438352406004
 -2.05568504039578 -2.13694843236251 -1.94682984688339 -2.00511835058549
 -2.05943148591709 -1.99239414410128] -1.9535077325622587
| [0m 35      [0m | [0m-1.954   [0m | [0m 0.002958[0m | [0m 0.5541  [0m |
Params: alpha=0.37477701559404486
[-1.80623056009217 -2.02332286806177 -1.99154721989346 -1.61849844908968
 -2.16236087640187 -2.12263436524598 -2.0136733752657  -2.01834917747239
 -1.98407933934311 -1.96421733566383] -1.970491356652996
| [0m 36      [0m | [0m-1.97    [0m | [0m 0.3748  [0m | [0m 0.1275  [0m |
Params: alpha=0.0


  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_f

[-1.86581449607262 -1.986292790553   -1.9693874488443  -1.58469210171663
 -2.06906949694605 -2.14638061837157 -1.94434304328524 -1.98788854327767
 -2.06929858508909 -2.0312791826428 ] -1.9654446306798974
| [0m 37      [0m | [0m-1.965   [0m | [0m 0.0     [0m | [0m 0.7479  [0m |
Params: alpha=0.13043295330027238
[-1.7814212635349  -2.01408934173352 -1.96292696955255 -1.60377823405879
 -2.10843791204667 -2.13613769562732 -1.97636305553049 -2.00951287498981
 -2.00997804870544 -1.96780538205677] -1.9570450777836252
| [0m 38      [0m | [0m-1.957   [0m | [0m 0.1304  [0m | [0m 0.4909  [0m |
Params: alpha=0.02294577885611937


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.78369238181948 -1.98930170495006 -1.95235374187656 -1.57909354638607
 -2.05638429765449 -2.136235828241   -1.94573787689891 -2.00666947001781
 -2.04095852530729 -1.98122607631967] -1.9471653449471344
| [95m 39      [0m | [95m-1.947   [0m | [95m 0.02295 [0m | [95m 0.9455  [0m |
Params: alpha=1.6394416863437544
[-2.22904145306318 -2.29904555760635 -2.40504437383021 -1.96864879370884
 -2.67486034189161 -2.26432471314944 -2.46083563501215 -2.30541632273848
 -2.06898331542757 -2.19860078982515] -2.2874801296252985
| [0m 40      [0m | [0m-2.287   [0m | [0m 1.639   [0m | [0m 0.4058  [0m |
Params: alpha=0.810468386018951


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-2.04190909482359 -2.13514033424464 -2.1960332698025  -1.7681293329095
 -2.44291289053158 -2.17065847824574 -2.25750342284652 -2.15343970752071
 -1.9800525696028  -2.05704368240305] -2.1202822782930624
| [0m 41      [0m | [0m-2.12    [0m | [0m 0.8105  [0m | [0m 0.9989  [0m |
Params: alpha=0.0548499731532015


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.78532585475635 -1.99793395497106 -1.95864055782161 -1.58381697014386
 -2.07220783874392 -2.13524231760257 -1.95734551860001 -2.00319412863646
 -2.03016420057635 -1.97320508468896] -1.9497076426541162
| [0m 42      [0m | [0m-1.95    [0m | [0m 0.05485 [0m | [0m 0.9018  [0m |
Params: alpha=0.35731193966568986
[-1.80222769271448 -2.01939209478976 -1.98391706340802 -1.61190019578472
 -2.14806047334966 -2.12427160298192 -2.00562467798004 -2.01496318707489
 -1.98771966614342 -1.96375525456568] -1.9661831908792586
| [0m 43      [0m | [0m-1.966   [0m | [0m 0.3573  [0m | [0m 0.002875[0m |
Params: alpha=0.015263323161197606


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.78257920988984 -1.98916494860563 -1.95098882837152 -1.57958648542978
 -2.05403897129897 -2.13730812809533 -1.94442149214415 -2.00778214879388
 -2.04474955846957 -1.98331600452094] -1.9473935775619613
| [0m 44      [0m | [0m-1.947   [0m | [0m 0.01526 [0m | [0m 0.9421  [0m |
Params: alpha=0.01044395021137512


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.78420617353776 -1.9896296060926  -1.95002703790981 -1.58113333141424
 -2.05401733477663 -2.13804196451659 -1.94401301916336 -2.00850978644631
 -2.04763467562506 -1.98571821104174] -1.9482931140524102
| [0m 45      [0m | [0m-1.948   [0m | [0m 0.01044 [0m | [0m 0.9359  [0m |
Params: alpha=0.17847616377092312
[-1.79729671975232 -2.01231207237566 -1.97682541552257 -1.59973738867348
 -2.12404356324401 -2.12761851547134 -1.99087693982532 -2.01050402030248
 -1.99661025757379 -1.96790900982246] -1.9603733902563423
| [0m 46      [0m | [0m-1.96    [0m | [0m 0.1785  [0m | [0m 0.7889  [0m |
Params: alpha=0.8914395620513127
[-1.88906973451665 -2.06048217293239 -2.07280296517188 -1.66911070718308
 -2.27732912260456 -2.12073251418696 -2.10739224479397 -2.05922831394868
 -1.96108121593087 -1.98780159703159] -2.0205030588300628
| [0m 47      [0m | [0m-2.021   [0m | [0m 0.8914  [0m | [0m 0.005664[0m |
Params: alpha=0.01688480363007505


  model = cd_fast.enet_coordinate_descent(


[-1.78327458287732 -1.98749925430583 -1.9508492805605  -1.57993420815158
 -2.054332358062   -2.13683796903262 -1.94486429292079 -2.00800162370076
 -2.04669898639175 -1.98498456313846] -1.94772771191416
| [0m 48      [0m | [0m-1.948   [0m | [0m 0.01688 [0m | [0m 0.9847  [0m |
Params: alpha=0.046965401584765365


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.78579996742939 -1.99574931041004 -1.95670175140756 -1.57939862429085
 -2.0669202284306  -2.13401162418163 -1.9521886824212  -2.00332817987699
 -2.03202967381204 -1.97532417990987] -1.9481452222170177
| [0m 49      [0m | [0m-1.948   [0m | [0m 0.04697 [0m | [0m 0.9429  [0m |
Params: alpha=0.13248079003549962
[-1.77786862051467 -2.01549722353991 -1.95894270432728 -1.60569121604152
 -2.10447522529357 -2.1390393663677  -1.97323630433749 -2.00971501948855
 -2.01340756899663 -1.96802711151657] -1.9565900360423882
| [0m 50      [0m | [0m-1.957   [0m | [0m 0.1325  [0m | [0m 0.1389  [0m |
{'alpha': 0.02294577885611937, 'l1_ratio': 0.9455323891912856}
best score  -1.9471653449471344
|   iter    |  target   |   alpha   |
-------------------------------------
Params: alpha=2.3646375478774786


  model = cd_fast.enet_coordinate_descent(


[-1.80460770517537 -1.98998585684856 -1.95821890663894 -1.57970107402571
 -2.05326662794488 -2.13836246757293 -1.94443495370067 -2.00029325162977
 -2.05405720100915 -1.99167771603762] -1.9514605760583579
| [0m 1       [0m | [0m-1.951   [0m | [0m 2.365   [0m |
Params: alpha=8.638962759561013
[-1.78788860941502 -2.00028789956504 -1.95238259522739 -1.58791471485362
 -2.06332641861896 -2.14196607455044 -1.95051898912303 -2.00357975679605
 -2.04068309429698 -1.98079625462884] -1.9509344407075369
| [95m 2       [0m | [95m-1.951   [0m | [95m 8.639   [0m |
Params: alpha=9.96757105849576
[-1.78636732391792 -2.00155877090881 -1.95194217651484 -1.58905146771274
 -2.06490164356301 -2.14242406904412 -1.9515160124283  -2.00400397737432
 -2.03926909354476 -1.97979326758612] -1.9510827802594946
| [0m 3       [0m | [0m-1.951   [0m | [0m 9.968   [0m |
Params: alpha=7.0605412646160906
[-1.7901896015856  -1.99847817336997 -1.95311096313446 -1.58633940098405
 -2.06123376932606 -2.14129075

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.83869631844199 -2.01909701064334 -2.00316106907782 -1.59530805927766
 -2.15149966782035 -2.1201310725993  -2.01444815582321 -2.03467530567225
 -1.98377297493882 -1.96880045006749] -1.9729590084362225
| [0m 1       [0m | [0m-1.973   [0m | [0m 0.2438  [0m |
Params: alpha=8.584339184527625
[-6.75768039306572 -6.57481627031564 -6.81985952892644 -6.50647251278307
 -7.16735575951903 -6.37274261687369 -6.92024609535723 -6.60456078536793
 -6.16712000003626 -6.45138648669997] -6.634224044894497
| [0m 2       [0m | [0m-6.634   [0m | [0m 8.584   [0m |
Params: alpha=2.2063835093916673
[-2.98858140165497 -2.91761249284453 -3.10518391265692 -2.68957973917881
 -3.44145492861405 -2.80784387288845 -3.19781549268019 -2.94664262366832
 -2.56665449811707 -2.79435670788532] -2.945572567018864
| [0m 3       [0m | [0m-2.946   [0m | [0m 2.206   [0m |
Params: alpha=1.388102398360298
[-2.38095008921004 -2.39391603582905 -2.52035464725152 -2.08770635916957
 -2.81368398694374 -2.356907031685

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.82791372191698 -2.01793508023679 -1.99442658111739 -1.5901819369304
 -2.1357741080343  -2.12015988413396 -2.00217922054045 -2.02744374682401
 -1.98992417900742 -1.96787665108989] -1.9673815109831594
| [95m 5       [0m | [95m-1.967   [0m | [95m 0.2121  [0m |
Params: alpha=9.808377669040262
[-6.75768039306572 -6.57481627031564 -6.81985952892644 -6.50647251278307
 -7.16735575951903 -6.37274261687369 -6.92024609535723 -6.60456078536793
 -6.16712000003626 -6.45138648669997] -6.634224044894497
| [0m 6       [0m | [0m-6.634   [0m | [0m 9.808   [0m |
Params: alpha=0.04744564054758871
[-1.7882307714434  -1.98814015937917 -1.95597349318028 -1.57922334582077
 -2.06397772957382 -2.13225155810265 -1.95118960028139 -2.0081435678463
 -2.0362786263758  -1.98033318261675] -1.9483742034620337
| [95m 7       [0m | [95m-1.948   [0m | [95m 0.04745 [0m |
Params: alpha=7.694588231926263
[-6.75768039306572 -6.57481627031564 -6.81985952892644 -6.50647251278307
 -7.16735575951903 -6.372742

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.81592780080995 -1.99019873901155 -1.96054456230422 -1.57620857327215
 -2.04658413921055 -2.13480704018963 -1.93979007153288 -1.9843644155357
 -2.0670622201315  -1.99095432277592] -1.9506441884774066
| [0m 21      [0m | [0m-1.951   [0m | [0m 0.001659[0m |
Params: alpha=9.520713087018962e-05


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.86184672671543 -1.9863033426309  -1.96825046172105 -1.57981774133855
 -2.06564806114848 -2.14414610112612 -1.94538785311641 -1.98671105141633
 -2.0680955878604  -2.02653849780083] -1.9632745424874483
| [0m 22      [0m | [0m-1.963   [0m | [0m 9.521e-0[0m |
Params: alpha=0.0016903603539952794


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.81518382178869 -1.99025267199247 -1.96042759002622 -1.57621241279788
 -2.04625365930678 -2.13473035387006 -1.93975898325135 -1.98469522248001
 -2.06719244709772 -1.99089317243147] -1.9505600335042648
| [0m 23      [0m | [0m-1.951   [0m | [0m 0.00169 [0m |
Params: alpha=9.101745382711357e-05


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[-1.86204535895474 -1.98630673148296 -1.968305967279   -1.57998610649086
 -2.06579596175598 -2.14424114395652 -1.94533367248387 -1.98676025828498
 -2.06811278082629 -2.02674535313018] -1.9633633334645384
| [0m 24      [0m | [0m-1.963   [0m | [0m 9.102e-0[0m |
Params: alpha=0.07871350858640103
[-1.7935827378408  -1.99366908115162 -1.96191847642937 -1.57938772323239
 -2.07624195802813 -2.12843173111334 -1.9590549366135  -2.00937835779284
 -2.02556169164771 -1.97574499532221] -1.9502971689171926
| [0m 25      [0m | [0m-1.95    [0m | [0m 0.07871 [0m |
Params: alpha=0.0800788747957848
[-1.79382135879668 -1.99392418216822 -1.96219289367383 -1.57941402471966
 -2.07679622985925 -2.1282814265674  -1.95941994997395 -2.00944717594939
 -2.02511549655121 -1.97556996936733] -1.950398270762691
| [0m 26      [0m | [0m-1.95    [0m | [0m 0.08008 [0m |
Params: alpha=0.0832857147617505
[-1.79438926782568 -1.99452867534483 -1.96284142593156 -1.57948434890804
 -2.07810302370596 -2.12793393

In [96]:
from sklearn.metrics import mean_squared_error

best_val_error = 100000000
for mdl in best_mdls:

    print(f"MODEL {mdl}:")
    score = mdl.score(X_test, y_test)

    print(f"   SCORE: {score}")
    #Looking at the classification report
    print(f"COEFFS: {mdl.coef_}")

    y_pred_test = mdl.predict(X_test)
    y_pred_train = mdl.predict(X_train)

    train_error = mean_squared_error(y_train, y_pred_train, squared=False)
    validation_error = mean_squared_error(y_test, y_pred_test, squared=False)

    print("train_error", train_error)
    print("validation_error", validation_error)

    if(validation_error < best_val_error):
        best_val_error = validation_error
        best_model = mdl
    # print(y_pred_test)


MODEL ElasticNet(alpha=0.02294577885611937, fit_intercept=False,
           l1_ratio=0.9455323891912856):
   SCORE: 0.004014170559650698
COEFFS: [ 0.               -0.               -0.                0.
  0.               -0.                0.                0.
 -0.               -0.               -0.               -1.58558198763499
 -1.95110413657762 -0.               -0.               -0.4573256021322
 -1.63323856660041 -0.75536376716751 -0.               -0.03987855495858
 -0.1015870972053 ]
train_error 1.9435863913710139
validation_error 1.98982694716749
MODEL Ridge(alpha=5.43376698039975, fit_intercept=False):
   SCORE: -0.0058187169325116805
COEFFS: [ 0.40817151722712 -0.4794794846399  -0.55585553092405  0.34366070379567
  0.3090944365502  -0.35251216774685  0.08117237757756  0.10726519958863
 -0.23130027961696 -0.06172580453536 -0.36577225119155 -1.03261322426938
 -1.09912308133646 -0.35855898260309 -0.31069130358285 -0.41720311284835
 -0.63270023172964 -0.64564955614287 -0.479

In [97]:


save_submission(best_model.coef_)


In [98]:
print(X_test)

[array([-2.00000000000000e-01,  1.20000000000000e-01,
       -1.00000000000000e-01, -4.00000000000000e-01,
       -3.00000000000000e-02,  4.00000000000000e-02,
        1.44000000000000e-02,  1.00000000000000e-02,
        1.60000000000000e-01,  9.00000000000002e-04,
        8.18730753077982e-01,  1.12749685157938e+00,
        9.04837418035960e-01,  6.70320046035639e-01,
        9.70445533548508e-01,  9.80066577841242e-01,
        9.92808635853866e-01,  9.95004165278026e-01,
        9.21060994002885e-01,  9.99550033748988e-01,
        1.00000000000000e+00]), array([-2.30000000000000e-01, -1.00000000000000e-02,
       -5.00000000000000e-02, -1.30000000000000e-01,
       -5.00000000000000e-02,  5.29000000000000e-02,
        1.00000000000000e-04,  2.50000000000000e-03,
        1.69000000000000e-02,  2.50000000000000e-03,
        7.94533602503334e-01,  9.90049833749168e-01,
        9.51229424500714e-01,  8.78095430920561e-01,
        9.51229424500714e-01,  9.73666395005375e-01,
        9.999