In [2]:
using Pkg
Pkg.add("ProgressBars")

[32m[1m   Updating[22m[39m registry at `~/.julia/registries/General`
######################################################################### 100.0%
[32m[1m  Resolving[22m[39m package versions...
[32m[1m  Installed[22m[39m ProgressBars ─ v0.7.1
[32m[1mUpdating[22m[39m `~/.julia/environments/v1.5/Project.toml`
 [90m [49802e3a] [39m[92m+ ProgressBars v0.7.1[39m
[32m[1mUpdating[22m[39m `~/.julia/environments/v1.5/Manifest.toml`
 [90m [49802e3a] [39m[92m+ ProgressBars v0.7.1[39m


In [3]:
using Random, LinearAlgebra, Plots
# bring packages into main namespace
using DataFrames             # Data tables are called "DataFrames"
using StatsPlots             # load plotting packages 
using Statistics             # basic statistical functions
using CSV                    # tools for working with CSV files
using Plots, Random, LinearAlgebra, Statistics, SparseArrays
using ProgressBars
include("proxgrad.jl")
pyplot()

┌ Info: Precompiling ProgressBars [49802e3a-d2f1-5c88-81d8-b72133a6f568]
└ @ Base loading.jl:1278


Plots.PyPlotBackend()

In [33]:
dataset = CSV.read("Consumer_Airfare_Report__Table_1a_-_All_U.S._Airport_Pair_Markets.csv")
dataset = dropmissing(dataset)
dataset[:when] = dataset[:Year] .+ ((dataset[:quarter] .-1) ./ 4)
n = size(dataset,1)
r = .1
cuttoff = Int(round(n*r))
idxs = rand(1:n,n)

# We need to hold out a test dataset to report final results on, that is not used at all for model selection.
# Within k-fold cross validation, train and val datasets will be formed from this main train dataset. 
test_dataset  = dataset[idxs[1:cuttoff],:]
train_dataset = dataset[idxs[cuttoff+1:n],:]

n_train = size(train_dataset,1)
n_test = size(test_dataset,1)

@assert  n_train + n_test == n
@show n_train, n_test
head(dataset)

(n_train, n_test) = (181253, 20139)


Unnamed: 0_level_0,tbl,Year,quarter,citymarketid_1,citymarketid_2,city1
Unnamed: 0_level_1,String,Int64,Int64,Int64,Int64,String
1,Table 1a,2010,1,34614,33195,"Salt Lake City, UT"
2,Table 1a,1998,4,30189,31703,"Colorado Springs, CO"
3,Table 1a,1998,4,30198,30852,"Pittsburgh, PA"
4,Table 1a,2009,3,32211,32575,"Las Vegas, NV"
5,Table 1a,1993,4,30255,30852,"Huntsville, AL"
6,Table 1a,2010,4,33198,32575,"Kansas City, MO"


In [34]:
# Do k-fold cross validation and return the average error_metric on the validation set accross the k folds.
function cross_val(featurizer, loss, regularizer, stepsize, error_metric; k=10, dataset=train_dataset)
    X,y = featurizer(dataset)
    n = size(dataset,1)
    r = Int(round(n / k))
    idxs = rand(1:n,n) # to shuffle the dataset
    error = 0
    for i in tqdm(1:k)
        val_idxs = r*(i-1)+1:min(r*i,n)
        tr_low = 1:r*(i-1)
        tr_high = r*i+1:n
        if (i == 1)
            tr_idxs = tr_high
        elseif (i == k)
            tr_idxs = tr_low
        else
            tr_idxs = [tr_low ; tr_high ]
        end
        # @show i, val_idxs
        # @show tr_low, tr_high
        X_tr = X[idxs[tr_idxs],:]
        y_tr = y[idxs[tr_idxs]]
        
        X_val = X[idxs[val_idxs],:]
        y_val = y[idxs[val_idxs]]
        
        w = proxgrad(loss, regularizer, X_tr, y_tr; stepsize=stepsize) 
        ŷ_val = X_val * w
        # @show size(y_val)
        # @show size(ŷ_val)
        error += error_metric(ŷ_val, y_val)
    end
    return error / k
end

# For each model in models, do k-fold cross validation and calculate the average error_metric
# on the val set accross the k-folds.
# Each model in model is a tuple of the form (featurizer, loss, regularizer, stepsize),
# where a featurizer is a funciton that takes in a dataset and returns X,y. 
# Returns errors for each model, and the index of the best model
function test_models(models, error_metric;k=10, dataset=train_dataset)
    errors = []
    for model in models
        error = cross_val(model...,error_metric;k=k,dataset=dataset)
        errors = [errors; error]
    end
    i = argmin(errors)
    println("The best model is model ",i)
    return errors,i
end

test_models (generic function with 1 method)

# Feature Engineering and Selection

In [36]:
cat_labels = [:carrier_lg, :carrier_low, :airportid_1, :airportid_2]
cat_labels_sub = [:carrier_lg, :carrier_low]

#Sets of all categories in a particular column
cats_sets = [unique(dataset[:, label]) for label in cat_labels]
cats_sets_sub = [unique(dataset[:, label]) for label in cat_labels_sub]

"Computes a onehot vector for every entry in column given a set of categories cats"
function onehot(column, cats=unique(column))
    result = zeros(length(column), length(cats))
    for i = 1:length(column)
        for j =1:length(cats)
            if column[i] === cats[j]
                result[i, j] = 1
            end    
        end
    end
    result
end

onehot

In [37]:
labels_real = [
    :when, 
    :citymarketid_1, 
    :citymarketid_2,
    :airportid_1,
    :airportid_2,
    :nsmiles,
    :passengers,
    :large_ms,
    :fare_lg,
    :lf_ms,
    :fare_low
]
labels_real_sub = [
    :when, 
    :nsmiles,
    :passengers,
    :large_ms,
    :fare_lg,
    :lf_ms,
    :fare_low
]

7-element Array{Symbol,1}:
 :when
 :nsmiles
 :passengers
 :large_ms
 :fare_lg
 :lf_ms
 :fare_low

In [44]:
# Baseline featurizer
function feats_0(dataset)
    X = [dataset[:when] dataset[:airportid_1] dataset[:airportid_2] ones(size(dataset,1))]
    y = dataset[:fare]
    return X,y
end

# Only numeric features (treating airportid and cityid as numeric)
function feats_1(dataset)
    real_vals = convert(Matrix, dataset[labels_real])
    X = hcat(real_vals, ones(size(dataset,1)))
    y = dataset[:fare]
    return X,y
end

# Numeric and categorical combined (treating airportid and cityid as numerical)
function feats_2(dataset)
    real_vals = convert(Matrix, dataset[labels_real])
    cat_vals = hcat([onehot(dataset[:, cat_labels_sub[i]], cats_sets_sub[i]) for i in 1:size(cat_labels_sub, 1)]...)
    X = hcat(cat_vals, real_vals, ones(size(dataset,1)))
    y = dataset[:fare]
    return X,y
end

# Numeric and categorical combined (treating airportid and cityid as categorical)
function feats_3(dataset)
    real_vals = convert(Matrix, dataset[labels_real_sub])
    cat_vals = hcat([onehot(dataset[:, cat_labels[i]], cats_sets[i]) for i in 1:size(cat_labels, 1)]...)
    X = hcat(cat_vals, real_vals, ones(size(dataset,1)))
    y = dataset[:fare]
    return X,y
end

MSE(L1,L2) = sum((L1.-L2).^2) / size(L1,1)

models = [
    (feats_0, 1/n_train*QuadLoss(), 0.25*QuadReg(), .1),
    (feats_0, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .1),
    (feats_0, 1/n_train*QuadLoss(), 0.75*QuadReg(), .1),
    (feats_0, 1/n_train*QuadLoss(), ZeroReg(), .1),
    (feats_0, 1/n_train*QuadLoss(), 0.25*OneReg(), .1),
    (feats_0, 1/n_train*QuadLoss(), 0.5 *OneReg(), .1),
    (feats_0, 1/n_train*QuadLoss(), 0.75*OneReg(), .1),
    (feats_0, 1/n_train*QuadLoss(), NonNegConstraint(), .1),
    (feats_0, 1/n_train*L1Loss(), 0.25*QuadReg(), .1),
    (feats_0, 1/n_train*L1Loss(), 0.5 *QuadReg(), .1),
    (feats_0, 1/n_train*L1Loss(), 0.75*QuadReg(), .1),
    (feats_0, 1/n_train*L1Loss(), ZeroReg(), .1),
    (feats_0, 1/n_train*L1Loss(), 0.25*OneReg(), .1),
    (feats_0, 1/n_train*L1Loss(), 0.5 *OneReg(), .1),
    (feats_0, 1/n_train*L1Loss(), 0.75*OneReg(), .1),
    (feats_0, 1/n_train*L1Loss(), NonNegConstraint(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .1),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .1),
    (feats_0, 1/n_train*QuadLoss(), 0.25*QuadReg(), .5),
    (feats_0, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .5),
    (feats_0, 1/n_train*QuadLoss(), 0.75*QuadReg(), .5),
    (feats_0, 1/n_train*QuadLoss(), ZeroReg(), .5),
    (feats_0, 1/n_train*QuadLoss(), 0.25*OneReg(), .5),
    (feats_0, 1/n_train*QuadLoss(), 0.5 *OneReg(), .5),
    (feats_0, 1/n_train*QuadLoss(), 0.75*OneReg(), .5),
    (feats_0, 1/n_train*QuadLoss(), NonNegConstraint(), .5),
    (feats_0, 1/n_train*L1Loss(), 0.25*QuadReg(), .5),
    (feats_0, 1/n_train*L1Loss(), 0.5 *QuadReg(), .5),
    (feats_0, 1/n_train*L1Loss(), 0.75*QuadReg(), .5),
    (feats_0, 1/n_train*L1Loss(), ZeroReg(), .5),
    (feats_0, 1/n_train*L1Loss(), 0.25*OneReg(), .5),
    (feats_0, 1/n_train*L1Loss(), 0.5 *OneReg(), .5),
    (feats_0, 1/n_train*L1Loss(), 0.75*OneReg(), .5),
    (feats_0, 1/n_train*L1Loss(), NonNegConstraint(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .5),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .5),
    (feats_0, 1/n_train*QuadLoss(), 0.25*QuadReg(), .9),
    (feats_0, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .9),
    (feats_0, 1/n_train*QuadLoss(), 0.75*QuadReg(), .9),
    (feats_0, 1/n_train*QuadLoss(), ZeroReg(), .9),
    (feats_0, 1/n_train*QuadLoss(), 0.25*OneReg(), .9),
    (feats_0, 1/n_train*QuadLoss(), 0.5 *OneReg(), .9), #best model for baseline features
    (feats_0, 1/n_train*QuadLoss(), 0.75*OneReg(), .9),
    (feats_0, 1/n_train*QuadLoss(), NonNegConstraint(), .9),
    (feats_0, 1/n_train*L1Loss(), 0.25*QuadReg(), .9),
    (feats_0, 1/n_train*L1Loss(), 0.5 *QuadReg(), .9),
    (feats_0, 1/n_train*L1Loss(), 0.75*QuadReg(), .9),
    (feats_0, 1/n_train*L1Loss(), ZeroReg(), .9),
    (feats_0, 1/n_train*L1Loss(), 0.25*OneReg(), .9),
    (feats_0, 1/n_train*L1Loss(), 0.5 *OneReg(), .9),
    (feats_0, 1/n_train*L1Loss(), 0.75*OneReg(), .9),
    (feats_0, 1/n_train*L1Loss(), NonNegConstraint(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .9),
    (feats_0, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .9)
]

errors,i = test_models(models,MSE;k=15)

100.0%┣██████████████████████████████████████████┫ 15/15 [00:10<00:00, 1.4 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.5 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.6 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.5 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.5 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.5 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.5 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.5 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.6 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.5 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.6 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:09<00:00, 1.6 it/s]
100.0%┣█████████████████████

(Any[6467.551888683022, 6462.756477567312, 6435.865820693679, 6568.462593980583, 6561.840064287084, 6493.668216515634, 6508.92465758739, 6533.031587033325, 6467.465767670953, 6452.796261167634  …  9398.488055603217, 9536.270182653117, 8664.510031780317, 8649.81556813873, 8684.61457987852, 8697.171090547248, 8619.209168865504, 8563.935453178123, 8618.224851341, 8644.394127064706], 70)

In [47]:
models_1 = [
    (feats_1, 1/n_train*QuadLoss(), 0.25*QuadReg(), .1),
    (feats_1, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .1),
    (feats_1, 1/n_train*QuadLoss(), 0.75*QuadReg(), .1),
    (feats_1, 1/n_train*QuadLoss(), ZeroReg(), .1),
    (feats_1, 1/n_train*QuadLoss(), 0.25*OneReg(), .1),
    (feats_1, 1/n_train*QuadLoss(), 0.5 *OneReg(), .1),
    (feats_1, 1/n_train*QuadLoss(), 0.75*OneReg(), .1),
    (feats_1, 1/n_train*QuadLoss(), NonNegConstraint(), .1),
    (feats_1, 1/n_train*L1Loss(), 0.25*QuadReg(), .1),
    (feats_1, 1/n_train*L1Loss(), 0.5 *QuadReg(), .1),
    (feats_1, 1/n_train*L1Loss(), 0.75*QuadReg(), .1),
    (feats_1, 1/n_train*L1Loss(), ZeroReg(), .1),
    (feats_1, 1/n_train*L1Loss(), 0.25*OneReg(), .1),
    (feats_1, 1/n_train*L1Loss(), 0.5 *OneReg(), .1),
    (feats_1, 1/n_train*L1Loss(), 0.75*OneReg(), .1),
    (feats_1, 1/n_train*L1Loss(), NonNegConstraint(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .1),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .1),
    (feats_1, 1/n_train*QuadLoss(), 0.25*QuadReg(), .5),
    (feats_1, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .5),
    (feats_1, 1/n_train*QuadLoss(), 0.75*QuadReg(), .5),
    (feats_1, 1/n_train*QuadLoss(), ZeroReg(), .5),
    (feats_1, 1/n_train*QuadLoss(), 0.25*OneReg(), .5),
    (feats_1, 1/n_train*QuadLoss(), 0.5 *OneReg(), .5),
    (feats_1, 1/n_train*QuadLoss(), 0.75*OneReg(), .5),
    (feats_1, 1/n_train*QuadLoss(), NonNegConstraint(), .5),
    (feats_1, 1/n_train*L1Loss(), 0.25*QuadReg(), .5),
    (feats_1, 1/n_train*L1Loss(), 0.5 *QuadReg(), .5),
    (feats_1, 1/n_train*L1Loss(), 0.75*QuadReg(), .5),
    (feats_1, 1/n_train*L1Loss(), ZeroReg(), .5),
    (feats_1, 1/n_train*L1Loss(), 0.25*OneReg(), .5),
    (feats_1, 1/n_train*L1Loss(), 0.5 *OneReg(), .5),
    (feats_1, 1/n_train*L1Loss(), 0.75*OneReg(), .5),
    (feats_1, 1/n_train*L1Loss(), NonNegConstraint(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .5),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .5),
    (feats_1, 1/n_train*QuadLoss(), 0.25*QuadReg(), .9),
    (feats_1, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .9),
    (feats_1, 1/n_train*QuadLoss(), 0.75*QuadReg(), .9),
    (feats_1, 1/n_train*QuadLoss(), ZeroReg(), .9),
    (feats_1, 1/n_train*QuadLoss(), 0.25*OneReg(), .9),
    (feats_1, 1/n_train*QuadLoss(), 0.5 *OneReg(), .9),
    (feats_1, 1/n_train*QuadLoss(), 0.75*OneReg(), .9), #best model out of feature set 1
    (feats_1, 1/n_train*QuadLoss(), NonNegConstraint(), .9),
    (feats_1, 1/n_train*L1Loss(), 0.25*QuadReg(), .9),
    (feats_1, 1/n_train*L1Loss(), 0.5 *QuadReg(), .9),
    (feats_1, 1/n_train*L1Loss(), 0.75*QuadReg(), .9),
    (feats_1, 1/n_train*L1Loss(), ZeroReg(), .9),
    (feats_1, 1/n_train*L1Loss(), 0.25*OneReg(), .9),
    (feats_1, 1/n_train*L1Loss(), 0.5 *OneReg(), .9),
    (feats_1, 1/n_train*L1Loss(), 0.75*OneReg(), .9),
    (feats_1, 1/n_train*L1Loss(), NonNegConstraint(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .9),
    (feats_1, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .9),
]
errors,i = test_models(models_1,MSE;k=15)

100.0%┣██████████████████████████████████████████┫ 15/15 [00:14<00:00, 1.0 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:14<00:00, 1.0 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.0 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [00:13<00:00, 1.1 it/s]
100.0%┣█████████████████████

(Any[6305.997252683987, 6271.462380781899, 6196.889099740755, 6309.750546766214, 6393.938412442872, 6182.636198861417, 6228.272514605144, 6223.233906102328, 6239.731865914617, 6518.447705105094  …  9183.119433804932, 9293.860659800637, 8327.582702670246, 8160.179336498955, 8138.5573060351035, 8104.240355711566, 8329.811487347273, 8026.637669137322, 8178.283158899549, 8114.71306059098], 71)

In [52]:
models_2 = [
    (feats_2, 1/n_train*QuadLoss(), 0.25*QuadReg(), .1),
    (feats_2, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .1),
    (feats_2, 1/n_train*QuadLoss(), 0.75*QuadReg(), .1),
    (feats_2, 1/n_train*QuadLoss(), ZeroReg(), .1),
    (feats_2, 1/n_train*QuadLoss(), 0.25*OneReg(), .1),
    (feats_2, 1/n_train*QuadLoss(), 0.5 *OneReg(), .1),
    (feats_2, 1/n_train*QuadLoss(), 0.75*OneReg(), .1),
    (feats_2, 1/n_train*QuadLoss(), NonNegConstraint(), .1),
    (feats_2, 1/n_train*L1Loss(), 0.25*QuadReg(), .1),
    (feats_2, 1/n_train*L1Loss(), 0.5 *QuadReg(), .1),
    (feats_2, 1/n_train*L1Loss(), 0.75*QuadReg(), .1),
    (feats_2, 1/n_train*L1Loss(), ZeroReg(), .1),
    (feats_2, 1/n_train*L1Loss(), 0.25*OneReg(), .1),
    (feats_2, 1/n_train*L1Loss(), 0.5 *OneReg(), .1),
    (feats_2, 1/n_train*L1Loss(), 0.75*OneReg(), .1),
    (feats_2, 1/n_train*L1Loss(), NonNegConstraint(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .1),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .1),
    (feats_2, 1/n_train*QuadLoss(), 0.25*QuadReg(), .5),
    (feats_2, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .5),
    (feats_2, 1/n_train*QuadLoss(), 0.75*QuadReg(), .5),
    (feats_2, 1/n_train*QuadLoss(), ZeroReg(), .5),
    (feats_2, 1/n_train*QuadLoss(), 0.25*OneReg(), .5),
    (feats_2, 1/n_train*QuadLoss(), 0.5 *OneReg(), .5),
    (feats_2, 1/n_train*QuadLoss(), 0.75*OneReg(), .5),
    (feats_2, 1/n_train*QuadLoss(), NonNegConstraint(), .5),
    (feats_2, 1/n_train*L1Loss(), 0.25*QuadReg(), .5),
    (feats_2, 1/n_train*L1Loss(), 0.5 *QuadReg(), .5),
    (feats_2, 1/n_train*L1Loss(), 0.75*QuadReg(), .5),
    (feats_2, 1/n_train*L1Loss(), ZeroReg(), .5),
    (feats_2, 1/n_train*L1Loss(), 0.25*OneReg(), .5),
    (feats_2, 1/n_train*L1Loss(), 0.5 *OneReg(), .5),
    (feats_2, 1/n_train*L1Loss(), 0.75*OneReg(), .5),
    (feats_2, 1/n_train*L1Loss(), NonNegConstraint(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .5),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .5),
    (feats_2, 1/n_train*QuadLoss(), 0.25*QuadReg(), .9),
    (feats_2, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .9),
    (feats_2, 1/n_train*QuadLoss(), 0.75*QuadReg(), .9),
    (feats_2, 1/n_train*QuadLoss(), ZeroReg(), .9),
    (feats_2, 1/n_train*QuadLoss(), 0.25*OneReg(), .9),
    (feats_2, 1/n_train*QuadLoss(), 0.5 *OneReg(), .9),
    (feats_2, 1/n_train*QuadLoss(), 0.75*OneReg(), .9), #best model out of feature set 1
    (feats_2, 1/n_train*QuadLoss(), NonNegConstraint(), .9),
    (feats_2, 1/n_train*L1Loss(), 0.25*QuadReg(), .9),
    (feats_2, 1/n_train*L1Loss(), 0.5 *QuadReg(), .9),
    (feats_2, 1/n_train*L1Loss(), 0.75*QuadReg(), .9),
    (feats_2, 1/n_train*L1Loss(), ZeroReg(), .9),
    (feats_2, 1/n_train*L1Loss(), 0.25*OneReg(), .9),
    (feats_2, 1/n_train*L1Loss(), 0.5 *OneReg(), .9),
    (feats_2, 1/n_train*L1Loss(), 0.75*OneReg(), .9),
    (feats_2, 1/n_train*L1Loss(), NonNegConstraint(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .9),
    (feats_2, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .9)
]
errors,i = test_models(models_2,MSE;k=15)

100.0%┣██████████████████████████████████████████┫ 15/15 [01:10<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:09<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:07<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:06<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:07<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:07<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:07<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:18<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:30<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:20<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:22<00:00, 0.2 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [01:18<00:00, 0.2 it/s]
100.0%┣█████████████████████

(Any[6266.201809375093, 6179.850179292898, 6299.418558854985, 6213.9229051641705, 6234.898159116057, 6228.07048011717, 6143.61458287267, 6267.433525759858, 6286.987083409578, 6436.177974610369  …  9325.803235350766, 9090.610732403255, 8082.05111424513, 8157.811159486968, 8196.09570750356, 8224.5759449218, 8101.630097003499, 8175.535267848258, 8202.601728304004, 8156.57629233738], 67)

In [56]:
models_3 = [
    (feats_3, 1/n_train*QuadLoss(), 0.25*QuadReg(), .1),
    (feats_3, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .1),
    (feats_3, 1/n_train*QuadLoss(), 0.75*QuadReg(), .1),
    (feats_3, 1/n_train*QuadLoss(), ZeroReg(), .1),
    (feats_3, 1/n_train*QuadLoss(), 0.25*OneReg(), .1),
    (feats_3, 1/n_train*QuadLoss(), 0.5 *OneReg(), .1),
    (feats_3, 1/n_train*QuadLoss(), 0.75*OneReg(), .1),
    (feats_3, 1/n_train*QuadLoss(), NonNegConstraint(), .1),
    (feats_3, 1/n_train*L1Loss(), 0.25*QuadReg(), .1),
    (feats_3, 1/n_train*L1Loss(), 0.5 *QuadReg(), .1),
    (feats_3, 1/n_train*L1Loss(), 0.75*QuadReg(), .1),
    (feats_3, 1/n_train*L1Loss(), ZeroReg(), .1),
    (feats_3, 1/n_train*L1Loss(), 0.25*OneReg(), .1),
    (feats_3, 1/n_train*L1Loss(), 0.5 *OneReg(), .1),
    (feats_3, 1/n_train*L1Loss(), 0.75*OneReg(), .1),
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .1)
]
errors,i = test_models(models_3,MSE;k=15)

100.0%┣██████████████████████████████████████████┫ 15/15 [03:17<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:22<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:11<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:10<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:11<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:10<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:11<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:10<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:07<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:07<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:09<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:09<00:00, 0.1 it/s]
100.0%┣█████████████████████

(Any[3357.2838335817164, 3295.2864577773826, 3287.225881237891, 3275.137054469704, 3242.045020490456, 3249.7651486432164, 3278.1923444564723, 3369.799247315818, 3465.022985453251, 3447.767046742311, 3416.937395629453, 3429.8284889978268, 3492.204392651664, 3355.8706872047155, 3403.0683547048525, 3084.937956801807], 16)

### We can see here that feature set 3 has the lowest MSE (nearly 1/2 of the previous)

In [59]:
models = [
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .8), 
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .7), 
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .6), 
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .5), 
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .4), 
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .3), 
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .2), 
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .1)
]
errors,i = test_models(models,MSE;k=15)

100.0%┣██████████████████████████████████████████┫ 15/15 [03:20<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:15<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:06<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:14<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:16<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:09<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:07<00:00, 0.1 it/s]
100.0%┣██████████████████████████████████████████┫ 15/15 [03:20<00:00, 0.1 it/s]
The best model is model 5


(Any[3005.0165206460447, 3265.2619125606057, 3385.6306728762675, 3653.047618320855, 2995.360429684404, 3410.4861248677366, 3027.7771867129504, 3041.4758933270527], 5)

In [60]:
models = [
    (feats_3, 1/n_train*QuadLoss(), 0.25*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.5 *QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.75*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), ZeroReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.25*OneReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.5 *OneReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.75*OneReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), NonNegConstraint(), .4),
    (feats_3, 1/n_train*L1Loss(), 0.25*QuadReg(), .4),
    (feats_3, 1/n_train*L1Loss(), 0.5 *QuadReg(), .4),
    (feats_3, 1/n_train*L1Loss(), 0.75*QuadReg(), .4),
    (feats_3, 1/n_train*L1Loss(), ZeroReg(), .4),
    (feats_3, 1/n_train*L1Loss(), 0.25*OneReg(), .4),
    (feats_3, 1/n_train*L1Loss(), 0.5 *OneReg(), .4),
    (feats_3, 1/n_train*L1Loss(), 0.75*OneReg(), .4),
    (feats_3, 1/n_train*L1Loss(), NonNegConstraint(), .4), #best model from above
    (feats_3, 1/n_train*QuantileLoss(quantile=0.25), 0.25*QuadReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *QuadReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.25), 0.75*QuadReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.25), ZeroReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.25), 0.25*OneReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.25), 0.5 *OneReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.25), 0.75*OneReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.25), NonNegConstraint(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.75), 0.25*QuadReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *QuadReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.75), 0.75*QuadReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.75), ZeroReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.75), 0.25*OneReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.75), 0.5 *OneReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.75), 0.75*OneReg(), .4),
    (feats_3, 1/n_train*QuantileLoss(quantile=0.75), NonNegConstraint(), .4),

    
    
]
errors,i = test_models(models,MSE;k=5)

100.0%┣████████████████████████████████████████████┫ 5/5 [00:59<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:58<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:57<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:56<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:56<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:56<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:56<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:55<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:56<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:56<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:57<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [00:58<00:00, 0.1 it/s]
100.0%┣█████████████████████

(Any[2800.1743285934285, 2789.8405458206967, 2761.3771345334217, 2816.478150168958, 2785.5399614826356, 2792.938879470165, 2803.255684187071, 2846.635443854345, 3168.7968558040384, 3104.5718749288612  …  5264.390079625461, 5302.852204418142, 4147.420116496212, 4121.741788289379, 4144.208322418345, 4273.109026539858, 4161.2087032088075, 4327.197801428348, 4200.065690369818, 4392.601289272541], 3)

In [62]:
models = [
    (feats_3, 1/n_train*QuadLoss(), 0.1*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.2*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.3*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.4*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.5*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.6*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.7*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.75*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.8*QuadReg(), .4),
    (feats_3, 1/n_train*QuadLoss(), 0.9*QuadReg(), .4),
]
errors,i = test_models(models,MSE;k=5)

100.0%┣████████████████████████████████████████████┫ 5/5 [01:00<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [01:00<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [01:11<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [01:01<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [01:08<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [01:01<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [01:00<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [01:02<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [01:15<00:00, 0.1 it/s]
100.0%┣████████████████████████████████████████████┫ 5/5 [01:05<00:00, 0.1 it/s]
The best model is model 4


(Any[2884.3633978625353, 2786.7439862294805, 2806.030629960462, 2754.6959239578186, 2810.143786895092, 2812.113841355829, 2799.146681333407, 2814.2536251820316, 2766.1147604900643, 2824.4815213021575], 4)

### From this analysis, we can see that Feature Set 3 with Quadratic Loss and Regualarization, that gives the best model. We can tweak the lambda and learning rate. 