In [1]:
import pandas as pd
import numpy as np 
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import cross_validate, GridSearchCV

In [2]:
df = pd.read_csv("https://liangfgithub.github.io/Data/Ames_data.csv")
testID = pd.read_csv(
    'https://liangfgithub.github.io/Data/project1_testIDs.dat',delim_whitespace=' ',header=None)


In [3]:
def numeric_convert(frame):
    # We may want to normalize data as well 
    for col in frame:
        try:
            frame[col] = pd.to_numeric(frame[col])
        except:
            frame[col] = pd.factorize(frame[col])[0]
    
    return frame

def get_split(frame, index):

    frame = frame.drop('Garage_Yr_Blt', axis=1)

    num_rows = np.arange(len(frame))

    test_index = testID.iloc[:,index]
    train_index = np.array([i for i in num_rows if i not in test_index])

    xtest = numeric_convert(frame.iloc[test_index,1:-1].copy())
    xtrain = numeric_convert(frame.iloc[train_index,1:-1].copy())

    # convert to log to get better model
    ytest = np.log(frame.iloc[test_index,-1].copy())
    ytrain = np.log(frame.iloc[train_index,-1].copy())

    return xtrain,xtest,ytrain,ytest




In [4]:
# Set up data for use with scikit-learn
frame = df.drop('Garage_Yr_Blt', axis=1)
cvsplits = []
num_rows = np.arange(len(frame))
for index in range(0,10):
    test_index = testID.iloc[:,index]
    train_index = np.array([i for i in num_rows if i not in test_index])
    cvsplits.append((train_index, test_index.values))

x = numeric_convert(frame.iloc[:,1:-1].copy())

# convert to log to get better model
y = np.log(frame.iloc[:,-1].copy())

In [5]:
lasso = Lasso()
lambda_dict = {
    'alpha': np.exp(np.linspace(-1, -8, num=80))
}
lasso_cv_res = GridSearchCV(lasso, lambda_dict, verbose=4, cv=cvsplits, scoring='neg_root_mean_squared_error', return_train_score=True).fit(x,y)
#for i in range(0,10):
#    print(f'Split {i} lowest RMS, and runtime, lambda for lowest RMS: {np.min(np.abs(lasso_cv_res.cv_results_[f'split{i}_test_score']))}, )
#np.min(np.abs(las.cv_results_['split0_test_score']))


Fitting 10 folds for each of 80 candidates, totalling 800 fits
[CV 1/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.184) total time=   0.1s
[CV 2/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.171) total time=   0.1s
[CV 3/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.209) total time=   0.1s
[CV 4/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.208) total time=   0.1s
[CV 5/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.192) total time=   0.1s
[CV 6/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.184) total time=   0.1s
[CV 7/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.171) total time=   0.1s
[CV 8/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.209) total time=   0.1s
[CV 9/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.208) total time=   0.1s
[CV 10/10] END alpha=0.36787944117144233;, score=(train=-0.186, test=-0.192) total

[CV 9/10] END alpha=0.1810719596689109;, score=(train=-0.185, test=-0.206) total time=   0.1s
[CV 10/10] END alpha=0.1810719596689109;, score=(train=-0.185, test=-0.191) total time=   0.1s
[CV 1/10] END alpha=0.1657178965861574;, score=(train=-0.185, test=-0.183) total time=   0.1s
[CV 2/10] END alpha=0.1657178965861574;, score=(train=-0.185, test=-0.170) total time=   0.1s
[CV 3/10] END alpha=0.1657178965861574;, score=(train=-0.185, test=-0.206) total time=   0.1s
[CV 4/10] END alpha=0.1657178965861574;, score=(train=-0.185, test=-0.205) total time=   0.1s
[CV 5/10] END alpha=0.1657178965861574;, score=(train=-0.185, test=-0.191) total time=   0.1s
[CV 6/10] END alpha=0.1657178965861574;, score=(train=-0.185, test=-0.183) total time=   0.1s
[CV 7/10] END alpha=0.1657178965861574;, score=(train=-0.185, test=-0.170) total time=   0.1s
[CV 8/10] END alpha=0.1657178965861574;, score=(train=-0.185, test=-0.206) total time=   0.1s
[CV 9/10] END alpha=0.1657178965861574;, score=(train=-0.18

[CV 7/10] END alpha=0.08156711392056668;, score=(train=-0.184, test=-0.169) total time=   0.1s
[CV 8/10] END alpha=0.08156711392056668;, score=(train=-0.184, test=-0.205) total time=   0.1s
[CV 9/10] END alpha=0.08156711392056668;, score=(train=-0.184, test=-0.204) total time=   0.0s
[CV 10/10] END alpha=0.08156711392056668;, score=(train=-0.184, test=-0.190) total time=   0.1s
[CV 1/10] END alpha=0.07465060064648213;, score=(train=-0.184, test=-0.183) total time=   0.1s
[CV 2/10] END alpha=0.07465060064648213;, score=(train=-0.184, test=-0.169) total time=   0.1s
[CV 3/10] END alpha=0.07465060064648213;, score=(train=-0.184, test=-0.205) total time=   0.0s
[CV 4/10] END alpha=0.07465060064648213;, score=(train=-0.184, test=-0.204) total time=   0.1s
[CV 5/10] END alpha=0.07465060064648213;, score=(train=-0.184, test=-0.190) total time=   0.1s
[CV 6/10] END alpha=0.07465060064648213;, score=(train=-0.184, test=-0.183) total time=   0.1s
[CV 7/10] END alpha=0.07465060064648213;, score=(

[CV 4/10] END alpha=0.036743370345668276;, score=(train=-0.183, test=-0.203) total time=   0.1s
[CV 5/10] END alpha=0.036743370345668276;, score=(train=-0.183, test=-0.189) total time=   0.1s
[CV 6/10] END alpha=0.036743370345668276;, score=(train=-0.183, test=-0.182) total time=   0.2s
[CV 7/10] END alpha=0.036743370345668276;, score=(train=-0.183, test=-0.169) total time=   0.2s
[CV 8/10] END alpha=0.036743370345668276;, score=(train=-0.183, test=-0.204) total time=   0.1s
[CV 9/10] END alpha=0.036743370345668276;, score=(train=-0.183, test=-0.203) total time=   0.1s
[CV 10/10] END alpha=0.036743370345668276;, score=(train=-0.183, test=-0.189) total time=   0.1s
[CV 1/10] END alpha=0.03362770281110396;, score=(train=-0.182, test=-0.182) total time=   0.1s
[CV 2/10] END alpha=0.03362770281110396;, score=(train=-0.182, test=-0.168) total time=   0.2s
[CV 3/10] END alpha=0.03362770281110396;, score=(train=-0.182, test=-0.203) total time=   0.1s
[CV 4/10] END alpha=0.03362770281110396;, 

[CV 2/10] END alpha=0.016551710603291556;, score=(train=-0.170, test=-0.159) total time=   0.1s
[CV 3/10] END alpha=0.016551710603291556;, score=(train=-0.170, test=-0.190) total time=   0.0s
[CV 4/10] END alpha=0.016551710603291556;, score=(train=-0.170, test=-0.188) total time=   0.0s
[CV 5/10] END alpha=0.016551710603291556;, score=(train=-0.170, test=-0.176) total time=   0.0s
[CV 6/10] END alpha=0.016551710603291556;, score=(train=-0.170, test=-0.168) total time=   0.0s
[CV 7/10] END alpha=0.016551710603291556;, score=(train=-0.170, test=-0.159) total time=   0.0s
[CV 8/10] END alpha=0.016551710603291556;, score=(train=-0.170, test=-0.190) total time=   0.0s
[CV 9/10] END alpha=0.016551710603291556;, score=(train=-0.170, test=-0.188) total time=   0.0s
[CV 10/10] END alpha=0.016551710603291556;, score=(train=-0.170, test=-0.176) total time=   0.0s
[CV 1/10] END alpha=0.015148202245646872;, score=(train=-0.169, test=-0.167) total time=   0.0s
[CV 2/10] END alpha=0.01514820224564687

[CV 1/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.157) total time=   0.0s
[CV 2/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.152) total time=   0.0s
[CV 3/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.181) total time=   0.0s
[CV 4/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.178) total time=   0.0s
[CV 5/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.166) total time=   0.1s
[CV 6/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.157) total time=   0.1s
[CV 7/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.152) total time=   0.0s
[CV 8/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.181) total time=   0.0s
[CV 9/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.178) total time=   0.0s
[CV 10/10] END alpha=0.007456015093819829;, score=(train=-0.161, test=-0.166) total time=   0.0s
[CV 1/10] END alpha=0.00682377956543763

[CV 8/10] END alpha=0.003669885058158934;, score=(train=-0.155, test=-0.174) total time=   0.0s
[CV 9/10] END alpha=0.003669885058158934;, score=(train=-0.155, test=-0.171) total time=   0.0s
[CV 10/10] END alpha=0.003669885058158934;, score=(train=-0.155, test=-0.161) total time=   0.0s
[CV 1/10] END alpha=0.003358695811671193;, score=(train=-0.155, test=-0.150) total time=   0.0s
[CV 2/10] END alpha=0.003358695811671193;, score=(train=-0.155, test=-0.149) total time=   0.0s
[CV 3/10] END alpha=0.003358695811671193;, score=(train=-0.155, test=-0.174) total time=   0.0s
[CV 4/10] END alpha=0.003358695811671193;, score=(train=-0.155, test=-0.170) total time=   0.0s
[CV 5/10] END alpha=0.003358695811671193;, score=(train=-0.155, test=-0.160) total time=   0.1s
[CV 6/10] END alpha=0.003358695811671193;, score=(train=-0.155, test=-0.150) total time=   0.0s
[CV 7/10] END alpha=0.003358695811671193;, score=(train=-0.155, test=-0.149) total time=   0.0s
[CV 8/10] END alpha=0.00335869581167119

[CV 3/10] END alpha=0.0016531655876568638;, score=(train=-0.152, test=-0.170) total time=   0.1s
[CV 4/10] END alpha=0.0016531655876568638;, score=(train=-0.152, test=-0.166) total time=   0.0s
[CV 5/10] END alpha=0.0016531655876568638;, score=(train=-0.152, test=-0.158) total time=   0.0s
[CV 6/10] END alpha=0.0016531655876568638;, score=(train=-0.152, test=-0.147) total time=   0.0s
[CV 7/10] END alpha=0.0016531655876568638;, score=(train=-0.152, test=-0.148) total time=   0.0s
[CV 8/10] END alpha=0.0016531655876568638;, score=(train=-0.152, test=-0.170) total time=   0.0s
[CV 9/10] END alpha=0.0016531655876568638;, score=(train=-0.152, test=-0.166) total time=   0.0s
[CV 10/10] END alpha=0.0016531655876568638;, score=(train=-0.152, test=-0.158) total time=   0.0s
[CV 1/10] END alpha=0.001512984806681538;, score=(train=-0.152, test=-0.147) total time=   0.0s
[CV 2/10] END alpha=0.001512984806681538;, score=(train=-0.152, test=-0.148) total time=   0.0s
[CV 3/10] END alpha=0.001512984

[CV 9/10] END alpha=0.0008136957359210878;, score=(train=-0.151, test=-0.165) total time=   0.1s
[CV 10/10] END alpha=0.0008136957359210878;, score=(train=-0.151, test=-0.156) total time=   0.0s
[CV 1/10] END alpha=0.0007446981082246495;, score=(train=-0.151, test=-0.146) total time=   0.1s
[CV 2/10] END alpha=0.0007446981082246495;, score=(train=-0.151, test=-0.148) total time=   0.0s
[CV 3/10] END alpha=0.0007446981082246495;, score=(train=-0.151, test=-0.169) total time=   0.0s
[CV 4/10] END alpha=0.0007446981082246495;, score=(train=-0.151, test=-0.165) total time=   0.0s
[CV 5/10] END alpha=0.0007446981082246495;, score=(train=-0.151, test=-0.156) total time=   0.0s
[CV 6/10] END alpha=0.0007446981082246495;, score=(train=-0.151, test=-0.146) total time=   0.0s
[CV 7/10] END alpha=0.0007446981082246495;, score=(train=-0.151, test=-0.148) total time=   0.0s
[CV 8/10] END alpha=0.0007446981082246495;, score=(train=-0.151, test=-0.169) total time=   0.0s
[CV 9/10] END alpha=0.0007446

[CV 6/10] END alpha=0.0003665438475946984;, score=(train=-0.150, test=-0.146) total time=   0.1s
[CV 7/10] END alpha=0.0003665438475946984;, score=(train=-0.150, test=-0.149) total time=   0.0s
[CV 8/10] END alpha=0.0003665438475946984;, score=(train=-0.150, test=-0.168) total time=   0.0s
[CV 9/10] END alpha=0.0003665438475946984;, score=(train=-0.150, test=-0.164) total time=   0.0s
[CV 10/10] END alpha=0.0003665438475946984;, score=(train=-0.150, test=-0.156) total time=   0.0s
[CV 1/10] END alpha=0.00033546262790251185;, score=(train=-0.150, test=-0.146) total time=   0.0s
[CV 2/10] END alpha=0.00033546262790251185;, score=(train=-0.150, test=-0.149) total time=   0.1s
[CV 3/10] END alpha=0.00033546262790251185;, score=(train=-0.150, test=-0.168) total time=   0.0s
[CV 4/10] END alpha=0.00033546262790251185;, score=(train=-0.150, test=-0.164) total time=   0.0s
[CV 5/10] END alpha=0.00033546262790251185;, score=(train=-0.150, test=-0.156) total time=   0.0s
[CV 6/10] END alpha=0.00

In [182]:
q.cv_results_.keys()

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_alpha', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'split5_test_score', 'split6_test_score', 'split7_test_score', 'split8_test_score', 'split9_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score', 'split0_train_score', 'split1_train_score', 'split2_train_score', 'split3_train_score', 'split4_train_score', 'split5_train_score', 'split6_train_score', 'split7_train_score', 'split8_train_score', 'split9_train_score', 'mean_train_score', 'std_train_score'])