In [4]:
from sklearn.datasets import fetch_california_housing
# fetch_california_housing is used here because the Boston dataset loader
# (load_boston) has been removed from recent versions of scikit-learn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Load the California housing data as a (features, target) pair.
cal_data, cal_target = fetch_california_housing(return_X_y=True)

# Pin the shuffle seed so the train/test split -- and every score computed
# from it -- is identical on each Restart & Run All. Without a seed the split
# is re-drawn at random on every execution and the reported numbers drift.
data_train, data_test, target_train, target_test = train_test_split(
    cal_data, cal_target, random_state=0
)

# Ordinary least-squares baseline; it is created here and fitted later.
linreg = LinearRegression()

In [5]:
# Fit the least-squares baseline on the training split, then measure R² on
# the data it was fitted on and on the held-out split.
linreg.fit(data_train, target_train)

linreg_train_r2 = linreg.score(data_train, target_train)
linreg_test_r2 = linreg.score(data_test, target_test)

print(f'R² train score linreg: {linreg_train_r2}')
print(f'R² test score linreg: {linreg_test_r2}')

# Train and test R² are nearly equal, so there is no sign of overfitting.

R² train score linreg: 0.6080084087016805
R² test score linreg: 0.5998769181903001


In [6]:
from sklearn.linear_model import Ridge

# Sweep integer regularisation strengths 1..9 and report R² on the training
# and held-out splits for each one.
for alpha in range(1, 10):
    ridge = Ridge(alpha=alpha)
    ridge.fit(data_train, target_train)

    # Only Ridge (L2 penalty) is fitted here; Lasso is a different estimator,
    # so the output is labelled "Ridge".
    print(f'R² score Ridge train with alpha {alpha}: {ridge.score(data_train, target_train)}')
    print(f'R² score Ridge test with alpha {alpha}: {ridge.score(data_test, target_test)}')
    print('\n')

# In the recorded run alpha=1 has the highest test R² and the train/test gap
# stays small for every alpha, so none of the fits look overfitted.
# NOTE: ranking alphas by their test-split score reuses the test data for
# model selection; cross-validated scores are a sounder basis for the choice.

R² score Ridge lasso train with alpha 1: 0.6080083737062955
R² score Ridge lasso test with alpha 1: 0.5998599571363408


R² score Ridge lasso train with alpha 2: 0.6080082692116916
R² score Ridge lasso test with alpha 2: 0.5998430059244478


R² score Ridge lasso train with alpha 3: 0.6080080959486998
R² score Ridge lasso test with alpha 3: 0.5998260647432547


R² score Ridge lasso train with alpha 4: 0.6080078546395816
R² score Ridge lasso test with alpha 4: 0.5998091337776933


R² score Ridge lasso train with alpha 5: 0.6080075459981424
R² score Ridge lasso test with alpha 5: 0.5997922132090523


R² score Ridge lasso train with alpha 6: 0.6080071707298428
R² score Ridge lasso test with alpha 6: 0.5997753032150357


R² score Ridge lasso train with alpha 7: 0.6080067295319076
R² score Ridge lasso test with alpha 7: 0.5997584039698196


R² score Ridge lasso train with alpha 8: 0.6080062230934349
R² score Ridge lasso test with alpha 8: 0.5997415156441095


R² score Ridge lasso train with 

In [7]:
from sklearn.model_selection import cross_val_score

# Cross-validated R² for each integer alpha in 1..19, using cross_val_score's
# default splitting and the estimator's default scorer.
# Maps alpha -> array of per-fold scores.
alpha_cross_scores = {}

for alpha in range(1, 20):
    ridge = Ridge(alpha=alpha)
    alpha_cross_scores[alpha] = cross_val_score(ridge, cal_data, cal_target)

# The dict keys are alphas, so the first line of each report is labelled
# "Alpha". Adjacent f-strings are used instead of a backslash continuation so
# that source indentation does not leak into the printed text.
for alpha, fold_scores in alpha_cross_scores.items():
    print(
        f'Alpha: {alpha}, \n'
        f'Alpha cross validation scores: {fold_scores},\n'
        f'Average R² score: {fold_scores.mean()}'
    )

# In the recorded run the average R² rises slightly as alpha increases.


Score: 1, 
Alpha cross validation scores: [0.54878594 0.46817341 0.55078466 0.53693584 0.66053068],        
Average R² score: 0.5530421056931839
Score: 2, 
Alpha cross validation scores: [0.54890772 0.46813998 0.55078491 0.53688474 0.66054715],        
Average R² score: 0.5530528972869166
Score: 3, 
Alpha cross validation scores: [0.54902857 0.46810662 0.55078509 0.53683372 0.66056346],        
Average R² score: 0.553063490967508
Score: 4, 
Alpha cross validation scores: [0.54914851 0.46807332 0.55078521 0.53678278 0.66057962],        
Average R² score: 0.5530738888665226
Score: 5, 
Alpha cross validation scores: [0.54926754 0.4680401  0.55078527 0.53673193 0.66059563],        
Average R² score: 0.553084093089087
Score: 6, 
Alpha cross validation scores: [0.54938567 0.46800694 0.55078527 0.53668116 0.66061149],        
Average R² score: 0.5530941057142672
Score: 7, 
Alpha cross validation scores: [0.54950291 0.46797386 0.5507852  0.53663047 0.6606272 ],        
Average R² score: 0.5531

In [17]:
from sklearn.model_selection import cross_val_score

# Wider sweep: cross-validated R² for every integer alpha in 1..99.
# Maps alpha -> array of per-fold scores.
alpha_cross_scores = {}

for alpha in range(1, 100):
    ridge = Ridge(alpha=alpha)
    alpha_cross_scores[alpha] = cross_val_score(ridge, cal_data, cal_target)

# Key the fold averages by alpha. Alphas are unique, whereas a mapping keyed
# on the (float) average would silently drop an alpha whenever two of them
# produced exactly the same average.
mean_score_by_alpha = {
    alpha: fold_scores.mean()
    for alpha, fold_scores in alpha_cross_scores.items()
}

# max() with key= returns the alpha whose average score is largest
# (the first such alpha if several tie).
best_alpha = max(mean_score_by_alpha, key=mean_score_by_alpha.get)

print(f'maximum score: {mean_score_by_alpha[best_alpha]},')
print(f'alpha of said score: {best_alpha}')

# In the recorded run the best whole-number alpha in 1..99 is 81, which is
# therefore the preferred alpha.


maximum score: 0.5534208733523284,
alpha of said score:81
