# Ridge Regression

In [91]:
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso, ElasticNet
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import RidgeCV, LassoCV, ElasticNetCV

df = pd.read_csv('./example2.csv')
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df[['Salary']]
x_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
x = pd.concat([x_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 42)
 
df.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N
5,594,169,4,74,51,35,11,4408,1133,19,501,336,194,A,W,282,421,25,750.0,A


In [92]:
df.shape

(263, 20)

In [93]:
model = Ridge(alpha=0.1).fit(x_train, y_train)
model

Ridge(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [94]:
model.coef_

array([[ -1.77435737,   8.80240528,   7.29595605,  -3.33257639,
         -2.08316481,   5.42531283,   7.58514945,  -0.13752764,
         -0.20779701,  -0.60361067,   1.7927957 ,   0.72866408,
         -0.68710375,   0.26153564,   0.26888652,  -0.52674278,
        112.14640272, -99.80997876, -48.07152768]])

In [95]:
model.intercept_

array([-4.57862691])

In [96]:
lambdas = 10**np.linspace(10,-2,100)*0.05

In [97]:
model = Ridge()
katsayilar = []

for i in lambdas:
    model.set_params(alpha = i)
    model.fit(x_train, y_train)
    katsayilar.append(model.coef_)
    

In [98]:
model = model.fit(x_train, y_train)

In [99]:
y_pred = model.predict(x_train)
y_pred[0:10]

array([[ 377.44426057],
       [ 802.19524262],
       [ 495.61015184],
       [ 112.53191049],
       [ 426.21687459],
       [1004.23563724],
       [ 154.72909593],
       [ 362.56199095],
       [ 485.37561246],
       [ 918.71404373]])

In [100]:
RMSE = np.sqrt(mean_squared_error(y_train, y_pred))
RMSE

289.3292825614703

In [101]:
errorcvs = -cross_val_score(model, x_train, y_train, cv=10, scoring="neg_mean_squared_error")
np.sqrt(np.mean(errorcvs))

351.82639292149247

In [102]:
y_pred = model.predict(x_test)
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
RMSE

357.0531409274788

In [103]:
lambdas2 = 10**np.linspace(10,-2,100)*0.05
lambdas1 = np.random.randint(0,1000,100)

ridgecv = RidgeCV(alphas = lambdas2, scoring="neg_mean_squared_error", cv=10, normalize = True)
ridgecv.fit(x_train, y_train)


RidgeCV(alphas=array([5.00000000e+08, 3.78231664e+08, 2.86118383e+08, 2.16438064e+08,
       1.63727458e+08, 1.23853818e+08, 9.36908711e+07, 7.08737081e+07,
       5.36133611e+07, 4.05565415e+07, 3.06795364e+07, 2.32079442e+07,
       1.75559587e+07, 1.32804389e+07, 1.00461650e+07, 7.59955541e+06,
       5.74878498e+06, 4.34874501e+06, 3.28966612e+06, 2.48851178e+06,
       1.88246790e+06, 1.42401793e+0...
       3.28966612e-02, 2.48851178e-02, 1.88246790e-02, 1.42401793e-02,
       1.07721735e-02, 8.14875417e-03, 6.16423370e-03, 4.66301673e-03,
       3.52740116e-03, 2.66834962e-03, 2.01850863e-03, 1.52692775e-03,
       1.15506485e-03, 8.73764200e-04, 6.60970574e-04, 5.00000000e-04]),
        cv=10, fit_intercept=True, gcv_mode=None, normalize=True,
        scoring='neg_mean_squared_error', store_cv_values=False)

In [104]:
ridgecv.alpha_

0.708737081463401

In [105]:
#final model
tuned = Ridge(alpha = ridgecv.alpha_).fit(x_train, y_train)

In [106]:
y_pred = tuned.predict(x_test)
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
RMSE

356.8696112502694

# Lasso Regression

In [107]:
df = pd.read_csv('./example2.csv')
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df[['Salary']]
x_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
x = pd.concat([x_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 42)
 
df.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N
5,594,169,4,74,51,35,11,4408,1133,19,501,336,194,A,W,282,421,25,750.0,A


In [108]:
df.shape

(263, 20)

In [109]:
lm_model = Lasso().fit(x_train, y_train)
lm_model

  positive)


Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [110]:
lm_model.intercept_

array([-5.58745068])

In [111]:
lm_model.coef_

array([-1.74875691e+00,  8.59204135e+00,  6.67993798e+00, -3.06715333e+00,
       -1.91843070e+00,  5.32372890e+00,  8.39184117e+00, -1.63172447e-01,
       -8.22311277e-02, -3.93602861e-01,  1.71118530e+00,  6.55730545e-01,
       -6.48379405e-01,  2.59815358e-01,  2.73041157e-01, -4.41440454e-01,
        8.54474011e+01, -9.59701213e+01, -2.13086605e+01])

In [112]:
coefs = []
for i in lambdas:
    lm_model.set_params(alpha = i)
    lm_model.fit(x_train, y_train)
    coefs.append(model.coef_)

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


In [113]:
lm_model

Lasso(alpha=0.0005, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [114]:
lm_model.predict(x_train)[0:5]

array([378.29686738, 781.47041138, 494.62744243, 116.60604915,
       424.87397548])

In [115]:
lm_model.predict(x_test)[0:5]

array([ 614.40875362,  702.52570015, 1005.16527371,  414.28857279,
        398.06512012])

In [116]:
y_pred = lm_model.predict(x_test)

In [133]:
np.sqrt((mean_squared_error(y_test, y_pred)))

357.1676548181245

In [118]:
r2_score(y_test, y_pred)

0.4117935742286186

In [119]:
lassocv = LassoCV(alphas = lambdas, cv=10, max_iter=100000)
lassocv.fit(x_train, y_train)
lassocv.alpha_

  y = column_or_1d(y, warn=True)


188.2467903396232

In [120]:
lassotuned = Lasso().set_params(alpha = lassocv.alpha_).fit(x_train, y_train)

In [121]:
y_pred = lassotuned.predict(x_test)
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
RMSE

363.13759971441533

In [122]:
pd.Series(lassotuned.coef_, index = x_train.columns)

AtBat         -1.120834
Hits           5.516956
HmRun          0.000000
Runs           0.000000
RBI            0.000000
Walks          2.772763
Years          0.000000
CAtBat        -0.170548
CHits          0.231297
CHmRun        -0.000000
CRuns          1.059591
CRBI           0.472247
CWalks        -0.209293
PutOuts        0.272930
Assists        0.176412
Errors        -0.000000
League_N       0.000000
Division_W    -0.000000
NewLeague_N    0.000000
dtype: float64

# Lasso Regression

In [123]:
eletmodel = ElasticNet().fit(x_train, y_train)

  positive)


In [124]:
eletmodel.coef_

array([ -1.86256172,   8.70489065,   5.10426375,  -2.89875799,
        -1.28642985,   5.24343682,   6.04480276,  -0.14701495,
        -0.21566628,  -0.7897201 ,   1.80813117,   0.80914508,
        -0.61262382,   0.26816203,   0.27172387,  -0.36530729,
        19.2186222 , -31.16586592,   8.98369938])

In [125]:
eletmodel.intercept_

array([-6.4659556])

In [126]:
eletmodel.predict(x_train)[0:5]

array([325.74706292, 776.06632333, 522.86508419, 107.64091955,
       449.03139566])

In [130]:
y_pred = eletmodel.predict(x_test)

In [131]:
np.sqrt(mean_squared_error(y_test, y_pred))

357.1676548181245

In [135]:
r2_score(y_test, y_pred)

0.4107022246932689

In [136]:
enetmodelcv = ElasticNetCV(cv = 10).fit(x_train, y_train)

  y = column_or_1d(y, warn=True)


In [137]:
enetmodelcv.alpha_

5230.7647364798695

In [138]:
enetmodelcv.intercept_

-38.5194055839429

In [139]:
enetmodelcv.coef_

array([ 0.62845434,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -0.        ,  0.        ,  0.09788752,  0.        ,
        0.27265769,  0.19270075,  0.00758665,  0.3106529 ,  0.        ,
       -0.        ,  0.        , -0.        ,  0.        ])

In [141]:
enetmodeltuned = ElasticNet(alpha = enetmodelcv.alpha_).fit(x_train, y_train)

In [142]:
y_pred = enetmodeltuned.predict(x_test)
RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
RMSE

394.15280563218795