In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import model_selection
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and data-conversion notices raised by the libraries
# used below -- consider narrowing it to specific warning categories.
warnings.filterwarnings("ignore")

##### ElasticNet Regresyon:
##### • Ridge ve Lasso Regresyonun birleşimidir
##### • L1 ve L2 normlarını kullanarak katsayıları düzenler
##### • L1(Lasso): ortalama kare hatası + alpha*betaların mutlak değerleri toplamı
##### • L2(Ridge): ortalama kare hatası + alpha*betaların karesi toplamı
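##### • L1+L2(ElasticNet): ortalama kare hatası + alpha*l1_ratio*betaların mutlak değerleri toplamı + 0.5*alpha*(1 - l1_ratio)*betaların karesi toplamı (scikit-learn'de l1_ratio, L1 ve L2 cezaları arasındaki dengeyi belirler)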

In [3]:
# Read the Hitters dataset from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="Hitters.csv")
df.head(n=5)

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
0,293,66,1,30,29,14,1,293,66,1,30,29,14,A,E,446,33,20,,A
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N


In [4]:
# Build the modelling frame: drop rows with missing values, then dummy-encode
# the three categorical columns.
# `dtype=int` makes get_dummies emit 0/1 integers directly, so no frame-wide
# True/False -> 1/0 replace (which scans every column and depends on implicit
# dtype conversion) is needed afterwards.
df = pd.get_dummies(
    df.dropna(),
    columns=["League", "Division", "NewLeague"],
    dtype=int,
)

# Keep one indicator per two-level category (League_A, Division_E,
# NewLeague_A); the complementary column would be perfectly collinear with it.
df = df.drop(columns=["League_N", "Division_W", "NewLeague_N"])

df.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,PutOuts,Assists,Errors,Salary,League_A,Division_E,NewLeague_A
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,632,43,10,475.0,0,0,0
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,880,82,14,480.0,1,0,1
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,200,11,3,500.0,0,1,0
4,321,87,10,39,42,30,2,396,101,12,48,46,33,805,40,4,91.5,0,1,0
5,594,169,4,74,51,35,11,4408,1133,19,501,336,194,282,421,25,750.0,1,0,1


In [5]:
# Separate the target from the predictors. The target is kept as a one-column
# DataFrame (list selector) rather than a Series.
y = df.loc[:, ["Salary"]]
X = df.drop(columns=["Salary"])
X.shape, y.shape

((263, 19), (263, 1))

In [6]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=16,
)

In [7]:
# Baseline elastic net fitted with the estimator's default hyperparameters.
enet_model = ElasticNet()
enet_model = enet_model.fit(X_train, y_train)  # fit() returns the estimator itself

In [8]:
enet_model.intercept_ # intercept term (b0)

array([184.68933364])

In [9]:
enet_model.coef_ # beta coefficients, one per feature column

array([ -1.97650302,   8.90042138,   2.93083146,  -6.56818077,
        -0.46455416,   6.90466488,  -8.83651016,  -0.07565638,
        -0.48562791,  -1.20835505,   1.99220298,   1.29540323,
        -1.07382273,   0.21052626,   0.34950757,  -3.92992057,
       -10.94964458,  37.23356663,  -5.30159537])

In [10]:
# Predict on the held-out test split with the baseline model.
y_pred = enet_model.predict(X_test)

In [11]:
# Coefficient of determination of the baseline model on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.5929682862054741

In [12]:
# Test RMSE of the baseline model: square root of the mean squared error.
np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))

298.5484622481025

In [13]:
# Model Tuning

In [14]:
# Tune alpha with 10-fold cross-validation on the training split.
# y_train is a one-column DataFrame, while ElasticNetCV is documented for a
# 1-D target; flatten it explicitly instead of relying on the estimator's
# implicit column-vector conversion (its warning would be hidden by the
# notebook-wide warnings filter).
enet_cv = ElasticNetCV(cv=10).fit(X_train, np.ravel(y_train))

In [15]:
# Regularization strength selected by the cross-validation.
enet_cv.alpha_

1099.2055315878792

In [16]:
# Intercept of the cross-validated model.
enet_cv.intercept_

95.2184546126212

In [17]:
# Coefficients of the cross-validated model, one per feature column.
enet_cv.coef_

array([ 0.23472218,  0.4690107 , -0.        ,  0.        ,  0.        ,
        0.38080689, -0.        , -0.27972917,  0.71203775,  0.        ,
        0.63667009,  0.7112804 , -0.14831023,  0.19005493,  0.16323093,
       -0.        , -0.        ,  0.        , -0.        ])

In [18]:
# Final Model

In [20]:
# Refit on the training split with the hyperparameters chosen by the
# cross-validation. l1_ratio is forwarded together with alpha so the final
# model keeps matching the CV search if a non-default l1_ratio is ever
# tuned there, instead of silently falling back to ElasticNet's default.
enet_tuned = ElasticNet(
    alpha=enet_cv.alpha_,
    l1_ratio=enet_cv.l1_ratio_,
).fit(X_train, y_train)

In [21]:
# Predict on the test split with the tuned model.
# NOTE(review): this rebinds y_pred, replacing the baseline model's predictions.
y_pred = enet_tuned.predict(X_test)

In [22]:
# Test RMSE of the tuned model: square root of the mean squared error.
np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))

306.6493609990647