In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv("Hitters.csv")
df.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,League,Division,PutOuts,Assists,Errors,Salary,NewLeague
0,293,66,1,30,29,14,1,293,66,1,30,29,14,A,E,446,33,20,,A
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,N,W,632,43,10,475.0,N
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,A,W,880,82,14,480.0,A
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,N,E,200,11,3,500.0,N
4,321,87,10,39,42,30,2,396,101,12,48,46,33,N,E,805,40,4,91.5,N


In [3]:
df.dropna(inplace=True)

df = pd.get_dummies(df, columns=["League","Division","NewLeague"])

df.drop(["League_N", "Division_W", "NewLeague_N"], axis=1, inplace=True)

df.replace(to_replace=False, value=0, inplace=True)
df.replace(to_replace=True, value=1, inplace=True)

df.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,PutOuts,Assists,Errors,Salary,League_A,Division_E,NewLeague_A
1,315,81,7,24,38,39,14,3449,835,69,321,414,375,632,43,10,475.0,0,0,0
2,479,130,18,66,72,76,3,1624,457,63,224,266,263,880,82,14,480.0,1,0,1
3,496,141,20,65,78,37,11,5628,1575,225,828,838,354,200,11,3,500.0,0,1,0
4,321,87,10,39,42,30,2,396,101,12,48,46,33,805,40,4,91.5,0,1,0
5,594,169,4,74,51,35,11,4408,1133,19,501,336,194,282,421,25,750.0,1,0,1


In [4]:
X = df.drop("Salary", axis=1)
y = df[["Salary"]]
X.shape, y.shape

((263, 19), (263, 1))

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=16)

In [16]:
svr_model = SVR(kernel='linear').fit(X_train, y_train)

In [17]:
svr_model.get_params()

{'C': 1.0,
 'cache_size': 200,
 'coef0': 0.0,
 'degree': 3,
 'epsilon': 0.1,
 'gamma': 'scale',
 'kernel': 'linear',
 'max_iter': -1,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [18]:
y_preds = svr_model.predict(X_train)

r2_score(y_train, y_preds)

0.43251350999633253

In [19]:
svr_model.intercept_

array([-43.68201367])

In [20]:
svr_model.coef_

array([[-1.6241214 ,  9.72279459, -0.98342195, -5.39765402, -2.9656514 ,
         7.06877859,  6.46381915,  0.09273283, -0.70230298, -0.15078809,
         0.89018284,  1.26670416, -0.95020777,  0.11603867, -0.05841226,
         1.55916962, -4.00240605,  3.10405115, -1.00240605]])

In [21]:
# test hatası
y_pred = svr_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
rmse

322.1290075649643

In [12]:
# Model Tuning

In [22]:
svr = SVR()
svr.get_params()

{'C': 1.0,
 'cache_size': 200,
 'coef0': 0.0,
 'degree': 3,
 'epsilon': 0.1,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [None]:
svr_params = {
    "kernel": ["rbf","linear","sigmoid"],
    "C": [0.01, 0.1, 0.5, 1, 3],
    "gamma": ["scale", "auto"],
    "epsilon": [0.01, 0.1, 1, 10]
}
svr_search = GridSearchCV(estimator=svr, param_grid=svr_params, cv=5, verbose=2, n_jobs=-1).fit(X_train, y_train)


Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [24]:
svr_search.best_params_

{'C': 0.01, 'kernel': 'linear'}

In [25]:
# Final Modeli
svr_tuned = SVR(kernel="linear", C=0.01).fit(X_train, y_train)

In [26]:
y_pred = svr_tuned.predict(X_test)

In [27]:
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
rmse

316.96767846869056

In [28]:
r2_score(y_test, y_pred)

0.5070147580357429