In [3]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.svm import SVR

In [4]:
# Silence every warning category for the rest of the session so cell outputs
# stay uncluttered. Note this also hides deprecation and convergence warnings.
from warnings import filterwarnings
filterwarnings(action='ignore')

In [5]:
# Load the Hitters data and discard every row that contains a missing value.
df = pd.read_csv("Hitters.csv").dropna()
# One-hot encode the three categorical columns.
categorical_cols = ['League', 'Division', 'NewLeague']
dms = pd.get_dummies(df[categorical_cols])
# Target, then the remaining predictors (target and categoricals removed) as floats.
y = df["Salary"]
X_ = df.drop(columns=['Salary'] + categorical_cols).astype('float64')
# Keep a single indicator column per categorical variable.
kept_dummies = dms[['League_N', 'Division_W', 'NewLeague_N']]
X = pd.concat([X_, kept_dummies], axis=1)
# 75/25 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

## Model & Prediction

In [6]:
# Fit a support vector regressor with a linear kernel on the training split.
# `kernel` is passed by keyword: SVR's constructor parameters are keyword-only
# in current scikit-learn, so the positional form SVR("linear") raises TypeError.
svr_model = SVR(kernel="linear").fit(X_train, y_train)

In [7]:
# Show the fitted estimator's repr (its non-default parameters).
svr_model

SVR(kernel='linear')

In [8]:
# Peek at the first five predictions on the training split.
svr_model.predict(X_train)[:5]

array([219.32622627, 702.43039317, 623.20559641, 153.77538484,
       463.15191157])

In [9]:
# Peek at the first five predictions on the held-out test split.
svr_model.predict(X_test)[:5]

array([679.14754919, 633.72883529, 925.68639938, 270.28464317,
       530.26659421])

In [10]:
# Intercept (constant term) of the fitted model's decision function.
svr_model.intercept_

array([-80.15196063])

In [11]:
# Per-feature weights of the fitted model; scikit-learn exposes `coef_`
# only when the kernel is linear, as it is here.
svr_model.coef_

array([[ -1.2183904 ,   6.09602978,  -3.67574533,   0.14217072,
          0.51435925,   1.28388992,  12.55922527,  -0.08693754,
          0.46597185,   2.98259931,   0.52944513,  -0.79820793,
         -0.16015531,   0.30872795,   0.28842348,  -1.79560066,
          6.41868986, -10.74313785,   1.33374319]])

In [12]:
# Test-set RMSE of the untuned model: predict, take the MSE, then its square root.
y_pred = svr_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
np.sqrt(test_mse)

370.0408415795005

## Model Tuning

In [13]:
# Fresh, unfitted linear-kernel SVR to serve as the base estimator for the grid
# search below. `kernel` must be passed by keyword: SVR's constructor parameters
# are keyword-only in current scikit-learn, so SVR("linear") raises TypeError.
# Note: this rebinds `svr_model`, replacing the model fitted earlier.
svr_model = SVR(kernel="linear")

In [14]:
# Show the (unfitted) base estimator's repr.
svr_model

SVR(kernel='linear')

In [15]:
# Candidate values of the regularisation parameter C for the grid search.
svr_params = dict(C=[0.1, 0.5, 1, 3])

In [16]:
# Exhaustive search over the C grid with 5-fold cross-validation on the
# training split; `fit` returns the fitted search object itself.
svr_cv_model = GridSearchCV(
    estimator=svr_model,
    param_grid=svr_params,
    cv=5,
).fit(X_train, y_train)

In [17]:
# Parameter setting that achieved the best mean cross-validated score.
svr_cv_model.best_params_

{'C': 0.5}

In [18]:
# Same 5-fold search over C, re-run with progress logging (verbose=2) and
# with the fits distributed over all available cores (n_jobs=-1).
svr_cv_model = GridSearchCV(
    estimator=svr_model,
    param_grid=svr_params,
    cv=5,
    verbose=2,
    n_jobs=-1,
).fit(X_train, y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


In [91]:
# Best parameter setting found by the re-run search.
svr_cv_model.best_params_

{'C': 0.5}

In [92]:
# Refit a linear-kernel SVR on the training split using the parameters selected
# by the grid search. Reading them from `best_params_` (instead of hand-copying
# C = 0.5) keeps this cell in sync if the grid or the data changes.
# `kernel` is passed by keyword because SVR's constructor parameters are
# keyword-only in current scikit-learn.
svr_tuned = SVR(kernel="linear", **svr_cv_model.best_params_).fit(X_train, y_train)

In [93]:
# Predict the test split with the tuned model (rebinds `y_pred` from the baseline).
y_pred = svr_tuned.predict(X_test)

In [94]:
# Test-set RMSE of the tuned model (square root of the mean squared error).
np.sqrt(mean_squared_error(y_test, y_pred))

367.9874739022889