### Doğrusal olmayan regresyon modelleri

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import neighbors
from sklearn.svm import SVR

In [2]:
# Install a blanket 'ignore' filter: from this cell on, warnings of every
# category are suppressed, including deprecation notices from the libraries
# used below. Remove this cell while debugging.
from warnings import filterwarnings
filterwarnings('ignore')

### KNN

In [3]:
# Load the Hitters data and keep only rows without any missing value.
df = pd.read_csv("Hitters.csv").dropna()

# One-hot encode the three categorical columns.
cat_cols = ['League', 'Division', 'NewLeague']
dms = pd.get_dummies(df[cat_cols])

# Target is Salary; the numeric predictors are everything except the target
# and the raw categorical columns, cast to float64.
y = df["Salary"]
X_ = df.drop(['Salary'] + cat_cols, axis=1).astype('float64')

# Append a single indicator column from each encoded categorical.
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)

# 75/25 train/test split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [4]:
# Preview the first rows of the training predictors.
X_train.head()

Unnamed: 0,AtBat,Hits,HmRun,Runs,RBI,Walks,Years,CAtBat,CHits,CHmRun,CRuns,CRBI,CWalks,PutOuts,Assists,Errors,League_N,Division_W,NewLeague_N
183,328.0,91.0,12.0,51.0,43.0,33.0,2.0,342.0,94.0,12.0,51.0,44.0,33.0,145.0,59.0,8.0,1,0,1
229,514.0,144.0,0.0,67.0,54.0,79.0,9.0,4739.0,1169.0,13.0,583.0,374.0,528.0,229.0,453.0,15.0,1,0,1
286,593.0,152.0,23.0,69.0,75.0,53.0,6.0,2765.0,686.0,133.0,369.0,384.0,321.0,315.0,10.0,6.0,0,1,0
102,233.0,49.0,2.0,41.0,23.0,18.0,8.0,1350.0,336.0,7.0,166.0,122.0,106.0,102.0,132.0,10.0,0,0,0
153,341.0,95.0,6.0,48.0,42.0,20.0,10.0,2964.0,808.0,81.0,379.0,428.0,221.0,158.0,4.0,5.0,1,1,1


## Model

In [5]:
# Baseline: k-nearest-neighbours regressor with every constructor default,
# fitted on the training split. fit() returns the estimator itself, so the
# name is simply rebound to the fitted object.
knn_model = KNeighborsRegressor()
knn_model = knn_model.fit(X_train, y_train)

In [6]:
# Neighbour count the baseline model uses; none was passed to the
# constructor, so this shows the library default.
knn_model.n_neighbors

5

In [7]:
# Distance metric in effect for the baseline model (also a constructor default).
knn_model.metric

'minkowski'

In [8]:
# Peek at the first ten test-set predictions of the baseline model.
knn_model.predict(X_test)[:10]

array([ 510.3334,  808.3334,  772.5   ,  125.5   , 1005.    ,  325.5   ,
        216.5   ,  101.5   ,  982.    ,  886.6666])

In [9]:
# Predictions for the whole test split; y_pred is what the RMSE cell below scores.
y_pred = knn_model.predict(X_test)

In [10]:
# Test-set RMSE of the baseline model: square root of the mean squared error.
np.sqrt(mean_squared_error(y_test,y_pred))

426.6570764525201

## Model Tuning

In [11]:
# Sweep k = 1..10: refit a KNN regressor for each k and record its RMSE on
# the test split in RMSE (index i holds the value for k = i + 1).
#
# Side effects: every pass rebinds knn_model and y_pred, so after this cell
# they refer to the k=10 model and its predictions, not the baseline model.
#
# NOTE(review): the scores here are computed on X_test, so picking k from
# this table would tune on the test set; treat it as an exploratory view.
RMSE = []
for k in range(1, 11):
    knn_model = KNeighborsRegressor(n_neighbors=k).fit(X_train, y_train)
    y_pred = knn_model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    RMSE.append(rmse)
    print("k=",k,"icin rmse degeri",rmse)

k= 1 icin rmse degeri 455.03925390751965
k= 2 icin rmse degeri 415.99629571490965
k= 3 icin rmse degeri 420.6765370082348
k= 4 icin rmse degeri 428.8564674588792
k= 5 icin rmse degeri 426.6570764525201
k= 6 icin rmse degeri 423.5071669008732
k= 7 icin rmse degeri 414.9361222421057
k= 8 icin rmse degeri 413.7094731463598
k= 9 icin rmse degeri 417.84419990871265
k= 10 icin rmse degeri 421.6252180741266


In [12]:
# GridSearchCV: choose n_neighbors by 10-fold cross-validation on the training split

In [13]:
# Candidate neighbour counts for the grid search: every integer from 1 to 29.
knn_params = {"n_neighbors": np.arange(1, 30)}

In [14]:
# Fresh, unfitted estimator to hand to the grid search in the next cell.
knn=KNeighborsRegressor()

In [15]:
# Exhaustive search over knn_params with 10-fold cross-validation, run on
# the training split only. No scoring argument is given, so the search uses
# its default scoring.
knn_cv_model = GridSearchCV(
    estimator=knn,
    param_grid=knn_params,
    cv=10,
).fit(X_train, y_train)

In [16]:
# Parameter setting selected by the cross-validated search.
knn_cv_model.best_params_

{'n_neighbors': 8}

In [17]:
# Final model: refit on the full training split using the neighbour count
# chosen by the grid search.
best_k = knn_cv_model.best_params_["n_neighbors"]
knn_tuned = KNeighborsRegressor(n_neighbors=best_k).fit(X_train, y_train)

In [18]:
# Test-set predictions from the tuned model (rebinds y_pred).
y_pred = knn_tuned.predict(X_test)

In [19]:
# Test-set RMSE of the tuned model.
np.sqrt(mean_squared_error(y_test,y_pred))

413.7094731463598