In [1]:
# Nonlinear Regression Models

  # KNN (K-nearest Neighbours Regression)                              # Random Forests
  # SVR (Support Vector Regression)                                    # Gradient Boosting Machines(GBM)
  # ANN (Artificial Neural Network)                                    # Extreme Gradient Boosting (XGBoost)
  # Classification and Regression Tress (CART)                         # LightGBM  
  # Bagging (Bootstrap Aggregation)                                    # CatBoost
  

In [30]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score, ShuffleSplit
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale 
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor

from warnings import filterwarnings

In [31]:
data = pd.read_csv("C:/Users/Derya/Downloads/Team_Basketball.csv")
df = data.copy()
df = df.dropna()
dms = pd.get_dummies(df[["League","Division","NewLeague"]])
y = df["Salary"]
X_ = df.drop(["Salary","League","Division","NewLeague"], axis=1).astype("float")
X = pd.concat([X_,dms[["League_N","Division_W","NewLeague_N"]]], axis=1)
X_train, X_test,y_train, y_test = train_test_split(X,y, test_size=0.25,random_state=42)

In [32]:
knn_model = KNeighborsRegressor().fit(X_train, y_train)

In [33]:
knn_model.n_neighbors

5

In [34]:
# Prediction()

In [35]:
y_predict = knn_model.predict(X_test)

In [36]:
np.sqrt(mean_squared_error(y_test, y_predict))

426.6570764525201

In [37]:
RMSE = []

for k in range(10):
    k += 1
    knn_model = KNeighborsRegressor(n_neighbors = k).fit(X_train, y_train)
    y_pred = knn_model.predict(X_train)
    rmse = np.sqrt(mean_squared_error(y_train,y_pred))
    RMSE.append(rmse)
    print("k = ", k," RMSE VALUE: ", rmse)

k =  1  RMSE VALUE:  0.0
k =  2  RMSE VALUE:  179.52761335480352
k =  3  RMSE VALUE:  205.20157172291863
k =  4  RMSE VALUE:  220.5139794876305
k =  5  RMSE VALUE:  239.6467132541376
k =  6  RMSE VALUE:  243.5904190007242
k =  7  RMSE VALUE:  258.1478781634636
k =  8  RMSE VALUE:  266.05374203349805
k =  9  RMSE VALUE:  269.73782093553376
k =  10  RMSE VALUE:  271.2798300436963


In [38]:
# Model Tuning

In [39]:
knn_params = {"n_neighbors": np.arange(1,30)}

In [40]:
knn = KNeighborsRegressor()

In [41]:
knn_cv_model = GridSearchCV(knn, knn_params, cv=10)

In [42]:
knn_cv_model.fit(X_train,y_train)

In [43]:
knn_cv_model.best_params_["n_neighbors"]

8

In [44]:
RMSE = [] 
RMSE_CV = []
for k in range(10):
    k = k+1
    knn_model = KNeighborsRegressor(n_neighbors = k).fit(X_train, y_train)
    y_pred = knn_model.predict(X_train) 
    rmse = np.sqrt(mean_squared_error(y_train,y_pred)) 
    rmse_cv = np.sqrt(-1*cross_val_score(knn_model, X_train, y_train, cv=10, 
                                         scoring = "neg_mean_squared_error").mean())
    RMSE.append(rmse) 
    RMSE_CV.append(rmse_cv)
    print("k =" , k , "için RMSE değeri: ", rmse, "RMSE_CV değeri: ", rmse_cv )

k = 1 için RMSE değeri:  0.0 RMSE_CV değeri:  325.3947514706382
k = 2 için RMSE değeri:  179.52761335480352 RMSE_CV değeri:  293.24000183333817
k = 3 için RMSE değeri:  205.20157172291863 RMSE_CV değeri:  283.7486667487823
k = 4 için RMSE değeri:  220.5139794876305 RMSE_CV değeri:  286.3240222024089
k = 5 için RMSE değeri:  239.6467132541376 RMSE_CV değeri:  290.0705466132226
k = 6 için RMSE değeri:  243.5904190007242 RMSE_CV değeri:  298.1263115575851
k = 7 için RMSE değeri:  258.1478781634636 RMSE_CV değeri:  294.77070479194987
k = 8 için RMSE değeri:  266.05374203349805 RMSE_CV değeri:  291.98672028891235
k = 9 için RMSE değeri:  269.73782093553376 RMSE_CV değeri:  295.7162739573105
k = 10 için RMSE değeri:  271.2798300436963 RMSE_CV değeri:  301.31047022701154


In [45]:
# Model Tuning

In [47]:
knn_tuned = KNeighborsRegressor(n_neighbors=knn_cv_model.best_params_["n_neighbors"])

In [48]:
knn_tuned.fit(X_train, y_train)

In [51]:
np.sqrt(mean_squared_error(y_test,knn_tuned.predict(X_test)))

413.7094731463598