In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, GridSearchCV
import warnings
from sklearn.metrics import ConfusionMatrixDisplay, precision_score, recall_score, f1_score, classification_report, roc_curve, roc_auc_score, log_loss, r2_score

In [18]:
# Load the housing data from a CSV path given relative to the notebook's
# working directory.
df = pd.read_csv('../Datasets/Housing.csv')
# Preview the first rows to check column names and value formats.
df.head()

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
0,42000.0,5850,3,1,2,yes,no,yes,no,no,1,no
1,38500.0,4000,2,1,1,yes,no,no,no,no,0,no
2,49500.0,3060,3,1,1,yes,no,no,no,no,0,no
3,60500.0,6650,3,1,2,yes,yes,no,no,no,0,no
4,61000.0,6360,2,1,1,yes,no,no,no,no,0,no


In [19]:
# One-hot encode the non-numeric columns. drop_first=True drops the first
# level of each, so every yes/no column collapses to a single `*_yes` indicator.
dum_house = pd.get_dummies(df, drop_first=True)
# Preview the encoded frame.
dum_house.head()

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,garagepl,driveway_yes,recroom_yes,fullbase_yes,gashw_yes,airco_yes,prefarea_yes
0,42000.0,5850,3,1,2,1,True,False,True,False,False,False
1,38500.0,4000,2,1,1,0,True,False,False,False,False,False
2,49500.0,3060,3,1,1,0,True,False,False,False,False,False
3,60500.0,6650,3,1,2,0,True,True,False,False,False,False
4,61000.0,6360,2,1,1,0,True,False,False,False,False,False


In [20]:
# Separate the predictors from the target column (`price`).
X = dum_house.drop(columns='price')
y = dum_house['price']

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
)

In [27]:
# KNN regressor built with no arguments (library defaults); this one instance
# is reused as the final step of every pipeline in the cells that follow.
knn = KNeighborsRegressor()

In [29]:
# Baseline: standardise the features, then fit KNN, and report R^2 on the
# held-out test split.
scaler = StandardScaler()
pipe = Pipeline(steps=[('SCL', scaler), ('KNN', knn)])

# fit() returns the pipeline itself, so fitting and predicting can be chained.
y_pred = pipe.fit(X_train, y_train).predict(X_test)
print(r2_score(y_test, y_pred))

0.64720082660368


In [30]:
# Same KNN model, but with min-max scaling in the preprocessing step, to
# compare against the standardised pipeline on the same test split.
mnx_scaler = MinMaxScaler()
pipe = Pipeline(steps=[
    ('SCL', mnx_scaler),
    ('KNN', knn),
]).fit(X_train, y_train)

y_pred = pipe.predict(X_test)
print(r2_score(y_true=y_test, y_pred=y_pred))

0.532142292355778


In [33]:
# Tune the scaling step and the neighbour count together. The 'SCL' slot
# starts out empty (None) and is filled in by the grid below.
pipe = Pipeline(steps=[('SCL', None), ('KNN', knn)])
params = {
    'KNN__n_neighbors': np.arange(1, 25),      # k = 1 .. 24
    'SCL': [None, scaler, mnx_scaler],         # no scaling / standard / min-max
}

# Shuffled 5-fold CV with a fixed seed so the folds are reproducible.
kfold = KFold(n_splits=5, shuffle=True, random_state=24)

# The search is run on the full X, y (not only the training split); each
# candidate is scored with the pipeline's default score method.
gcv = GridSearchCV(estimator=pipe, param_grid=params, cv=kfold)
gcv.fit(X, y)

print(gcv.best_params_, gcv.best_score_, sep='\n')

{'KNN__n_neighbors': 9, 'SCL': StandardScaler()}
0.6143509227778978
