In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, root_mean_squared_error, mean_absolute_error
import os
# Work from the datasets folder so the bare CSV filenames read in later cells resolve.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
os.chdir("D:/Training/Academy/ML(Python)/Datasets")

In [11]:
# Load the Boston data: 'medv' is the target, every other column is a feature.
boston = pd.read_csv("Boston.csv")
X, y = boston.drop(columns='medv'), boston['medv']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=25
)

In [12]:
# Sweep k = 1..15 on the unscaled features, recording the test-set MAE per k.
Ks = np.arange(1, 16)
scores = []
for k in Ks:
    knn = KNeighborsRegressor(n_neighbors=k).fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    scores.append([k, mae])

df_scores = pd.DataFrame(scores, columns=['k', 'score'])
# Smallest error first.
df_scores.sort_values(by='score')

Unnamed: 0,k,score
4,5,4.276316
0,1,4.3
5,6,4.362829
3,4,4.384868
7,8,4.414145
6,7,4.443609
9,10,4.481118
8,9,4.496345
1,2,4.5
10,11,4.530801


In [16]:
# Single fit at k=3 on the unscaled features; the cell's value is the test-set MAE.
knn = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)
y_pred = knn.predict(X_test)
mean_absolute_error(y_test, y_pred)

4.563157894736841

In [17]:
# Standardise the features: the scaler is fitted on the training rows only,
# and that same fitted transform is then applied to the test rows.
scaler = StandardScaler().set_output(transform='pandas')
X_trn_scl = scaler.fit_transform(X_train)
X_tst_scl = scaler.transform(X_test)

# Same k=3 model as before, now on the standardised features.
knn = KNeighborsRegressor(n_neighbors=3).fit(X_trn_scl, y_train)
y_pred = knn.predict(X_tst_scl)
mean_absolute_error(y_test, y_pred)

2.6899122807017544

In [18]:
# Sweep k = 1..15 on the standardised features, recording the test-set MAE per k.
Ks = np.arange(1, 16)
scores = []
for k in Ks:
    knn = KNeighborsRegressor(n_neighbors=k).fit(X_trn_scl, y_train)
    y_pred = knn.predict(X_tst_scl)
    mae = mean_absolute_error(y_test, y_pred)
    scores.append([k, mae])

df_scores = pd.DataFrame(scores, columns=['k', 'score'])
# Smallest error first.
df_scores.sort_values(by='score')

Unnamed: 0,k,score
0,1,2.65
2,3,2.689912
3,4,2.719737
1,2,2.807566
5,6,2.864583
4,5,2.868947
14,15,2.928509
7,8,2.928783
10,11,2.941268
11,12,2.942654


#### Housing dataset

In [19]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_selector, ColumnTransformer

In [21]:
# Load the Housing data: 'price' is the target, the remaining columns are features.
housing = pd.read_csv("Housing.csv")
X = housing.drop(columns='price')
y = housing['price']

# One-hot encode the object-dtype columns (dropping the first level of each);
# all other columns pass through, and output names carry no transformer prefix.
ohe = OneHotEncoder(drop='first', sparse_output=False).set_output(transform='pandas')
col_trnf = ColumnTransformer(
    transformers=[('OHE', ohe, make_column_selector(dtype_include=object))],
    remainder='passthrough',
    verbose_feature_names_out=False,
).set_output(transform='pandas')

# Note: the encoder is fitted on the full frame, before the train/test split.
X = col_trnf.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=25
)

Without scaling

In [23]:
# Sweep k = 1..15 on the encoded but unscaled features; record the test-set MAE per k.
Ks = np.arange(1, 16)
scores = []
for k in Ks:
    knn = KNeighborsRegressor(n_neighbors=k).fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    scores.append([k, mae])

df_scores = pd.DataFrame(scores, columns=['k', 'score'])
# Smallest error first.
df_scores.sort_values(by='score')

Unnamed: 0,k,score
2,3,16956.605691
4,5,16974.496341
5,6,17033.884146
13,14,17095.358014
3,4,17185.521341
14,15,17270.135772
11,12,17344.276931
10,11,17398.662417
12,13,17427.580675
6,7,17518.982578


With scaling

In [22]:
# Fit the scaler on the training rows only, then reuse it for the test rows.
scaler = StandardScaler().set_output(transform='pandas')
X_trn_scl = scaler.fit_transform(X_train)
X_tst_scl = scaler.transform(X_test)

# Sweep k = 1..15 on the standardised features; record the test-set MAE per k.
Ks = np.arange(1, 16)
scores = []
for k in Ks:
    knn = KNeighborsRegressor(n_neighbors=k).fit(X_trn_scl, y_train)
    y_pred = knn.predict(X_tst_scl)
    mae = mean_absolute_error(y_test, y_pred)
    scores.append([k, mae])

df_scores = pd.DataFrame(scores, columns=['k', 'score'])
# Smallest error first.
df_scores.sort_values(by='score')

Unnamed: 0,k,score
3,4,12407.129573
4,5,12642.542683
9,10,12731.168293
14,15,12776.493902
10,11,12787.108647
13,14,12793.261324
12,13,12814.899156
11,12,12855.385671
8,9,12867.924119
7,8,12935.713415


#### Concrete Strength

In [24]:
# Switch the working directory to the Concrete Strength case folder; the next
# cell reads "Concrete_Data.csv" by bare filename.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
os.chdir("D:/Training/Academy/ML(Python)/Cases/Concrete Strength")

In [25]:
# Load the concrete data: 'Strength' is the target, the remaining columns are features.
conc = pd.read_csv("Concrete_Data.csv")
X = conc.drop(columns='Strength')
y = conc['Strength']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=25
)

In [26]:
# Sweep k = 1..15 on the unscaled features, recording the test-set MAE per k.
Ks = np.arange(1, 16)
scores = []
for k in Ks:
    knn = KNeighborsRegressor(n_neighbors=k).fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    scores.append([k, mae])

df_scores = pd.DataFrame(scores, columns=['k', 'score'])
# Smallest error first.
df_scores.sort_values(by='score')

Unnamed: 0,k,score
1,2,6.544013
3,4,6.576254
0,1,6.672589
4,5,6.748608
2,3,6.762762
5,6,7.064218
7,8,7.115376
6,7,7.157513
8,9,7.287192
9,10,7.43046


In [27]:
# Fit the scaler on the training rows only, then reuse it for the test rows.
scaler = StandardScaler().set_output(transform='pandas')
X_trn_scl = scaler.fit_transform(X_train)
X_tst_scl = scaler.transform(X_test)

# Sweep k = 1..15 on the standardised features; record the test-set MAE per k.
Ks = np.arange(1, 16)
scores = []
for k in Ks:
    knn = KNeighborsRegressor(n_neighbors=k).fit(X_trn_scl, y_train)
    y_pred = knn.predict(X_tst_scl)
    mae = mean_absolute_error(y_test, y_pred)
    scores.append([k, mae])

df_scores = pd.DataFrame(scores, columns=['k', 'score'])
# Smallest error first.
df_scores.sort_values(by='score')

Unnamed: 0,k,score
1,2,6.457735
2,3,6.559687
0,1,6.561942
3,4,6.637338
4,5,6.652647
5,6,6.747913
6,7,6.800347
7,8,6.981869
8,9,7.083916
9,10,7.195628
