In [31]:
import pandas as pd
import numpy as np

In [32]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor

In [33]:
# Load the diabetes regression dataset as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [34]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [35]:
# Baseline: k-NN regressor with 15 neighbours and otherwise default settings.
# fit() returns the estimator itself, so it can be chained onto the constructor.
kr = KNeighborsRegressor(n_neighbors=15).fit(X_train, y_train)
y_pred = kr.predict(X_test)

In [36]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the baseline k-NN predictions against the held-out targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(
    f"Mean Squared Error: {mse}",
    f"R^2 Score: {r2}",
    sep="\n",
)

Mean Squared Error: 3081.7412234706626
R^2 Score: 0.41833655022555727


In [41]:
# Distance-weighted k-NN: same 15 neighbours as the baseline, but with
# weights='distance' instead of the default uniform weighting.
kr_weighted = KNeighborsRegressor(weights='distance', n_neighbors=15)
y_pred_weighted = kr_weighted.fit(X_train, y_train).predict(X_test)

mse_weighted = mean_squared_error(y_test, y_pred_weighted)
r2_weighted = r2_score(y_test, y_pred_weighted)
print(
    f"Weighted KNN Mean Squared Error: {mse_weighted}",
    f"Weighted KNN R^2 Score: {r2_weighted}",
    sep="\n",
)

Weighted KNN Mean Squared Error: 3009.5368937092494
Weighted KNN R^2 Score: 0.4319647611920757


In [42]:
# k-NN with the Manhattan (L1) metric in place of the default one.
kr_manhattan = KNeighborsRegressor(metric='manhattan', n_neighbors=15)
y_pred_manhattan = kr_manhattan.fit(X_train, y_train).predict(X_test)

mse_manhattan = mean_squared_error(y_test, y_pred_manhattan)
r2_manhattan = r2_score(y_test, y_pred_manhattan)
print(
    f"Manhattan KNN Mean Squared Error: {mse_manhattan}",
    f"Manhattan KNN R^2 Score: {r2_manhattan}",
    sep="\n",
)

Manhattan KNN Mean Squared Error: 3130.7231460674157
Manhattan KNN R^2 Score: 0.40909145402564806


In [49]:
# Sweep the Minkowski power parameter p from 1 to 10 in steps of 0.5 and
# report the test-set MSE and R^2 score for every value.
max_p = []  # R^2 score for each p, in the order the values are tried
for p in np.arange(1, 10.5, 0.5):
    kr_power = KNeighborsRegressor(n_neighbors=15, p=p).fit(X_train, y_train)
    y_pred_power = kr_power.predict(X_test)

    mse_power = mean_squared_error(y_test, y_pred_power)
    r2_power = r2_score(y_test, y_pred_power)
    print(
        f"Minkowski KNN (p={p}) Mean Squared Error: {mse_power}",
        f"Minkowski KNN (p={p}) R^2 Score: {r2_power}",
        sep="\n",
    )

    # Keep the score so the sweep can be inspected after the loop.
    max_p.append(r2_power)

Minkowski KNN (p=1.0) Mean Squared Error: 3130.7231460674157
Minkowski KNN (p=1.0) R^2 Score: 0.40909145402564806
Minkowski KNN (p=1.5) Mean Squared Error: 3118.9894631710363
Minkowski KNN (p=1.5) R^2 Score: 0.41130612877513295
Minkowski KNN (p=2.0) Mean Squared Error: 3081.7412234706626
Minkowski KNN (p=2.0) R^2 Score: 0.41833655022555727
Minkowski KNN (p=2.5) Mean Squared Error: 3061.3386766541826
Minkowski KNN (p=2.5) R^2 Score: 0.42218743026541117
Minkowski KNN (p=3.0) Mean Squared Error: 3096.043695380774
Minkowski KNN (p=3.0) R^2 Score: 0.41563702922483914
Minkowski KNN (p=3.5) Mean Squared Error: 2980.438601747816
Minkowski KNN (p=3.5) R^2 Score: 0.43745692022085036
Minkowski KNN (p=4.0) Mean Squared Error: 2930.7233458177275
Minkowski KNN (p=4.0) R^2 Score: 0.44684042946895897
Minkowski KNN (p=4.5) Mean Squared Error: 2869.4045942571784
Minkowski KNN (p=4.5) R^2 Score: 0.4584140412624901
Minkowski KNN (p=5.0) Mean Squared Error: 2777.0736079900125
Minkowski KNN (p=5.0) R^2 Scor

In [38]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares linear regression, scored on the same held-out
# split as the k-NN models.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)

mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)
print(
    f"Linear Regression Mean Squared Error: {mse_lr}",
    f"Linear Regression R^2 Score: {r2_lr}",
    sep="\n",
)

Linear Regression Mean Squared Error: 2900.1936284934814
Linear Regression R^2 Score: 0.4526027629719195


In [39]:
from sklearn.linear_model import Ridge

# Ridge regression (L2-regularised linear model) with alpha=1.0.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)

mse_ridge = mean_squared_error(y_test, y_pred_ridge)
r2_ridge = r2_score(y_test, y_pred_ridge)
print(
    f"Ridge Regression Mean Squared Error: {mse_ridge}",
    f"Ridge Regression R^2 Score: {r2_ridge}",
    sep="\n",
)

Ridge Regression Mean Squared Error: 3077.41593882723
Ridge Regression R^2 Score: 0.41915292635986545


In [40]:
from sklearn.preprocessing import PolynomialFeatures

# Polynomial regression: expand the features to degree 2, then fit a plain
# linear model on the expanded matrix.
# The expansion is fitted on the training split only and then applied to
# both splits.
poly = PolynomialFeatures(degree=2).fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

poly_reg = LinearRegression().fit(X_train_poly, y_train)
y_pred_poly = poly_reg.predict(X_test_poly)

mse_poly = mean_squared_error(y_test, y_pred_poly)
r2_poly = r2_score(y_test, y_pred_poly)
print(
    f"Polynomial Regression Mean Squared Error: {mse_poly}",
    f"Polynomial Regression R^2 Score: {r2_poly}",
    sep="\n",
)


Polynomial Regression Mean Squared Error: 3096.0283073442765
Polynomial Regression R^2 Score: 0.4156399336407993
