In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_diabetes


In [3]:
housing_df = pd.read_csv('housing.csv')
housing_df.head()


Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

# Select features and target
features = ['median_income', 'total_rooms', 'housing_median_age']
X = housing_df[features]
y = housing_df['median_house_value']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# KNN Model
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)

# Predictions
knn_preds = knn.predict(X_test)

# Evaluation
knn_mse = mean_squared_error(y_test, knn_preds)
print("Housing Dataset - KNN Regression MSE:", knn_mse)


Housing Dataset - KNN Regression MSE: 4386370000.0


In [7]:
import numpy as np

def local_weight(x, X_train, tau):
    m = X_train.shape[0]
    weights = np.exp(-np.sum((X_train - x)**2, axis=1) / (2 * tau**2))
    return np.diag(weights)

def predict_lwr(X_train, y_train, X_test, tau):
    m = X_test.shape[0]
    y_pred = np.zeros(m)
    
    for i in range(m):
        W = local_weight(X_test[i], X_train, tau)
        XTWX = X_train.T @ W @ X_train
        XTWy = X_train.T @ W @ y_train
        theta = np.linalg.pinv(XTWX) @ XTWy
        y_pred[i] = X_test[i] @ theta
    return y_pred

# Add bias term
X_train_lwr = np.c_[np.ones(X_train.shape[0]), X_train]
X_test_lwr = np.c_[np.ones(X_test.shape[0]), X_test]
y_train_lwr = np.array(y_train)

# Run LWR
tau = 0.5
y_pred_lwr = predict_lwr(X_train_lwr, y_train_lwr, X_test_lwr, tau)
lwr_mse = mean_squared_error(y_test, y_pred_lwr)
print("Housing Dataset - LWR MSE:", lwr_mse)


Housing Dataset - LWR MSE: 89957570000.0


In [9]:
from sklearn.datasets import load_diabetes

# Load dataset
diabetes = load_diabetes()
X_d = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
y_d = diabetes.target

# Use 3 features for fair comparison
features_d = ['bmi', 'bp', 's1']
X_d = X_d[features_d]

# Split
X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(X_d, y_d, test_size=0.2, random_state=42)

# KNN
knn_d = KNeighborsRegressor(n_neighbors=5)
knn_d.fit(X_train_d, y_train_d)
knn_preds_d = knn_d.predict(X_test_d)
knn_mse_d = mean_squared_error(y_test_d, knn_preds_d)
print("Diabetes Dataset - KNN MSE:", knn_mse_d)

# LWR
X_train_lwr_d = np.c_[np.ones(X_train_d.shape[0]), X_train_d]
X_test_lwr_d = np.c_[np.ones(X_test_d.shape[0]), X_test_d]
y_train_lwr_d = np.array(y_train_d)

y_pred_lwr_d = predict_lwr(X_train_lwr_d, y_train_lwr_d, X_test_lwr_d, tau=0.5)
lwr_mse_d = mean_squared_error(y_test_d, y_pred_lwr_d)
print("Diabetes Dataset - LWR MSE:", lwr_mse_d)


Diabetes Dataset - KNN MSE: 4119.697078651685
Diabetes Dataset - LWR MSE: 3727.6179640483197


In [11]:
print("\n--- Final MSE Comparison ---")
print("Housing - KNN:", knn_mse)
print("Housing - LWR:", lwr_mse)
print("Diabetes - KNN:", knn_mse_d)
print("Diabetes - LWR:", lwr_mse_d)



--- Final MSE Comparison ---
Housing - KNN: 4386370000.0
Housing - LWR: 89957570000.0
Diabetes - KNN: 4119.697078651685
Diabetes - LWR: 3727.6179640483197
