<a href="https://colab.research.google.com/github/Yashodha-kapali8/ML_Programs/blob/main/LWR_Activity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Activity to complete:

1. Consider two more features and implement the algorithm
2. Implement the same for Diabetes dataset available in sklearn.datasets
3. Compare KNN regression and Local weighted regression (LWR) algorithm considering multiple features for both housing dataset and Diabetes dataset.

In [57]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml, load_diabetes
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

# ----- Helper Functions -----
def add_bias(X):
    return np.hstack([np.ones((X.shape[0], 1)), X])

def gaussian_kernel(x0, X, tau):
    return np.exp(-np.sum((X - x0) ** 2, axis=1) / (2 * tau ** 2))

def predict_lwr(X_query, X_train, y_train, tau):
    m = X_train.shape[0]
    X_bias = add_bias(X_train)
    y_pred = []
    for x0 in X_query:
        x0_bias = np.hstack([1, x0]).reshape(1, -1)
        weights = gaussian_kernel(x0_bias, X_bias, tau)
        W = np.diag(weights)
        XTWX = X_bias.T @ W @ X_bias
        XTWy = X_bias.T @ W @ y_train
        theta = np.linalg.pinv(XTWX) @ XTWy
        y_pred.append(x0_bias @ theta)
    return np.array(y_pred).flatten()

def evaluate_model(name, y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    rel_mse = mse / (np.mean(y_true) ** 2)
    cv = rmse / np.mean(y_true)
    return {
        "Model": name,
        "MSE": mse,
        "RMSE": rmse,
        "Relative MSE": rel_mse,
        "Coefficient of Variation": cv
    }

# ----- Process Dataset -----
def process_dataset(X, y, dataset_name):
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # KNN Regression
    knn = KNeighborsRegressor(n_neighbors=5)
    knn.fit(X_scaled, y)
    y_pred_knn = knn.predict(X_scaled)

    # LWR Regression
    tau = 0.5
    y_pred_lwr = predict_lwr(X_scaled, X_scaled, y, tau)

    # Evaluate
    result_knn = evaluate_model(f"{dataset_name} - KNN", y, y_pred_knn)
    result_lwr = evaluate_model(f"{dataset_name} - LWR", y, y_pred_lwr)
    return result_knn, result_lwr

# ----- Load Boston Housing Dataset -----
boston = fetch_openml(name='boston', version=1, as_frame=True)
X_boston = boston.data.astype(float)
y_boston = boston.target.astype(float)

# ----- Load Diabetes Dataset -----
diabetes = load_diabetes()
X_diabetes = diabetes.data
y_diabetes = diabetes.target

# ----- Run models and evaluate -----
results = []
results.extend(process_dataset(X_boston, y_boston, "Boston Housing"))
results.extend(process_dataset(X_diabetes, y_diabetes, "Diabetes"))

# ----- Display Results -----
df_results = pd.DataFrame(results)
df_results = df_results[["Model", "MSE", "RMSE", "Relative MSE", "Coefficient of Variation"]]
df_results.reset_index(drop=True, inplace=True)
df_results.round(4)


Unnamed: 0,Model,MSE,RMSE,Relative MSE,Coefficient of Variation
0,Boston Housing - KNN,11.3185,3.3643,0.0223,0.1493
1,Boston Housing - LWR,0.2212,0.4703,0.0004,0.0209
2,Diabetes - KNN,2342.5559,48.4,0.1012,0.3181
3,Diabetes - LWR,2.6327,1.6226,0.0001,0.0107
