In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris, fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# KNN from scratch
def knn_predict(X_train, y_train, X_test, k=3):
    """Classify each row of ``X_test`` by majority vote among its k nearest training rows.

    Distances are Euclidean. Labels may be any values ``np.unique`` can
    sort (non-negative ints, negative ints, strings, ...); when several
    labels tie for the most votes, the smallest label wins.

    Args:
        X_train: 2-D array-like of training samples, one row per sample.
        y_train: 1-D array-like of training labels, aligned with ``X_train``.
        X_test: 2-D array-like of samples to classify.
        k: Number of neighbours that vote; must be at least 1.

    Returns:
        A NumPy array holding one predicted label per row of ``X_test``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    # Accept plain lists as well as arrays; fancy indexing below needs arrays.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)

    predictions = []
    for x in X_test:
        distances = np.linalg.norm(X_train - x, axis=1)
        # A stable sort makes the choice among equidistant neighbours
        # deterministic (earlier training rows win).
        nearest = np.argsort(distances, kind="stable")[:k]
        # np.unique returns labels sorted ascending, so argmax on the counts
        # keeps the "smallest label wins ties" rule without requiring
        # non-negative integer labels the way np.bincount does.
        labels, counts = np.unique(y_train[nearest], return_counts=True)
        predictions.append(labels[np.argmax(counts)])
    return np.array(predictions)

# LVQ from scratch
def lvq_train(X, y, n_classes, n_epochs=50, lr=0.01):
    """Train a one-prototype-per-class LVQ codebook.

    Each class ``c`` in ``range(n_classes)`` gets a single prototype,
    initialised to the first sample in ``X`` labelled ``c``. Samples are then
    visited in order for ``n_epochs`` passes: the nearest prototype (Euclidean
    distance) is pulled towards the sample when its index equals the sample's
    label, and pushed away otherwise.

    Args:
        X: 2-D array-like of training samples, one row per sample.
        y: 1-D array-like of integer class labels in ``0 .. n_classes - 1``;
            the label doubles as the prototype's row index.
        n_classes: Number of prototypes / classes.
        n_epochs: Number of full passes over ``X``.
        lr: Constant learning rate applied to every update.

    Returns:
        Array of shape ``(n_classes, n_features)`` with the trained prototypes.

    Raises:
        ValueError: If some class in ``range(n_classes)`` has no sample in ``y``.
    """
    # Accept plain lists; boolean masking and .shape need real arrays.
    X = np.asarray(X, dtype=float)
    y = np.asarray(y)

    codebook = np.zeros((n_classes, X.shape[1]))
    for label in range(n_classes):
        members = X[y == label]
        if len(members) == 0:
            # Without this check the failure is an opaque IndexError.
            raise ValueError(f"no training samples for class {label}")
        codebook[label] = members[0]

    for _ in range(n_epochs):
        for sample, target in zip(X, y):
            winner = np.argmin(np.linalg.norm(codebook - sample, axis=1))
            step = lr * (sample - codebook[winner])
            if winner == target:
                codebook[winner] += step  # attract the correct prototype
            else:
                codebook[winner] -= step  # repel the wrong prototype
    return codebook

def lvq_predict(codebook, X):
    """Assign each row of ``X`` to the index of its nearest codebook row.

    Args:
        codebook: 2-D array of prototypes, one row per prototype.
        X: Iterable of samples, each broadcastable against ``codebook`` rows.

    Returns:
        A NumPy array with, for every sample, the row index of the codebook
        entry at the smallest Euclidean distance.
    """
    return np.array(
        [np.argmin(np.linalg.norm(codebook - sample, axis=1)) for sample in X]
    )

# Linear Regression from scratch using gradient descent
def linear_regression(X, y, lr=0.01, epochs=200):
    """Fit linear-regression weights with batch gradient descent.

    A column of ones is prepended to ``X`` so the first returned weight is the
    intercept. Weights start at zero and are updated ``epochs`` times using the
    gradient ``X^T (X w - y) / n``.

    Args:
        X: Feature array with one row per sample.
        y: Target vector aligned with the rows of ``X``.
        lr: Gradient-descent step size.
        epochs: Number of full-batch updates.

    Returns:
        1-D weight array: intercept first, then one coefficient per feature.
    """
    n_samples = len(y)
    design = np.column_stack((np.ones(X.shape[0]), X))
    coef = np.zeros(design.shape[1])
    for _ in range(epochs):
        residual = np.dot(design, coef) - y
        coef -= lr * (np.dot(design.T, residual) / n_samples)
    return coef

def predict_lr(X, weights):
    """Evaluate a linear model whose first weight is the intercept.

    Args:
        X: Feature array with one row per sample (no bias column).
        weights: 1-D array: intercept first, then one coefficient per feature.

    Returns:
        1-D array of predictions, one per row of ``X``.
    """
    with_bias = np.column_stack((np.ones(X.shape[0]), X))
    return np.dot(with_bias, weights)

# --- Classification on Iris: KNN and LVQ ---------------------------------
iris = load_iris()
X_iris, y_iris = iris.data, iris.target
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris,
    y_iris,
    test_size=0.2,
    random_state=42,
)

# KNN: accuracy is the fraction of test labels predicted correctly.
knn_preds = knn_predict(X_train_iris, y_train_iris, X_test_iris, k=3)
knn_acc = (knn_preds == y_test_iris).mean()

# LVQ: train one prototype per Iris class, then score the same way.
lvq_codebook = lvq_train(
    X_train_iris,
    y_train_iris,
    n_classes=3,
    n_epochs=100,
    lr=0.01,
)
lvq_preds = lvq_predict(lvq_codebook, X_test_iris)
lvq_acc = (lvq_preds == y_test_iris).mean()

# --- Regression on California Housing ------------------------------------
housing = fetch_california_housing()
X_h, y_h = housing.data, housing.target
X_train_h, X_test_h, y_train_h, y_test_h = train_test_split(
    X_h,
    y_h,
    test_size=0.2,
    random_state=42,
)

# Standardise features; the scaler is fit on the training split only and
# then reused for the test split.
scaler = StandardScaler()
X_train_h = scaler.fit_transform(X_train_h)
X_test_h = scaler.transform(X_test_h)

# Fit with the default learning rate / epoch count and predict on the test split.
weights = linear_regression(X_train_h, y_train_h)
y_pred = predict_lr(X_test_h, weights)

# Mean squared error, or a diagnostic message if gradient descent diverged.
if np.isnan(y_pred).any():
    mse = "Prediction contains NaN"
else:
    mse = np.mean((y_pred - y_test_h) ** 2)

# --- Report --------------------------------------------------------------
print(f"KNN Accuracy: {round(knn_acc, 4)}")
print(f"LVQ Accuracy: {round(lvq_acc, 4)}")
print(f"Linear Regression MSE (California Housing): {mse}")


KNN Accuracy: 1.0
LVQ Accuracy: 0.9667
Linear Regression MSE (California Housing): 0.7193985663893881
