In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist

In [None]:
# Load the kNN data set; the relative path is resolved against the current working directory.
data = pd.read_csv('knnData.csv')
# Last expression of the cell, so the per-column summary statistics are shown as the cell output.
data.describe()

In [None]:
# Split the frame into feature and label arrays. Every selection uses a
# list of column names, so each result is 2-D: features have two columns
# and labels have a single column.
# NOTE(review): train and test columns come from the same frame, so this
# assumes both splits have the same number of rows - confirm against the CSV.
train_x, train_y = (
    np.array(data[columns])
    for columns in (['trainPoints_x1', 'trainPoints_x2'], ['trainLabel'])
)
test_x, test_y = (
    np.array(data[columns])
    for columns in (['testPoints_x1', 'testPoints_x2'], ['testLabel'])
)

In [None]:
def myKnn(xTrain, yTrain, xTest, yTest, distMetric, k=3):
    """Classify test points with an inverse-squared-distance weighted k-NN vote.

    Each test point is assigned ``-1`` when the weighted sum of its ``k``
    nearest training labels is negative and ``+1`` otherwise (ties go to
    ``+1``), so labels are expected to be encoded as -1 / +1.

    Parameters
    ----------
    xTrain : array-like, shape (n_train, n_features)
        Training points.
    yTrain : array-like, shape (n_train,) or (n_train, 1)
        Training labels.
    xTest : array-like, shape (n_test, n_features)
        Points to classify.
    yTest : array-like, shape (n_test,) or (n_test, 1)
        True labels of ``xTest``; used only to score the predictions.
    distMetric : str
        Metric name forwarded to ``scipy.spatial.distance.cdist``.
    k : int, optional
        Number of nearest neighbours that vote (default 3).

    Returns
    -------
    float
        Accuracy on the test set, in percent. The same value is printed.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Flatten the labels so both column vectors and 1-D arrays are accepted,
    # and so no array shape has to be hard-coded.
    yTrain = np.asarray(yTrain).reshape(-1)
    yTest = np.asarray(yTest).reshape(-1)

    # Row i holds the distances from test point i to every training point.
    distArray = cdist(xTest, xTrain, distMetric)

    # Sort once and reuse the indices for both the distances and the labels.
    idx = np.argsort(distArray, axis=1)[:, :k]
    nearDist = np.take_along_axis(distArray, idx, axis=1)
    nearTrain_y = yTrain[idx]  # labels of the k nearest neighbours

    # A test point that coincides with a training point would get an infinite
    # weight (and possibly inf - inf = nan in the vote). In that case only the
    # exact matches vote, each with weight 1.
    zeroDist = nearDist == 0
    hasExact = zeroDist.any(axis=1, keepdims=True)
    with np.errstate(divide='ignore'):
        w = 1 / nearDist**2
    w = np.where(hasExact, zeroDist.astype(float), w)

    pred = np.where((w * nearTrain_y).sum(axis=1) < 0, -1, 1)

    # Score against the labels passed in, not against a notebook global.
    accuracy = (pred == yTest).sum() / len(yTest) * 100
    print('Accuracy for %s norm is' % distMetric, accuracy)
    return accuracy

In [None]:
# Distance metrics to compare; each name is handed to myKnn as distMetric.
myMetrics = ['cityblock', 'euclidean', 'chebyshev']
# One myKnn call per metric; results collects whatever each call returns.
results = [
    myKnn(train_x, train_y, test_x, test_y, metric)
    for metric in myMetrics
]

In [1]:
import numpy as np

# Given data points: each tuple is one (x, y) observation. The
# leave-one-out cross-validation below reads index 0 as the input and
# index 1 as the target.
data_points = [(1, 3), (4, 3.5), (6, 5)]

# Function to calculate mean squared error
def mse(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``.

    Parameters
    ----------
    y_true, y_pred : scalar or array-like of real numbers
        Observed and predicted values; they must be broadcastable against
        each other.

    Returns
    -------
    numpy.float64
        Mean of the element-wise squared differences.
    """
    # Coerce to float arrays so plain Python lists can be subtracted and
    # unsigned-integer inputs cannot wrap around.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean((y_true - y_pred) ** 2)

# Constant model: y = c
def constant_model(x, c):
    """Predict with the constant model ``y = c``.

    ``x`` is accepted but not read by the body; ``c`` is returned unchanged.
    """
    prediction = c
    return prediction

# Linear model: y = mx + b
def linear_model(x, m, b):
    """Evaluate the line with slope ``m`` and intercept ``b`` at ``x``."""
    scaled = m * x
    return scaled + b

# Leave-one-out cross-validation, shared by both models
def _loocv_errors(points, fit_predict):
    """Return one squared error per point under leave-one-out cross-validation.

    Parameters
    ----------
    points : sequence of (x, y) pairs
        The full data set; each point is held out exactly once.
    fit_predict : callable
        Called as ``fit_predict(x_train, y_train, x_test)``; it must fit a
        model on the training lists and return the prediction at ``x_test``.

    Returns
    -------
    list
        ``mse(y_test, prediction)`` for each held-out point, in input order.
    """
    errors = []
    for held_out, (x_test, y_test) in enumerate(points):
        kept = [point for j, point in enumerate(points) if j != held_out]
        x_train = [point[0] for point in kept]
        y_train = [point[1] for point in kept]
        errors.append(mse(y_test, fit_predict(x_train, y_train, x_test)))
    return errors


def _fit_predict_constant(x_train, y_train, x_test):
    """Fit the constant model (mean of ``y_train``) and predict at ``x_test``."""
    c = np.mean(y_train)  # Constant value is the mean of y_train
    return constant_model(x_test, c)


def _fit_predict_linear(x_train, y_train, x_test):
    """Fit y = mx + b by least squares and predict at ``x_test``."""
    # Design matrix with one column for x and one column of ones for the intercept.
    A = np.vstack([x_train, np.ones(len(x_train))]).T
    m, b = np.linalg.lstsq(A, y_train, rcond=None)[0]
    return linear_model(x_test, m, b)


# Perform leave-one-out cross-validation for constant model
constant_mse = _loocv_errors(data_points, _fit_predict_constant)

# Perform leave-one-out cross-validation for linear model
linear_mse = _loocv_errors(data_points, _fit_predict_linear)

# Average the per-fold errors of each model
avg_constant_mse = np.mean(constant_mse)
avg_linear_mse = np.mean(linear_mse)

print("Mean Squared Error for Constant Model:", avg_constant_mse)
print("Mean Squared Error for Linear Model:", avg_linear_mse)

# The model with the smaller average error wins; anything that is neither
# strictly smaller nor strictly larger falls through to the last branch.
if avg_linear_mse > avg_constant_mse:
    print("Constant model is preferred.")
elif avg_linear_mse < avg_constant_mse:
    print("Linear model is preferred.")
else:
    print("Both models perform equally well.")

Mean Squared Error for Constant Model: 1.625
Mean Squared Error for Linear Model: 1.6378703703703745
Constant model is preferred.
