In [None]:
import numpy as np

def nonlinear_transform(x1, x2):
  """Expand the point (x1, x2) into the 8-term nonlinear feature vector.

  The features are, in order:
    1, x1, x2, x1^2, x2^2, x1*x2, |x1 - x2|, |x1 + x2|

  Args:
    x1: First input coordinate.
    x2: Second input coordinate.

  Returns:
    A NumPy array holding the eight features listed above.
  """
  difference = x1 - x2
  total = x1 + x2
  features = [
    1,              # constant (bias) term
    x1,
    x2,
    x1**2,
    x2**2,
    x1 * x2,        # interaction term
    abs(difference),
    abs(total),
  ]
  return np.array(features)

def parse_data(filename):
  """Read whitespace-separated "x1 x2 y" rows and build the transformed dataset.

  Every non-blank line must hold exactly three numbers: the two input
  coordinates followed by the label. The coordinates are passed through
  nonlinear_transform before being stored. Blank (whitespace-only) lines,
  such as an extra empty line at the end of the file, are skipped rather
  than triggering an unpacking error.

  Args:
    filename: Path of the text file to read.

  Returns:
    A tuple (data, labels) of NumPy arrays: data holds one transformed
    feature row per parsed line, labels holds the matching y values as floats.

  Raises:
    ValueError: If a non-blank line does not contain exactly three values
      convertible to float.
  """
  data = []
  labels = []
  with open(filename, 'r') as file:
    for line in file:
      fields = line.split()  # split() with no argument also drops surrounding whitespace
      if not fields:  # blank line: nothing to parse
        continue
      x1, x2, y = map(float, fields)
      data.append(nonlinear_transform(x1, x2))
      labels.append(y)
  return np.array(data), np.array(labels)

# Load both splits; parse_data returns the already-transformed features.
train_data, train_labels = parse_data("training.txt")
test_data, test_labels = parse_data("testing.txt")

# Linear regression without regularization: the weights are the
# pseudo-inverse of the training matrix applied to the training labels.
X, y = train_data, train_labels
w = np.linalg.pinv(train_data) @ train_labels

def classify(X, w):
  """Label every row of X by the sign of its linear score X @ w.

  Args:
    X: Feature matrix (presumably one sample per row, as built by parse_data).
    w: Weight vector.

  Returns:
    np.sign(X @ w): 1 for positive scores, -1 for negative scores and 0
    where the score is exactly zero.
  """
  scores = X @ w
  return np.sign(scores)

def misclassified(X, y, w):
  """Return the fraction of samples whose predicted label differs from y.

  Predictions come from classify(X, w). A prediction of 0 (score exactly
  zero) therefore counts as an error unless the label is also 0.

  Args:
    X: Feature matrix passed to classify.
    y: True labels, compared element-wise with the predictions.
    w: Weight vector passed to classify.

  Returns:
    The mean of the element-wise mismatch mask, i.e. the error rate.
  """
  mismatches = classify(X, w) != y
  return np.mean(mismatches)

# Classification error of the current weights on each split.
Ein = misclassified(X=train_data, y=train_labels, w=w)    # E_in: training split
Eout = misclassified(X=test_data, y=test_labels, w=w)     # E_out: testing split

print(f"In-sample error: {Ein}")
print(f"Out-of-sample error: {Eout}")

In-sample error: 0.02857142857142857
Out-of-sample error: 0.084


In [None]:
def linear_regression_with_decay(X, y, lambda_val):
  """Fit linear regression with weight decay (ridge regularization).

  Finds the weights w that satisfy the regularized normal equations
      (X^T X + lambda_val * I) w = X^T y.
  The system is solved directly with np.linalg.solve instead of forming the
  explicit matrix inverse, which is cheaper and numerically more accurate
  while producing the same solution.

  Args:
    X: Feature matrix with one sample per row; X.shape[1] is the number of
      features.
    y: Target values, one per row of X.
    lambda_val: Weight-decay coefficient multiplying the identity matrix.

  Returns:
    The weight array w.

  Raises:
    numpy.linalg.LinAlgError: If X^T X + lambda_val * I is singular.
  """
  n_features = X.shape[1]
  regularized_gram = X.T @ X + lambda_val * np.identity(n_features)
  return np.linalg.solve(regularized_gram, X.T @ y)

# Read both data files again under the names used by the weight-decay cells.
X_train, y_train = parse_data("training.txt")
X_test, y_test = parse_data("testing.txt")

# Weight-decay strength for this run.
lambda_val = 10**-3

# Regularized fit on the training split.
w = linear_regression_with_decay(X_train, y_train, lambda_val=lambda_val)

# Error rate of the regularized weights on each split.
in_sample_error = misclassified(X=X_train, y=y_train, w=w)
out_of_sample_error = misclassified(X=X_test, y=y_test, w=w)

print(f'In-sample error: {in_sample_error}')
print(f'Out-of-sample error: {out_of_sample_error}')

In-sample error: 0.02857142857142857
Out-of-sample error: 0.08


In [None]:
# Same experiment with a much stronger penalty: lambda = 10^k with k = 3.
lambda_val = 10**3

# Regularized fit on the training split.
w = linear_regression_with_decay(X_train, y_train, lambda_val=lambda_val)

# Error rate of the regularized weights on each split.
in_sample_error = misclassified(X=X_train, y=y_train, w=w)
out_of_sample_error = misclassified(X=X_test, y=y_test, w=w)

print(f'In-sample error: {in_sample_error}')
print(f'Out-of-sample error: {out_of_sample_error}')

In-sample error: 0.37142857142857144
Out-of-sample error: 0.436


In [None]:
# Candidate penalties lambda = 10^k for k = 2, 1, 0, -1, -2.
lambdas = [10**k for k in (2, 1, 0, -1, -2)]

# Refit and report both error rates for every candidate lambda.
for lambda_val in lambdas:
  w = linear_regression_with_decay(X_train, y_train, lambda_val=lambda_val)

  in_sample_error = misclassified(X=X_train, y=y_train, w=w)
  out_of_sample_error = misclassified(X=X_test, y=y_test, w=w)

  print(f'lambda = {lambda_val}')
  print(f'In-sample error: {in_sample_error}')
  print(f'Out-of-sample error: {out_of_sample_error}')

lambda = 100
In-sample error: 0.2
Out-of-sample error: 0.228
lambda = 10
In-sample error: 0.05714285714285714
Out-of-sample error: 0.124
lambda = 1
In-sample error: 0.0
Out-of-sample error: 0.092
lambda = 0.1
In-sample error: 0.02857142857142857
Out-of-sample error: 0.056
lambda = 0.01
In-sample error: 0.02857142857142857
Out-of-sample error: 0.084
