In [105]:
import numpy as np
from sklearn.model_selection import KFold

# Shared 10-fold splitter used by cv(); only n_splits is set, so every other
# KFold option is left at the library default.
kf = KFold(n_splits=10)

In [106]:
# Parse the comma-separated training file into a 2-D float array.
data = np.genfromtxt('train.csv', delimiter=',')
# Row 0 is dropped from both slices below — presumably it is the CSV header
# line; TODO confirm against train.csv.
y = data[1:,0]   # target: first column of the remaining rows
X = data[1:,1:]  # features: all other columns of the same rows

In [107]:
def ridge(lambd: float, X_train: np.ndarray, y_train: np.ndarray) -> np.ndarray:
  """Fit ridge regression in closed form and return the weight vector.

  Solves the regularized normal equations

      (X^T X + lambd * I) w = X^T y

  No intercept column is added here; every weight is penalized equally.

  Args:
    lambd: Regularization strength multiplying the identity matrix.
    X_train: Design matrix of shape (n_samples, n_features).
    y_train: Targets with n_samples entries along the first axis.

  Returns:
    The weights w solving the system above.

  Raises:
    numpy.linalg.LinAlgError: If the regularized matrix is singular
      (possible when lambd is 0 and X^T X is rank deficient).
  """
  gram = X_train.T @ X_train
  # The penalty belongs on the diagonal only. Adding the bare scalar would
  # broadcast lambd onto every entry of the Gram matrix and solve a
  # different (wrong) system.
  regularized = gram + lambd * np.eye(gram.shape[0])
  # Solve the linear system directly instead of forming an explicit inverse;
  # it gives the same solution with better numerical behavior.
  return np.linalg.solve(regularized, X_train.T @ y_train)

In [108]:
def predict(w: np.ndarray, X: np.ndarray) -> np.ndarray:
  """Apply a linear model: return the matrix product of X with the weights w."""
  predictions = np.matmul(X, w)
  return predictions

In [109]:
def rmse(w: np.ndarray, X_test: np.ndarray, y_test: np.ndarray) -> float:
  """Root-mean-square error of the linear model w on (X_test, y_test).

  Predictions come from predict(w, X_test); the squared differences to
  y_test are averaged over all elements before taking the square root.
  """
  residuals = y_test - predict(w, X_test)
  return np.sqrt(np.mean(np.square(residuals)))

In [110]:
def cv(lambd: float, X: np.ndarray, y: np.ndarray) -> float:
  """Cross-validated RMSE of ridge regression for one regularization value.

  For every train/test split produced by the module-level splitter kf, the
  model is fitted on the training rows and scored with rmse on the held-out
  rows. The per-fold scores are then averaged.

  Args:
    lambd: Regularization strength forwarded to ridge.
    X: Feature matrix, indexed by row with the split indices.
    y: Targets, indexed the same way as X.

  Returns:
    The mean of the per-fold RMSE values.
  """
  fold_scores = []

  for fit_idx, holdout_idx in kf.split(X):
    weights = ridge(lambd, X[fit_idx], y[fit_idx])
    fold_scores.append(rmse(weights, X[holdout_idx], y[holdout_idx]))

  return np.asarray(fold_scores).mean()

In [111]:
# Regularization strengths to evaluate.
lambds = [0.1, 1, 10, 100, 200]

# One cross-validated score (the value returned by cv) per entry of lambds,
# kept in the same order.
output = [cv(lambd, X, y) for lambd in lambds]
# Persist the scores to output.csv in the working directory.
np.savetxt('output.csv', output)