In [None]:
import numpy as np
# Seed NumPy's global (legacy) random state so that every later call to
# np.random.* in this notebook yields the same values on a fresh run.
RANDOM_SEED = 2049
np.random.seed(RANDOM_SEED)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

## Load data

In [None]:
# Read the two NumPy arrays used by the rest of the notebook.
# NOTE(review): judging by the file names, X holds already-preprocessed
# features and y the matching targets - confirm against the data pipeline.
features_path = "data/X_processed.npy"
targets_path = "data/y.npy"
X = np.load(features_path)
y = np.load(targets_path)

In [None]:
# Draw one random ordering of the row indices and apply it to both arrays,
# so that row k of X_shuffled still corresponds to entry k of y_shuffled.
n_samples = X.shape[0]
ids = np.random.permutation(n_samples)
X_shuffled, y_shuffled = X[ids, :], y[ids]

## Ridge regression: cross-validated choice of the regularisation strength

In [None]:
# Candidate regularisation strengths (passed to Ridge as `alpha`) and the
# number of cross-validation folds.
lambdas = [0.1, 1, 10, 100, 200]
n_folds = 10

# Build the validation folds once; they do not depend on alpha, so every
# candidate is scored on exactly the same splits.
# `ids` is a permutation of the row indices, so its consecutive chunks are
# disjoint sets of row indices into X_shuffled / y_shuffled.
# np.array_split spreads any remainder over the first folds, so every sample
# is held out exactly once (fixed-size slices of len(ids)//n_folds would never
# validate the trailing len(ids) % n_folds samples). When len(ids) is a
# multiple of n_folds the folds are identical to fixed-size slicing.
folds = np.array_split(ids, n_folds)

avg_rmse = list()  # mean validation RMSE per entry of `lambdas`, same order
for alpha in lambdas:
    avg_rmse_lambda = list()  # per-fold validation RMSE for this alpha
    for fold_ids in folds:
        clf = Ridge(alpha=alpha)  # fresh, unfitted model for every fold
        # Train on every row except the held-out ones, validate on those.
        X_train = np.delete(X_shuffled, fold_ids, axis=0)
        X_val = X_shuffled[fold_ids]
        y_train = np.delete(y_shuffled, fold_ids)
        y_val = y_shuffled[fold_ids]
        clf.fit(X_train, y_train)
        preds_val = clf.predict(X_val)
        # RMSE is the square root of the mean squared error.
        rmse = mean_squared_error(y_val, preds_val)**0.5
        avg_rmse_lambda.append(rmse)
    avg_rmse.append(np.mean(avg_rmse_lambda))

In [None]:
# Report the mean validation RMSE for each candidate regularisation strength.
print(avg_rmse)

# Plot RMSE against the actual lambda values rather than their list positions;
# the candidates span several orders of magnitude, hence the log-scaled x axis.
fig, ax = plt.subplots()
ax.plot(lambdas, avg_rmse, marker="o")
ax.set_xscale("log")
ax.set_xlabel("Ridge regularisation strength (alpha)")
ax.set_ylabel("Mean validation RMSE")
ax.set_title("Cross-validated RMSE per regularisation strength")
plt.show()

## Saving results

In [None]:
import pandas as pd

# Write the per-lambda mean validation RMSE values as a single-column CSV,
# one value per line, with neither an index column nor a header row.
# NOTE(review): the output file is named test_preds.csv, yet what is written
# here are cross-validation RMSE scores, not predictions - confirm intent.
df = pd.DataFrame({'y': avg_rmse})
df.to_csv('data/test_preds.csv', header=False, index=False)