In [None]:
import numpy as np
# Seed NumPy's global RNG so the row permutation drawn later is reproducible.
SEED = 2049
np.random.seed(SEED)

In [None]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

## Load data

In [None]:
# Feature matrix and target vector, read from NumPy binary files.
X = np.load("data/X_processed.npy")
y = np.load("data/y.npy")

# Fail fast on misaligned inputs: the same row permutation is applied to both
# arrays further down, so a longer `y` would otherwise be silently subsampled
# instead of raising an error.
assert X.shape[0] == y.shape[0], (
    f"X has {X.shape[0]} rows but y has {y.shape[0]}; they must match"
)

In [None]:
# Draw one random ordering of the row positions and apply it to both arrays
# so that features and targets stay paired after shuffling.
n_samples = X.shape[0]
index = np.random.permutation(n_samples)
X_transformed, y_transformed = X[index, :], y[index]

## Cross-validate Ridge regression

In [None]:
# Ridge regularisation strengths to compare.
alphas = [0.1, 1, 10, 100, 200]
# Number of cross-validation folds.
no_folds = 10
# Rows held out per fold; floor division discards any remainder.
partition_length = index.size // no_folds

# manual cross-validation
def rmse_cv(alpha):
    """Return the mean RMSE of a Ridge model across `no_folds` manual CV folds.

    Reads the module-level `index`, `X_transformed`, `y_transformed`,
    `no_folds` and `partition_length`.

    For fold ``k`` the held-out row positions are the *values* stored in
    ``index[k*partition_length : (k+1)*partition_length]``; those positions
    are applied to the already shuffled arrays. The held-out rows are removed
    with ``np.delete`` to form the training set, a fresh ``Ridge(alpha=alpha)``
    is fitted on it, and the RMSE on the held-out rows is recorded.

    NOTE: entries of `index` past ``no_folds * partition_length`` are never
    used as held-out positions, so those rows only ever appear in training.

    Parameters
    ----------
    alpha : value passed to ``Ridge(alpha=...)``.

    Returns
    -------
    ``np.mean`` of the per-fold RMSE values.
    """
    fold_scores = []
    for fold in range(no_folds):
        start = fold * partition_length
        held_out = index[start : start + partition_length]

        # Everything except the held-out rows is used for fitting.
        X_train = np.delete(X_transformed, held_out, axis=0)
        y_train = np.delete(y_transformed, held_out)
        fitted = Ridge(alpha=alpha).fit(X_train, y_train)

        y_hat = fitted.predict(X_transformed[held_out])
        fold_mse = mean_squared_error(y_transformed[held_out], y_hat)
        fold_scores.append(fold_mse ** 0.5)
    return np.mean(fold_scores)  # cv score per alpha


In [None]:
# One mean cross-validated RMSE per candidate alpha, in the order of `alphas`.
rmse_list = [rmse_cv(alpha) for alpha in alphas]

## Saving results

In [None]:
# NOTE: despite the file name, the values written are the per-alpha mean CV
# RMSE scores from `rmse_list` (one per row, no header and no index column).
df = pd.DataFrame({'y': rmse_list})
df.to_csv('data/test_preds.csv', header=False, index=False)