In [21]:
from nn import NeuralNetwork
import numpy as np
from sklearn.datasets import load_digits

## Load in data, make training (80%) and validation (20%) sets

In [22]:
# Fraction of the rows assigned to the training set; the remainder is validation.
TRAIN_FRACTION = 0.8

# Keep only the "data" entry of the digits bunch; nothing else from it is used here.
data = load_digits().data
n = len(data)  # number of rows (samples)
train_rows = int(TRAIN_FRACTION * n)  # 80% train, 20% validation

In [23]:
# Shuffle the rows with a fixed seed so the train/validation split is the same
# after every kernel restart. rng.permutation returns a shuffled copy along the
# first axis and leaves `data` untouched, so re-running this cell is idempotent.
SPLIT_SEED = 42
rng = np.random.default_rng(SPLIT_SEED)
data_shuffled = rng.permutation(data)

# First `train_rows` shuffled rows are for training; the rest are for validation.
X_train, X_val = data_shuffled[:train_rows], data_shuffled[train_rows:]

## Train autoencoder, searching over grid of hyperparameters

In [24]:
# Candidate values for each hyperparameter explored by the grid search.
epochs = list(range(10, 60, 10))       # 10, 20, ..., 50
lrs = [1, 0.1, 0.01, 0.001]
batch_sizes = list(range(10, 60, 10))  # 10, 20, ..., 50
activations = ["relu", "sigmoid"]

# Validation loss as a function of the hyperparameters: one axis per grid, in
# the order (epochs, learning rate, batch size, activation).
val_loss = np.zeros(tuple(len(grid) for grid in (epochs, lrs, batch_sizes, activations)))

In [25]:
# Exhaustive grid search: train one autoencoder per hyperparameter combination
# and record its validation loss in the matching cell of `val_loss`.
# np.ndindex walks the index space with the last axis varying fastest, i.e. the
# same order as nesting the loops epochs -> lrs -> batch_sizes -> activations.
# NOTE(review): the recorded run of this cell failed with
# "ValueError: shapes (10,127) and (64,16) not aligned"; the cause appears to be
# inside nn.NeuralNetwork rather than in this cell -- confirm that fit() accepts
# multi-column targets.
for grid_idx in np.ndindex(len(epochs), len(lrs), len(batch_sizes), len(activations)):
    e, l, b, a = grid_idx
    epoch, lr, batch_size, activation = epochs[e], lrs[l], batch_sizes[b], activations[a]

    # 64 -> 16 -> 64 architecture, same activation on both layers.
    layers = [
        {'input_dim': 64, 'output_dim': 16, 'activation': activation},
        {'input_dim': 16, 'output_dim': 64, 'activation': activation},
    ]
    # The literal 42 is presumably the random seed -- confirm against the
    # NeuralNetwork constructor signature.
    net = NeuralNetwork(layers, lr, 42, batch_size, epoch, "mse")

    # Reconstruction task: the inputs double as the targets.
    net.fit(X_train, X_train, X_val, X_val)

    # Score the reconstruction of the held-out rows.
    reconstruction = net.predict(X_val)
    val_loss[grid_idx] = net._mean_squared_error(X_val, reconstruction)

ValueError: shapes (10,127) and (64,16) not aligned: 127 (dim 1) != 64 (dim 0)

In [None]:
# Get one set of best hyperparameters (ties are possible; the first minimum in
# row-major order is taken).
# np.nanargmin ignores NaN entries, e.g. from a run whose loss diverged. The
# previous `val_loss == np.min(val_loss)` comparison matched nothing when any
# entry was NaN and then failed with an IndexError. nanargmin raises ValueError
# if every entry is NaN, i.e. when no configuration produced a usable loss.
one_min_idx = np.unravel_index(np.nanargmin(val_loss), val_loss.shape)
epoch = epochs[one_min_idx[0]]
lr = lrs[one_min_idx[1]]
batch_size = batch_sizes[one_min_idx[2]]
activation = activations[one_min_idx[3]]
print(f"Selected hyperparameters: epochs: {epoch}, learning rate: {lr}, batch_size: {batch_size}, activation: {activation}")