In [1]:
import numpy as np
from src.NN import NN
import src.utils as utils

# Settings: every tunable value used by the cells below lives here.
csv_filename = "data/seeds_dataset.csv" # dataset path handed to utils.read_csv
hidden_layers = [5] # number of nodes in hidden layers i.e. [layer1, layer2, ...]
eta = 0.1 # learning rate (passed to model.train)
n_epochs = 400 # number of training epochs (passed to model.train)
n_folds = 4 # number of folds for cross-validation
seed_crossval = 1 # seed for cross-validation (passed to utils.crossval_folds)
seed_weights = 1 # seed passed to NN(seed=...); presumably fixes the initial weights - TODO confirm in src.NN

In [2]:

# Announce which file is being loaded, then read it.
# read_csv hands back three values: X and y (indexed row-wise further down)
# and n_classes (later used as the network's output dimension).
# NOTE(review): the exact effect of normalize=True is defined in src.utils.
print("Reading '{}'...".format(csv_filename))
X, y, n_classes = utils.read_csv(
    csv_filename,
    target_name="y",
    normalize=True,
)

Reading 'data/seeds_dataset.csv'...


In [3]:
# Split X column-wise into 7 equal groups and show the first group.
# NOTE(review): this prints every row; slicing (e.g. [:5]) would keep the
# notebook output short.
first_feature_column = np.hsplit(X, 7)[0]
print(first_feature_column)

[[ 0.14209777]
 [ 0.01118803]
 [-0.19206658]
 [-0.34709127]
 [ 0.44525718]
 [-0.16106164]
 [-0.05426685]
 [-0.25407645]
 [ 0.61406184]
 [ 0.54860697]
 [ 0.14209777]
 [-0.2816364 ]
 [-0.32986631]
 [-0.36776123]
 [-0.3815412 ]
 [-0.08871678]
 [-0.29541637]
 [ 0.29023248]
 [-0.05082185]
 [-0.73293052]
 [-0.23685149]
 [-0.25407645]
 [ 0.35568735]
 [-0.95341008]
 [ 0.05597294]
 [ 0.46248214]
 [-0.62958072]
 [-0.72604053]
 [-0.25407645]
 [-0.48144601]
 [-0.58135081]
 [ 0.22133261]
 [-0.26096644]
 [-0.31264134]
 [ 0.06975291]
 [ 0.43836719]
 [ 0.46592713]
 [ 0.76908654]
 [-0.01637192]
 [-0.19551157]
 [-0.45044107]
 [-0.46422104]
 [-0.58135081]
 [ 0.22477761]
 [ 0.09042287]
 [-0.36087125]
 [ 0.1765477 ]
 [ 0.04908295]
 [-0.01981691]
 [ 0.00429804]
 [-0.14383667]
 [ 0.32123742]
 [-0.12316671]
 [-0.1782866 ]
 [-0.11283173]
 [ 0.06286292]
 [-0.13350169]
 [ 0.024968  ]
 [ 0.18343769]
 [-0.9430751 ]
 [-1.18077964]
 [-1.24623451]
 [-0.85695027]
 [-0.56068085]
 [-0.71226056]
 [-0.67781062]
 [-0.17484

In [4]:
# Keep only the first of the 7 equal column groups of X for this experiment.
oX = np.hsplit(X, 7)[0]
N, d = oX.shape  # N = number of rows, d = number of columns actually used

# Report the configuration: each template is filled with its value and
# printed on its own line, in the same order as the settings are used.
print("Neural network model:")
for _template, _value in (
    (" input_dim = {}", d),
    (" hidden_layers = {}", hidden_layers),
    (" output_dim = {}", n_classes),
    (" eta = {}", eta),
    (" n_epochs = {}", n_epochs),
    (" n_folds = {}", n_folds),
    (" seed_crossval = {}", seed_crossval),
    (" seed_weights = {}\n", seed_weights),
):
    print(_template.format(_value))

# Build the cross-validation split.
# idx_all enumerates every row index 0..N-1; idx_folds holds one list of
# row indices per fold (list of lists).
idx_all = np.arange(N)
idx_folds = utils.crossval_folds(
    N,
    n_folds,
    seed=seed_crossval,
)

def _accuracy_percent(y_true, y_pred):
    """Return the percentage of positions where y_pred equals y_true."""
    return 100*np.sum(y_true==y_pred)/len(y_true)


def _average(values):
    """Return the arithmetic mean of a non-empty list of numbers."""
    return sum(values)/float(len(values))


# Per-fold accuracies (in percent) for the training and validation parts
acc_train, acc_valid = [], []
print("Cross-validating with {} folds...".format(len(idx_folds)))
for i, idx_valid in enumerate(idx_folds):

    # Every row that is not in the current fold is used for training
    idx_train = np.delete(idx_all, idx_valid)
    X_train, y_train = oX[idx_train], y[idx_train]
    X_valid, y_valid = oX[idx_valid], y[idx_valid]

    # A fresh network per fold, always constructed with the same seed
    model = NN(
        input_dim=d,
        output_dim=n_classes,
        hidden_layers=hidden_layers,
        seed=seed_weights,
    )
    model.train(X_train, y_train, eta=eta, n_epochs=n_epochs)

    # Predict on both parts before scoring either of them
    ypred_train = model.predict(X_train)
    ypred_valid = model.predict(X_valid)

    # Score the predictions against the known labels
    acc_train.append(_accuracy_percent(y_train, ypred_train))
    acc_valid.append(_accuracy_percent(y_valid, ypred_valid))

    # One progress line per fold
    print(" Fold {}/{}: acc_train = {:.2f}%, acc_valid = {:.2f}% (n_train = {}, n_valid = {})".format(
        i+1, n_folds, acc_train[-1], acc_valid[-1], len(X_train), len(X_valid)))

# Summary: accuracies averaged over all folds
print("  -> acc_train_avg = {:.2f}%, acc_valid_avg = {:.2f}%".format(
    _average(acc_train), _average(acc_valid)))

Neural network model:
 input_dim = 1
 hidden_layers = [5]
 output_dim = 3
 eta = 0.1
 n_epochs = 400
 n_folds = 4
 seed_crossval = 1
 seed_weights = 1

Cross-validating with 4 folds...
 Fold 1/4: acc_train = 83.54%, acc_valid = 78.85% (n_train = 158, n_valid = 52)
 Fold 2/4: acc_train = 82.91%, acc_valid = 84.62% (n_train = 158, n_valid = 52)
 Fold 3/4: acc_train = 84.81%, acc_valid = 88.46% (n_train = 158, n_valid = 52)
 Fold 4/4: acc_train = 86.08%, acc_valid = 82.69% (n_train = 158, n_valid = 52)
  -> acc_train_avg = 84.34%, acc_valid_avg = 83.65%


In [12]:
# `model` is the network left over from the last cross-validation fold.
# NOTE(review): index 1 is presumably the second weight set (hidden -> output);
# its length of 3 matches output_dim - confirm against NN.get_weights.
second_weight_set = model.get_weights()[1]
len(second_weight_set)

3