In [42]:
import numpy as np
from matplotlib import pyplot as plt
from NeuralNetwork import NeuralNetwork
from functions import *
from Layer import Layer, Input
from utils import *

In [43]:
# Locations of the MONK-1 training and test files.
# NOTE(review): these are absolute paths on one specific machine; the notebook
# only runs elsewhere after editing them.
MONK1_TRAIN_PATH = '/Users/HP/Desktop/UNI/LM_1/MachineLearning/ML_prj/data/MONK/monks-1.train'
MONK1_TEST_PATH = '/Users/HP/Desktop/UNI/LM_1/MachineLearning/ML_prj/data/MONK/monks-1.test'

# Read both splits with the imported `get_data` helper.
df = get_data(MONK1_TRAIN_PATH)
df_test = get_data(MONK1_TEST_PATH)

In [44]:
#df = get_data(r"\Users\s512fj-ej021t\OneDrive\Desktop\ML\monks-1.train")
#df_test = get_data(r"\Users\s512fj-ej021t\OneDrive\Desktop\ML\monks-1.test")

In [45]:
# Training split: remove the 'target' and 'id' columns and transpose, so that
# every column of X_train is one sample; the labels are cast to int.
X_train = df.drop(columns=['target', 'id']).to_numpy().T
y_train = df['target'].apply(int).to_numpy().T

# Test split, built exactly the same way.
X_test = df_test.drop(columns=['target', 'id']).to_numpy().T
y_test = df_test['target'].apply(int).to_numpy().T

In [46]:
# Encode the training features with the imported `onehot_encoding` helper.
# NOTE(review): the result overwrites `X_train`, so this cell is not idempotent;
# re-run the cell that builds `X_train` before executing this one a second time.
# NOTE(review): `X_test` is not encoded here - confirm it is encoded before it is
# fed to a model trained on the encoded features.
X_train = onehot_encoding(X_train)

In [47]:
def cross_validation(input, target, folds, metrics, params, callbacks):
    """Evaluate one hyper-parameter combination with k-fold cross-validation.

    The sample indices (columns of ``input``) are shuffled and split into
    ``folds`` disjoint, nearly equal-sized subsets. For every subset a new
    network (input layer -> one hidden layer -> one output unit with the
    'sigm' activation, 'binary_crossentropy' loss) is trained on the other
    samples and validated on the held-out subset. The last value of every
    tracked history series is collected per fold and summarised.

    Parameters
    ----------
    input : array
        Indexed as ``input[:, sample_indices]``: one column per sample. The
        number of rows is used as the size of the input layer.
    target : array
        Indexed as ``target[:, sample_indices]``: one column per sample.
    folds : int
        Number of folds; must satisfy ``2 <= folds <= input.shape[1]``.
    metrics : list of callables
        Forwarded to ``NeuralNetwork``; each ``__name__`` is used to look up
        ``train_<name>`` / ``val_<name>`` in the training history.
    params : dict
        Must contain 'scale_eta_batchsize', 'dim_hidden' and
        'hidden_act_func'. When 'scale_eta_batchsize' is 'lin' or 'sqrt',
        'eta' is multiplied by ``n_batch`` or ``sqrt(n_batch)`` respectively.
        Every remaining entry is forwarded to ``model.train`` as a keyword
        argument. The dict itself is NOT modified.
    callbacks : dict
        Forwarded to ``model.train`` as keyword arguments.

    Returns
    -------
    dict
        ``<series>_mean`` and ``<series>_std`` over the folds for the series
        'train_loss', 'val_loss' and ``train_<m>`` / ``val_<m>`` of every metric.

    Raises
    ------
    ValueError
        If ``folds`` is smaller than 2 or larger than the number of samples.
    KeyError
        If a required entry is missing from ``params``.
    """
    n_samples = input.shape[1]
    if not 2 <= folds <= n_samples:
        raise ValueError(
            f'folds must be between 2 and the number of samples ({n_samples}), got {folds}'
        )

    # Work on a private copy: the caller (e.g. a grid search that later reports
    # the winning combination) must keep its dictionary intact.
    train_params = dict(params)
    scale_eta_batchsize = train_params.pop('scale_eta_batchsize')
    dim_hidden = train_params.pop('dim_hidden')
    hidden_act_func = train_params.pop('hidden_act_func')

    if scale_eta_batchsize == 'lin':
        train_params['eta'] = train_params['eta'] * train_params['n_batch']
    elif scale_eta_batchsize == 'sqrt':
        train_params['eta'] = train_params['eta'] * np.sqrt(train_params['n_batch'])

    sample_index = np.arange(n_samples)
    np.random.shuffle(sample_index)
    # array_split never yields an empty fold when folds <= n_samples and keeps
    # the fold sizes within one sample of each other.
    validation_folds = np.array_split(sample_index, folds)

    # One list of per-fold final values for every tracked series.
    series_names = ['loss'] + [m.__name__ for m in metrics]
    per_fold = {
        f'{split}_{name}': []
        for name in series_names
        for split in ('train', 'val')
    }

    for val_ind in validation_folds:
        # Sorted array of every sample index that is not held out in this fold.
        train_ind = np.setdiff1d(sample_index, val_ind)

        train_input = input[:, train_ind]
        train_target = target[:, train_ind]
        val_input = input[:, val_ind]
        val_target = target[:, val_ind]

        # The input layer follows the data instead of a hard-coded feature count.
        input_layer = Input(input.shape[0])
        hidden_layer = Layer(input_layer, dim_hidden, hidden_act_func)
        # TODO: loop here to stack additional hidden layers?
        output_layer = Layer(hidden_layer, 1, 'sigm')

        model = NeuralNetwork(input_layer, output_layer,
                              loss='binary_crossentropy', metrics=metrics)

        history = model.train(train_input, train_target,
                              **train_params,
                              **callbacks,
                              validation_data=[val_input, val_target],
                              verbose=0)

        # Keep only the value reached at the end of training.
        for series, values in per_fold.items():
            values.append(history[series][-1])

    history_cv = {}
    for series, values in per_fold.items():
        history_cv[f'{series}_mean'] = np.mean(values)
        history_cv[f'{series}_std'] = np.std(values)

    return history_cv
            

In [48]:
def grid_search(input, target, params, cv_folds, metrics, callbacks,
                selection_metric='val_accuracy_mean', maximize=True):
    """Cross-validate every combination of a parameter grid and return the best one.

    Parameters
    ----------
    input, target, metrics, callbacks
        Passed unchanged to ``cross_validation`` for every combination.
    params : dict
        Grid description handed to ``makeGrid``, which is expected to return a
        sequence of dicts (one per combination).
    cv_folds : int
        Number of folds passed to ``cross_validation``.
    selection_metric : str, optional
        Key of the cross-validation result used to rank the combinations.
        Defaults to 'val_accuracy_mean' (the previously hard-coded choice).
    maximize : bool, optional
        If True (default) the largest score wins, otherwise the smallest.

    Returns
    -------
    dict
        The winning combination with all of its hyper-parameters plus a
        'results' entry holding its cross-validation summary. On ties the
        first combination of the grid is returned.

    Raises
    ------
    ValueError
        If the grid is empty.
    KeyError
        If ``selection_metric`` is not among the cross-validation results.
    """
    param_grid = makeGrid(params)
    if len(param_grid) == 0:
        raise ValueError('The parameter grid is empty: nothing to search.')

    print('Starting grid_search...')
    print(f'Grid of parameters: {params}')
    print('-------------------------------------------------')
    for p_comb in param_grid:
        print(f'Starting params: {p_comb}')
        # Hand over a copy so the grid entry keeps every hyper-parameter even
        # if the evaluation consumes or rescales entries of its argument.
        p_comb['results'] = cross_validation(input, target, cv_folds, metrics,
                                             dict(p_comb), callbacks)

        print('Results:')
        for key, value in p_comb['results'].items():
            print(f'{key}: {value:.2e}')
        print('-------------------------------------------------')

    def _rank(comb):
        """Score to maximise; NaN (diverged run) always ranks last."""
        score = comb['results'][selection_metric]
        if np.isnan(score):
            return -np.inf
        return score if maximize else -score

    # max() keeps the first element among equals, like a strict '>' scan.
    best_comb = max(param_grid, key=_rank)
    print(f'Best combination of parameters: {best_comb}')
    return best_comb

In [49]:
# Hyper-parameter grid: every key lists the candidate values to combine.
params = dict(
    eta=[0.01, 0.02],
    lam=[0.0, 0.1],
    alpha=[0.5, 0.9],
    epochs=[50],
    n_batch=[1, 31],
    # 'sqrt' -> eta * sqrt(n_batch), 'lin' -> eta * n_batch
    scale_eta_batchsize=['sqrt'],
    # Hidden-layer width and activation to try.
    dim_hidden=[2, 3],
    hidden_act_func=['relu'],
)


In [50]:
# Settings prepared for an early-stopping callback that monitors 'val_accuracy'.
early_stopping = dict(
    patience=150,
    monitor='val_accuracy',
    verbose=0,
    compare_function=np.greater_equal,
)

# Settings prepared for a learning-rate reduction callback (factor 0.5) that
# monitors 'val_accuracy'.
reduce_eta = dict(
    patience=75,
    monitor='val_accuracy',
    factor=0.5,
    verbose=0,
    compare_function=np.greater_equal,
)

# Both entries are None, so neither settings dict above is handed to training.
# NOTE(review): presumably None disables the callback - confirm in NeuralNetwork.train.
callbacks = dict(early_stopping=None, reduce_eta=None)

In [51]:
# Grid search with 5-fold cross-validation, ranking combinations by accuracy.
# reshape((1, -1)) turns the 1-D label vector into a single row without
# hard-coding the number of training samples.
best_comb = grid_search(X_train, y_train.reshape((1, -1)), params, 5, [accuracy], callbacks)
    

Starting grid_search...
Grid of parameters: {'eta': [0.01, 0.02], 'lam': [0.0, 0.1], 'alpha': [0.5, 0.9], 'epochs': [50], 'n_batch': [1, 31], 'scale_eta_batchsize': ['sqrt'], 'dim_hidden': [2, 3], 'hidden_act_func': ['relu']}
-------------------------------------------------
Starting params: {'eta': 0.01, 'lam': 0.0, 'alpha': 0.5, 'epochs': 50, 'n_batch': 1, 'scale_eta_batchsize': 'sqrt', 'dim_hidden': 2, 'hidden_act_func': 'relu'}


Results:
train_loss_mean: 4.94e-01
train_loss_std: 7.03e-02
val_loss_mean: 5.92e-01
val_loss_std: 7.14e-02
train_accuracy_mean: 8.77e+01
train_accuracy_std: 6.56e+00
val_accuracy_mean: 7.67e+01
val_accuracy_std: 1.17e+01
-------------------------------------------------
Starting params: {'eta': 0.01, 'lam': 0.0, 'alpha': 0.5, 'epochs': 50, 'n_batch': 1, 'scale_eta_batchsize': 'sqrt', 'dim_hidden': 3, 'hidden_act_func': 'relu'}
Results:
train_loss_mean: 3.90e-01
train_loss_std: 5.09e-02
val_loss_mean: 5.42e-01
val_loss_std: 7.69e-02
train_accuracy_mean: 8.89e+01
train_accuracy_std: 5.43e+00
val_accuracy_mean: 7.94e+01
val_accuracy_std: 1.13e+01
-------------------------------------------------
Starting params: {'eta': 0.01, 'lam': 0.0, 'alpha': 0.5, 'epochs': 50, 'n_batch': 31, 'scale_eta_batchsize': 'sqrt', 'dim_hidden': 2, 'hidden_act_func': 'relu'}
Results:
train_loss_mean: 6.71e-01
train_loss_std: 2.19e-02
val_loss_mean: 6.95e-01
val_loss_std: 1.33e-02
train_accuracy_mean: 6.52e+01


  return (- (target * np.log(layer) + (1 - target) * np.log(1 - layer))/target.shape[1]).reshape(layer.shape)
  return (- (target * np.log(layer) + (1 - target) * np.log(1 - layer))/target.shape[1]).reshape(layer.shape)
  return (layer - target)/(layer*(1-layer)*target.shape[1])
  outputs = ufunc(*inputs)
  outputs = ufunc(*inputs)


Model couldn't fit: occurred divergence!
Results:
train_loss_mean: nan
train_loss_std: nan
val_loss_mean: nan
val_loss_std: nan
train_accuracy_mean: 8.49e+01
train_accuracy_std: 1.27e+01
val_accuracy_mean: 8.64e+01
val_accuracy_std: 1.20e+01
-------------------------------------------------
Starting params: {'eta': 0.01, 'lam': 0.0, 'alpha': 0.9, 'epochs': 50, 'n_batch': 1, 'scale_eta_batchsize': 'sqrt', 'dim_hidden': 3, 'hidden_act_func': 'relu'}
Results:
train_loss_mean: 2.95e-01
train_loss_std: 9.76e-02
val_loss_mean: 4.08e-01
val_loss_std: 1.63e-01
train_accuracy_mean: 9.29e+01
train_accuracy_std: 6.19e+00
val_accuracy_mean: 9.12e+01
val_accuracy_std: 8.62e+00
-------------------------------------------------
Starting params: {'eta': 0.01, 'lam': 0.0, 'alpha': 0.9, 'epochs': 50, 'n_batch': 31, 'scale_eta_batchsize': 'sqrt', 'dim_hidden': 2, 'hidden_act_func': 'relu'}
Results:
train_loss_mean: 6.50e-01
train_loss_std: 2.77e-02
val_loss_mean: 6.82e-01
val_loss_std: 2.69e-02
train_acc