In [7]:
import numpy as np

from sklearn.metrics import mean_squared_error,accuracy_score,mean_absolute_error
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense,Dropout,Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l1,l2
from tensorflow.keras.constraints import max_norm
from tensorflow.keras.optimizers import SGD,Adam,RMSprop
import pandas as pd
import time
from tensorflow.keras import backend as K
import os

In [10]:
import hessianfree as hf
from hessianfree.loss_funcs import LossFunction
from functools import wraps

In [11]:
# TO TURN GPU for Keras, set devic = cuda or gpu or gpu0 like this
os.environ["THEANO_FLAGS"] = "device=cuda,openmp=1,floatX=float32" 
# TO TURN ON OPENMP
os.environ["THEANO_FLAGS"] = "device=cpu,openmp=1,floatX=float32" 

In [21]:
def keras_NN(n_nodes,optimizer):
    '''This function initializes and return a new neural network with regularization techniques
       
       input: 
       n_nodes: a list of units per layer like [42,24,12,1] 
       optimizer: one of the following:
        sgd = SGD
        rmsprop = RMSprop
        adagrad = Adagrad
        adadelta = Adadelta
        adam = Adam
        adamax = Adamax
        nadam = Nadam
       

       output: an object that contains these methods:
       
       model.predict(X): return predictions corresponding to X
       
       model.get_weights(): return a list of current model weights, in the order of w0,b1,w1,b1,....w4,b4
       
       model.set_weights(): takes in a list of weights in the same format as what model.get_weights() returns
       
       model.fit(X_tr,Y_tr,verbose=0,epochs=50,batch_size=1024,validation_split=0.2, callbacks=[early_stopping]): 
       
       train a model with the inputs and the specification, you can train 1 epoch;  
       and return history of loss during training (using hist.history['loss']) and validation loss if callbacks =
       [EarlyStopping(patience=5)] (using hist.history['val_loss']) 
       
    '''
    # Clear the model
    model = None
    # BUILD INPUT LAYER
    inputs = Input(shape=(n_nodes[0],))

    # CONNECT TO THE FIRST HIDDEN LAYER
    x = Dense(n_nodes[1], kernel_initializer='he_normal', 
                    kernel_regularizer=l2(0.0001),kernel_constraint = max_norm(5), activation='relu')(inputs)
    x = Dropout(0.2)(x) # add dropout 

    # ADD SOME MORE HIDDEN LAYERS
    for i in range(2,len(n_nodes)-1):
        x = Dense(n_nodes[i],  kernel_initializer='he_normal', activation='relu',bias_initializer='he_normal',
            kernel_regularizer=l2(0.0001),kernel_constraint = max_norm(3))(x)
        x = Dropout(0.2)(x) # add dropout 

    # OUTPUT LAYER
    predictions = Dense(1, kernel_initializer='he_normal', activation='linear')(x)

    # INITIALIZE MODEL (now you can call model.get_weights() )
    model = Model(inputs=inputs, outputs=predictions)

    # Compile model with LOSS FUNCTION and ADAM OPTIMIZER
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model



In [22]:
# Example OF comparing keras and Hessian Free: 

# read data and define training, validation and test set
data = np.genfromtxt('price_inputs_GS2016.csv',delimiter=',',skip_header=1)
X,ret = data[:,2:],data[:,1:2] # X means features, ret means target 
print('shape of total X and ret:',X.shape,ret.shape)

n_test = int(X.shape[0]*0.25)
N = X.shape[0] - n_test
n_val = int(N*0.2)
X_tr_temp, X_test, ret_tr_temp,ret_test = X[:-n_test],X[-n_test:],ret[:-n_test],ret[-n_test:]
X_tr,X_val,ret_tr,ret_val = X_tr_temp[:-n_val], X_tr_temp[-n_val:],ret_tr_temp[:-n_val],ret_tr_temp[-n_val:]


# define evaluation metrics
accuracy = lambda pred,truth: np.mean((pred>0)==(truth>0))
hit_ratio = lambda x,y: np.mean( ((x[1:] - x[:-1]) * (y[1:]-y[:-1]))>0 )
eval_f = [accuracy,hit_ratio,mean_squared_error,mean_absolute_error]
labels = 'accuracy,hit_ratio,mean_squared_error,mean_absolute_error'.split(',')

n_trials = 1 # run some number of trials for each model for confidence interval 


shape of total X and ret: (19669, 42) (19669, 1)


In [23]:
################### KERAS ONLY ######################

 
# define hyperparameters
n_nodes = [42,24,12,1] # number of units per layer
batch_size = 1024

early_stopping = EarlyStopping(patience=5)
# CHOOSE adam or adagrad 
model = keras_NN(n_nodes=n_nodes,optimizer='sgd')
model.fit(X_tr,ret_tr,verbose=0,epochs=100,batch_size=batch_size,
                 validation_data=(X_val,ret_val),callbacks=[early_stopping])
print('After fitting on the training set for 100 epochs, keras return this weight parameter') 
print(model.get_weights())

After fitting on the training set for 100 epochs, keras return this weight parameter
[array([[-0.1533167 ,  0.21487777,  0.18682274, ...,  0.1323001 ,
         0.00276681,  0.17073455],
       [-0.0922346 ,  0.1681947 , -0.10161486, ..., -0.07924437,
        -0.30490264, -0.15914625],
       [-0.12916754,  0.44287652,  0.2084809 , ..., -0.07521459,
        -0.3514044 , -0.09887037],
       ...,
       [ 0.09532724,  0.0388236 , -0.2344182 , ..., -0.32659563,
        -0.10727568, -0.07862125],
       [-0.25799647, -0.02328127, -0.10619861, ...,  0.10073078,
         0.28008842,  0.03257942],
       [-0.11392363,  0.20051365, -0.10578863, ..., -0.35648307,
        -0.4311471 , -0.12027123]], dtype=float32), array([ 0.02169308, -0.07971974, -0.05559774, -0.01004806, -0.00256501,
       -0.02971967,  0.05084664, -0.05976133, -0.03575768, -0.063921  ,
       -0.01003077, -0.01349633,  0.01434619, -0.03217653, -0.05456308,
       -0.01157956,  0.00958946,  0.00285666, -0.03123203, -0.0190381

In [25]:
################### Hessian Free ######################



def output_loss(func):
    """Convenience decorator that takes a loss defined for the output layer
    and converts it into the more general form in terms of all layers."""

    @wraps(func)
    def wrapped_loss(self, activities, targets):
        result = [None for _ in activities[:-1]]
        result += [func(self, activities[-1], targets)]

        return result

    return wrapped_loss

class mse(LossFunction):
    
    @output_loss
    def loss(self, output, targets):
        return np.sum(np.nan_to_num(output - targets) ** 2,
                      axis=tuple(range(1, output.ndim))) / 2 /output.shape[0]

    @output_loss
    def d_loss(self, output, targets):
        return np.nan_to_num(output - targets)/output.shape[0]

    @output_loss
    def d2_loss(self, output, _):
        return np.ones_like(output)/output.shape[0]
    
def pack_weights(ff):
    '''
    input: an hessian free model
    output: a list of weight following keras' format
    ff follows this format: [(W_0,b_0),(W_1,b_1)...(W_H,b_H)]'''
    res = []
    for i in range(len(n_nodes)-1):
        weights = ff.get_weights(ff.W,(i,i+1))
        
        res.extend([np.array(weights[0]),np.array(weights[1])])
    return res

pshape = lambda a_list: [ w.shape for w in a_list]


# define hyperparameters
layers = (len(n_nodes)-1)*['ReLU'] + ['Linear'] # all relu except linear for output layer
n_nodes = [42,24,12,1] # number of units per layer
batch_size = 1024


# initialize a hessian free model with GPU use optional
ff = hf.FFNet(n_nodes,layers=layers,loss_type=mse(),
          W_init_params={ "coeff":1.0, "biases":1.0,"init_type":'gaussian'},use_GPU=0)

ff.run_epochs(X,ret,test=(X_val,ret_val),minibatch_size=1024,
                      optimizer=hf.opt.HessianFree(CG_iter=2),
                      max_epochs=50, plotting=True,print_period=None)

print('After fitting on the training set for 100 epochs, hessian free return this weight parameter') 
print(pack_weights(ff))

  (self.inputs.dtype, self.dtype))


After fitting on the training set for 100 epochs, hessian free return this weight parameter
[array([[ 2.3042445 , -0.9528785 ,  0.48874274, ...,  1.3194114 ,
         0.08485562,  1.4561378 ],
       [-0.35583097,  1.2491975 ,  0.7114682 , ..., -0.7036275 ,
         2.090405  ,  0.34680372],
       [ 2.665232  ,  1.9462991 , -0.6751843 , ...,  0.07705186,
        -0.4185271 , -1.3700578 ],
       ...,
       [-0.49847376,  0.15282175,  0.36301452, ...,  0.36784315,
         1.33204   ,  0.57144976],
       [ 1.7228711 ,  0.2656061 ,  1.1912862 , ...,  0.89247423,
         1.3540462 , -0.8962603 ],
       [ 1.3128672 ,  0.7689688 ,  1.2169102 , ..., -0.67522377,
        -1.2082196 , -0.48314008]], dtype=float32), array([0.84678406, 1.0113026 , 0.65226203, 0.8460618 , 1.4550768 ,
       0.63922274, 1.2413863 , 1.0704874 , 1.6226617 , 1.3157724 ,
       0.9693455 , 1.2614727 , 0.7919319 , 0.95461243, 1.0446073 ,
       1.3060049 , 0.8781145 , 0.9831377 , 0.44360113, 0.6226942 ,
       0.7

In [30]:
############################## Evaluation metrics ##############################

# run some number of trials for each model
n_trials = 1
n_nodes = [42,24,12,1] # number of units per layer
batch_size = 1024
layers = (len(n_nodes)-1)*['ReLU'] + ['Linear'] # all relu except linear for output layer

# define evaluation metrics
accuracy = lambda pred,truth: np.mean((pred>0)==(truth>0))
hit_ratio = lambda x,y: np.mean( ((x[1:] - x[:-1]) * (y[1:]-y[:-1]))>0 )
eval_f = [accuracy,hit_ratio,mean_squared_error,mean_absolute_error]
labels = 'accuracy,hit_ratio,mean_squared_error,mean_absolute_error'.split(',')

timer = np.zeros((n_trials,2))
scores = np.zeros( (n_trials,len(labels), 2) )

for i in range(n_trials):
                      
    # CHOOSE sgd, adam or adagrad 
    early_stopping = EarlyStopping(patience=5)
    start = time.time()
    model = keras_NN(n_nodes=n_nodes,optimizer='sgd')
    hist = model.fit(X_tr,ret_tr,verbose=0,epochs=100,batch_size=batch_size,
                     validation_data=(X_val,ret_val),callbacks=[early_stopping])
    timer[i,0] = time.time()-start
    
    # evaluation metrics
    pred = model.predict(X_test).flatten()
    truth = ret_test.flatten()
    scores[i,:,0] = [ f(pred,truth) for j,f in enumerate(eval_f) ]
               
    
    # initliaze a hessian free model
    ff = hf.FFNet(n_nodes,layers=layers,loss_type=hf.loss_funcs.SquaredError(),
              W_init_params={ "coeff":1.0, "biases":1.0,"init_type":'gaussian'},use_GPU=0)
    
    # Hession free
    start = time.time()
    ff.run_epochs(X,ret,test=(X_val,ret_val),minibatch_size=1024,
                          optimizer=hf.opt.HessianFree(CG_iter=2),
                          max_epochs=50, plotting=True,print_period=None)
    timer[i,1] = time.time()-start
    
    # here I am borrowing Keras' model to evaluate the loss function of weights from Hessian free
    model.set_weights(pack_weights(ff))
    
     # evaluation metrics
    pred = model.predict(X_test).flatten()
    truth = ret_test.flatten()
    scores[i,:,1] = [ f(pred,truth) for j,f in enumerate(eval_f) ]
    


# print 'keras training loss',hist.history['loss']
# print 'valdidation loss',hist.history['val_loss']
# print 'Hessian Free training loss',ff.optimizer.plots['training error (log)'] # it says log but it's not for MSE
# print 'Hessian Free validation loss',ff.test_errs

for jj in range(2):
    print 
    exp = 'keras adagrad,hessian free'.split(',')[jj]
    print('Evaluating ',exp)
    print('running time per trial',timer[:,jj])
    s = scores[:,:,jj]
    print('prediction scores')
    
    mu = s.mean(axis=0)
    sd = s.std(axis=0)

    lower_bound = np.percentile(s, 2.5, axis=0)
    upper_bound = np.percentile(s, 97.5, axis=0)
     
    for i in range(s.shape[1]):
        print(labels[i])
        ##print('mean {} and std {}'.format(mu[i],std[i]))
        print('2.5 and 97.5 percentile [{},{}]'.format(lower_bound[i],upper_bound[i]))

  (self.inputs.dtype, self.dtype))


Evaluating  keras adagrad
running time per trial [6.18560243]
prediction scores
accuracy
2.5 and 97.5 percentile [0.5049827130364043,0.5049827130364043]
hit_ratio
2.5 and 97.5 percentile [0.5008136696501221,0.5008136696501221]
mean_squared_error
2.5 and 97.5 percentile [0.900225480635662,0.900225480635662]
mean_absolute_error
2.5 and 97.5 percentile [0.5821926761449598,0.5821926761449598]
Evaluating  hessian free
running time per trial [10.52531791]
prediction scores
accuracy
2.5 and 97.5 percentile [0.48586536505999595,0.48586536505999595]
hit_ratio
2.5 and 97.5 percentile [0.37835638730675347,0.37835638730675347]
mean_squared_error
2.5 and 97.5 percentile [321.92099593473733,321.92099593473733]
mean_absolute_error
2.5 and 97.5 percentile [8.574186604401204,8.574186604401204]
