# Training on LETOR 4 - Fold 5

### In this version, the grid search is limited to the hyper-parameter values that were found to work best. Other combinations can be explored by customizing the ranges of the values of interest (learning rate, epochs, etc.).

The only thing that changes from one training file to another is the path. Depending on it, the training, validation and test sets will be different.

Setting the environment seeds

In [1]:
# Fold directory: prefix of every data file read and every output file written in this notebook.
path='../../MQ2008/Fold5/'
# NOTE(review): the star import appears to supply load_data, create_folders, get_list_xy,
# Loss_query_keras, convert_to_time, set_libraries_seeds, Constant, keras and pd -- confirm against utils.
from utils import *
from tqdm import tqdm_notebook as tqdm
import logging
# Single seed passed to set_libraries_seeds and reused for the weight initializers below.
my_seed=1
set_libraries_seeds(my_seed) #Important to approach reproducible results
#tf.logging.set_verbosity(tf.logging.ERROR) #setting a lower verbosity
# Only let TensorFlow log records of level ERROR or higher through.
logging.getLogger("tensorflow").setLevel(logging.ERROR)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Loading the data folds

In [2]:
# Read the three splits of this fold, in the order validation, training, test.
data_vali,data_train,data_test=(
    load_data(path+split_file) for split_file in ('vali.txt','train.txt','test.txt')
)

Checking that the folders needed to perform the evaluations are created

In [3]:
# Output folders expected under the fold directory: saved models,
# exported predictions and performance results.
create_folders(
    path,
    ["models","new_predictions","performance"],
)

The directory ../../MQ2008/Fold5/models  already exists
The directory ../../MQ2008/Fold5/new_predictions  already exists
The directory ../../MQ2008/Fold5/performance  already exists


# Linear models

In [4]:
import time
from itertools import product,count

from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.optimizers import SGD
from keras.regularizers import l1

# Start of the linear-model stage; the training function prints the time elapsed since this point.
t_ini=time.time()

# Alternative value ranges, left commented out:
#epochs=[500,1000]
#learning_rate=[0.001,0.075]
#mon=[0.3,0.5]
#act=['sigmoid','linear']

# Grid actually used: a single value per hyper-parameter.
epochs=[1000]
learning_rate=[0.001]
mon=[0.3]
act=['linear']

# Weight initialisation shared by every model built from this grid.
bias=0.1
initializer=keras.initializers.glorot_uniform(seed=my_seed)

# Number of hyper-parameter combinations, counted on the same grid that is iterated at run time.
total_it=len(list(product(epochs,learning_rate,mon,act)))

In [5]:
save_model=False  # set to True to also write every trained model to an .h5 file under the fold directory

In [6]:
def linear_model_iteration(my_iter,name,counter):
    """
    Train one linear model for a hyper-parameter combination and export its predictions.

    A single Dense unit is compiled with SGD and Loss_query_keras, then trained with
    one train_on_batch call per entry of the lists returned by get_list_xy(data_train),
    repeated for the requested number of passes. Predictions for the train, validation
    and test frames are written to
    <path>new_predictions/y_<split>_<name><counter>.txt (one value per line).

    my_iter: sequence of 4 hyper-parameters which must respectively be:
        - 0 => number of passes over the training lists (epochs)
        - 1 => learning rate
        - 2 => momentum
        - 3 => activation function of the output unit

    name: the name given to the model and prediction files

    counter: the number count of the model, will be added as a suffix to the name

    Returns None. Reads the notebook globals data_train, data_vali, data_test, path,
    total_it, t_ini, initializer, bias and save_model.
    """
    print("Iteration:",counter,"/",total_it)
    name=name+str(counter)

    x_list_train,y_list_train=get_list_xy(data_train)
    n_batches=len(y_list_train)

    n_iter,n_,mom_,act_=my_iter[0],my_iter[1],my_iter[2],my_iter[3]

    modelq = Sequential()
    modelq.add(Dense(1,input_dim=x_list_train[0].shape[1], activation=act_,
                     kernel_initializer=initializer, bias_initializer=Constant(value=bias)))
    opt = SGD(lr=n_, momentum=mom_)

    modelq.compile(loss=Loss_query_keras, optimizer=opt)
    #display(modelq.summary())

    # With a sigmoid output the targets are halved (presumably to bring the relevance
    # grades into the sigmoid's output range -- confirm). The division is loop-invariant,
    # so it is done once here instead of on every batch of every pass.
    if act_=='sigmoid':
        targets=[y_list_train[ki]/2 for ki in range(n_batches)]
    else:
        targets=y_list_train

    for j in tqdm(range(n_iter)):
        for ki in range(n_batches):
            modelq.train_on_batch(x=x_list_train[ki],y=targets[ki])

    print("Time elapsed so far:")
    print(convert_to_time(time.time()-t_ini))
    print(my_iter,"\n\n\n")
    if save_model:
        # Saved under "models", the folder created by create_folders earlier in the
        # notebook (the previous 'new_models/' target is never created).
        modelq.save(path+'models/model_'+name+'.h5')

    # Same export for the three splits: drop the label and query-id columns, predict,
    # flatten to float64 and write one prediction per line.
    for split,frame in (('train',data_train),('vali',data_vali),('test',data_test)):
        features=frame.drop(['relevance degree','qid'],axis=1)
        scores=modelq.predict(features).astype('float64').ravel()
        pd.DataFrame(scores).to_csv(path+'new_predictions/y_'+split+'_'+name+'.txt',
                                    sep=' ',header=False,index=False)

    print("                             ",counter,"                                     ")
    print("=================================================================\n\n\n\n")

In [7]:
from joblib import Parallel, delayed

# One task per hyper-parameter combination, numbered from 1 to total_it;
# the number becomes the suffix of the files written by the task.
Parallel(n_jobs=4,verbose=50)(
    delayed(linear_model_iteration)(my_iter,'LETOR',counter)
    for counter,my_iter in zip(range(1,total_it+1),
                               product(epochs,learning_rate,mon,act))
)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:  7.9min
[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:  7.9min finished


[None]

# Neural Networks

We use the ReLU activation function, as it converges faster and appears to perform better (than sigmoid, for instance) according to the literature.

The regularization we use is Dropout, which randomly shuts down neurons in the network during training in order to avoid overfitting.

In [8]:
import time
from itertools import product

from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.optimizers import SGD
from keras.regularizers import l1

# Start of the neural-network stage; the training function prints the time elapsed since this point.
t_ini=time.time()

# Alternative value ranges, left commented out:
#epochs=[300,400,500,600]
#learning_rate=[0.0025]
#mon=[0.5]
#act=['linear']
#inner_act=['relu']
#number_layers=[1,2]
#number_neurons=[100]
#dropout=[0.65,0.8]

# Grid actually used: a single value per hyper-parameter.
epochs=[600]
learning_rate=[0.0025]
mon=[0.5]
act=['linear']
inner_act=['relu']
number_layers=[1]
number_neurons=[100]
dropout=[0.65]

counter=1

# Weight initialisation shared by every model built from this grid.
bias=0.1
initializer=keras.initializers.glorot_uniform(seed=my_seed)

# Number of hyper-parameter combinations, counted on the same grid that is iterated at run time.
total_it=len(list(product(epochs,learning_rate,mon,act,
                          inner_act,number_layers,number_neurons,dropout)))

In [9]:
def NN_model_iteration(my_iter,name,counter):
    """
    Train one feed-forward network for a hyper-parameter combination and export its predictions.

    The network is: one hidden Dense layer followed by Dropout, then
    my_iter[5] further (Dense + Dropout) pairs, then a single output unit.
    It is compiled with SGD and Loss_query_keras and trained with one
    train_on_batch call per entry of the lists returned by get_list_xy(data_train),
    repeated for the requested number of passes. Predictions for the train,
    validation and test frames are written to
    <path>new_predictions/y_<split>_<name><counter>.txt (one value per line).

    my_iter: sequence of 8 hyper-parameters which must respectively be:
        - 0 => number of passes over the training lists (epochs)
        - 1 => learning rate
        - 2 => momentum
        - 3 => activation function of the output unit
        - 4 => inner layers activation function
        - 5 => number of hidden layers added AFTER the first one
               (the network has this value + 1 hidden layers in total)
        - 6 => number of neurons by hidden layer
        - 7 => dropout rate applied after every hidden layer

    name: the name given to the model and prediction files

    counter: the number count of the model, will be added as a suffix to the name

    Returns None. Reads the notebook globals data_train, data_vali, data_test, path,
    total_it, t_ini, initializer, bias and save_model.
    """
    name=name+str(counter)
    print("Iteration:",counter,"/",total_it)

    x_list_train,y_list_train=get_list_xy(data_train)
    n_batches=len(y_list_train)

    n_iter,n_,mom_,act_=my_iter[0],my_iter[1],my_iter[2],my_iter[3]
    inner_act_,number_layers_,number_neurons_,dropout_=my_iter[4],my_iter[5],my_iter[6],my_iter[7]

    modelq = Sequential()
    # First hidden layer: always present, it fixes the input dimension.
    modelq.add(Dense(number_neurons_,input_dim=x_list_train[0].shape[1], activation=inner_act_,
                     kernel_initializer=initializer, bias_initializer=Constant(value=bias)))
    modelq.add(Dropout(dropout_))

    # Additional hidden layers based on the gridsearch value
    for _ in range(number_layers_):
        modelq.add(Dense(number_neurons_,activation=inner_act_,
                        kernel_initializer=initializer,
                        bias_initializer=Constant(value=bias)))
        modelq.add(Dropout(dropout_))

    modelq.add(Dense(1,activation=act_,
                kernel_initializer=initializer,
                bias_initializer=Constant(value=bias)))
    opt = SGD(lr=n_, momentum=mom_)

    modelq.compile(loss=Loss_query_keras, optimizer=opt)
    #display(modelq.summary())

    # With a sigmoid output the targets are halved (presumably to bring the relevance
    # grades into the sigmoid's output range -- confirm). The division is loop-invariant,
    # so it is done once here instead of on every batch of every pass.
    if act_=='sigmoid':
        targets=[y_list_train[ki]/2 for ki in range(n_batches)]
    else:
        targets=y_list_train

    for j in tqdm(range(n_iter)):
        for ki in range(n_batches):
            modelq.train_on_batch(x=x_list_train[ki],y=targets[ki])

    print("Time elapsed so far:")
    print(convert_to_time(time.time()-t_ini))
    print(my_iter,"\n\n\n")
    if save_model:
        modelq.save(path+'models/model_'+name+'.h5')

    # Same export for the three splits: drop the label and query-id columns, predict,
    # flatten to float64 and write one prediction per line.
    for split,frame in (('train',data_train),('vali',data_vali),('test',data_test)):
        features=frame.drop(['relevance degree','qid'],axis=1)
        scores=modelq.predict(features).astype('float64').ravel()
        pd.DataFrame(scores).to_csv(path+'new_predictions/y_'+split+'_'+name+'.txt',
                                    sep=' ',header=False,index=False)

    print("                             ",counter,"                                     ")
    print("=================================================================\n\n\n\n")

In [10]:
from joblib import Parallel, delayed

# One task per hyper-parameter combination, numbered from 1 to total_it;
# the number becomes the suffix of the files written by the task.
Parallel(n_jobs=4,verbose=50)(
    delayed(NN_model_iteration)(my_iter,'LETOR_NN',counter)
    for counter,my_iter in zip(range(1,total_it+1),
                               product(epochs,learning_rate,mon,act,
                                       inner_act,number_layers,number_neurons,dropout))
)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:  8.8min
[Parallel(n_jobs=4)]: Done   1 out of   1 | elapsed:  8.8min finished


[None]