diff --git a/R/predictr.R b/R/predictr.R
index 93ae6d9..20c2843 100644
--- a/R/predictr.R
+++ b/R/predictr.R
@@ -64,8 +64,15 @@ predictr <- function(model, X, hidden = FALSE, ...) {
 
   # load neural network weights
   time_synapse = model$time_synapse
+  if("bias_synapse" %in% names(model)){
+    use_bias = T
+    bias_synapse = model$bias_synapse
+  }else{
+    use_bias = F
+  }
   recurrent_synapse = model$recurrent_synapse
   start_from_end = model$start_from_end
+  sigmoid = model$sigmoid
 
   # extract the network dimensions, only the binary dim
   input_dim = dim(time_synapse[[1]])[1]
@@ -108,13 +115,19 @@ predictr <- function(model, X, hidden = FALSE, ...) {
     layers <- list()
     for(i in seq(length(synapse_dim) - 1)){
-      if(i == 1){ # first hidden layer, need to take x as input
-        layers[[i]] <- sigmoid((x%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]]))
-      } else if(i != length(synapse_dim) - 1 & i != 1){ #hidden layers not linked to input layer, depends of the last time step
-        layers[[i]] <- sigmoid((layers[[i-1]]%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]]))
+      if (i == 1) { # first hidden layer, need to take x as input
+        layers[[i]] <- (x%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]])
+      } else if (i != length(synapse_dim) - 1 & i != 1){ #hidden layers not linked to input layer, depends of the last time step
+        layers[[i]] <- (layers[[i-1]]%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]])
       } else { # output layer depend only of the hidden layer of bellow
-        layers[[i]] <- sigmoid(layers[[i-1]] %*% time_synapse[[i]])
+        layers[[i]] <- layers[[i-1]] %*% time_synapse[[i]]
+      }
+      if(use_bias == T){ # apply the bias if applicable
+        layers[[i]] <- layers[[i]] + bias_synapse[[i]]
       }
+      # apply the activation function
+      layers[[i]] <- sigmoid(layers[[i]], method=sigmoid)
+
       # storing
       store[[i]][j,position,] = layers[[i]]
       if(i != length(synapse_dim) - 1){ # for all hidden layers, we need the previous state, looks like we duplicate the values here, it is also in the store list
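Note on the `predictr` hunks above: the per-layer step is now split into pre-activation, optional bias, and a single activation call using the `method` stored in the model. Below is a minimal standalone sketch of that order of operations; the function and argument names are illustrative, not part of the package.

```r
library(sigmoid)  # provides sigmoid(x, method = ...), as used in the diff

# One layer step: pre-activation, optional bias, then activation.
# W = time synapse, R = recurrent synapse (NULL for the output layer),
# h_prev = previous hidden state, b = bias vector (NULL when bias is off).
rnn_layer_step <- function(input, W, R = NULL, h_prev = NULL, b = NULL,
                           method = "logistic") {
  pre <- input %*% W                            # time-synapse contribution
  if (!is.null(R)) pre <- pre + h_prev %*% R    # hidden layers add a recurrent term
  if (!is.null(b)) pre <- sweep(pre, 2, b, `+`) # add the bias to each output unit
  sigmoid(pre, method = method)                 # activation is applied last
}
```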
diff --git a/R/trainr.R b/R/trainr.R
index 51946d3..e863936 100644
--- a/R/trainr.R
+++ b/R/trainr.R
@@ -13,6 +13,7 @@
 #' @param start_from_end should the sequence start from the end
 #' @param learningrate_decay coefficient to apply to the learning rate at each weight iteration
 #' @param momentum coefficient of the last weight iteration to keep for faster learning
+#' @param use_bias should the network use bias
 #' @return a model to be used by the predictr function
 #' @examples
 #' # create training numbers
@@ -38,7 +39,7 @@
 #'                 start_from_end = TRUE )
 #'
 
-trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hidden_dim = c(10), numepochs = 1, sigmoid = c('logistic', 'Gompertz', 'tanh'), start_from_end=FALSE) {
+trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hidden_dim = c(10), numepochs = 1, sigmoid = c('logistic', 'Gompertz', 'tanh'), start_from_end=FALSE, use_bias = F) {
 
   # find sigmoid
   sigmoid <- match.arg(sigmoid)
@@ -74,12 +75,15 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
   for(i in seq(length(hidden_dim))){
     recurrent_synapse[[i]] <- matrix(runif(n = hidden_dim[i]*hidden_dim[i], min=-1, max=1), nrow=hidden_dim[i])
   }
+  bias_synapse = list() # one bias vector per layer; computed even when use_bias is FALSE, for now it is only checked in two places: the state calculation and the output object
+  for(i in seq(length(synapse_dim) - 1)){
+    bias_synapse[[i]] <- runif(synapse_dim[i+1],min=-0.1,max=0.1)
+  }
 
   # initialize the update, stored in two lists
-  time_synapse_update = time_synapse
-  time_synapse_update = lapply(time_synapse_update,function(x){x*0})
-  recurrent_synapse_update = recurrent_synapse
-  recurrent_synapse_update = lapply(recurrent_synapse_update,function(x){x*0})
+  time_synapse_update = lapply(time_synapse,function(x){x*0})
+  bias_synapse_update = lapply(bias_synapse,function(x){x*0})
+  recurrent_synapse_update = lapply(recurrent_synapse,function(x){x*0})
 
   # Storing layers states, filled with 0 for the moment
   store <- list()
@@ -129,12 +133,17 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
     layers <- list()
     for(i in seq(length(synapse_dim) - 1)){
       if (i == 1) { # first hidden layer, need to take x as input
-        layers[[i]] <- sigmoid((x%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]]), method=sigmoid)
+        layers[[i]] <- (x%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]])
       } else if (i != length(synapse_dim) - 1 & i != 1){ #hidden layers not linked to input layer, depends of the last time step
-        layers[[i]] <- sigmoid((layers[[i-1]]%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]]), method=sigmoid)
+        layers[[i]] <- (layers[[i-1]]%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]])
       } else { # output layer depend only of the hidden layer of bellow
-        layers[[i]] <- sigmoid(layers[[i-1]] %*% time_synapse[[i]], method=sigmoid)
+        layers[[i]] <- layers[[i-1]] %*% time_synapse[[i]]
       }
+      if(use_bias == T){ # apply the bias if applicable
+        layers[[i]] <- layers[[i]] + bias_synapse[[i]]
+      }
+      # apply the activation function
+      layers[[i]] <- sigmoid(layers[[i]], method=sigmoid)
 
       # storing
       store[[i]][j,position,] = layers[[i]]
@@ -165,7 +174,7 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
       # input states
       x = a[pos_vec_back[position+1],]
       # error at output layer
-      layer_up_delta = layer_2_deltas[dim(layer_2_deltas)[1]-position,]
+      layer_up_delta = array(layer_2_deltas[dim(layer_2_deltas)[1]-position,],dim=c(1,output_dim)) # array dimensions needed because colMeans() is applied to layer_up_delta for the bias update
 
       for(i in (length(synapse_dim) - 1):1){
         if(i != 1){ # need update for time and recurrent synapse
@@ -175,6 +184,7 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
 
           layer_current_delta = (future_layer_delta[[i-1]] %*% t(recurrent_synapse[[i-1]]) + layer_up_delta %*% t(time_synapse[[i]])) * sigmoid_output_to_derivative(layer_current)
           time_synapse_update[[i]] = time_synapse_update[[i]] + matrix(layer_current) %*% layer_up_delta
+          bias_synapse_update[[i]] = bias_synapse_update[[i]] + colMeans(layer_up_delta)
          recurrent_synapse_update[[i-1]] = recurrent_synapse_update[[i-1]] + matrix(prev_layer_current) %*% layer_current_delta
           layer_up_delta = layer_current_delta
           future_layer_delta[[i-1]] = layer_current_delta
@@ -188,11 +198,13 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
 
     # Calculate the real update including learning rate
     time_synapse_update = lapply(time_synapse_update,function(x){x* learningrate})
+    bias_synapse_update = lapply(bias_synapse_update,function(x){x* learningrate})
     recurrent_synapse_update = lapply(recurrent_synapse_update,function(x){x* learningrate})
 
     # Applying the update
     for(i in seq(length(synapse_dim) - 1)){
       time_synapse[[i]] <- time_synapse[[i]] + time_synapse_update[[i]]
+      bias_synapse[[i]] <- bias_synapse[[i]] + bias_synapse_update[[i]]
     }
     for(i in seq(length(hidden_dim))){
       recurrent_synapse[[i]] <- recurrent_synapse[[i]] + recurrent_synapse_update[[i]]
@@ -203,6 +215,7 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
 
     # Initializing the update with the momentum
     time_synapse_update = lapply(time_synapse_update,function(x){x* momentum})
+    bias_synapse_update = lapply(bias_synapse_update,function(x){x* momentum})
     recurrent_synapse_update = lapply(recurrent_synapse_update,function(x){x* momentum})
   }
   # update best guess if error is minimal
@@ -218,10 +231,14 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
                 error = error,
                 store = store,
                 store_best = store_best,
-                start_from_end = start_from_end)
+                start_from_end = start_from_end,
+                sigmoid = sigmoid)
 
   attr(output, 'error') <- colMeans(error)
 
+  if(use_bias == T){ # append bias_synapse if applicable
+    output$bias_synapse = bias_synapse
+  }
 
   # return output
   return(output)
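One subtlety in the backward pass above: since a bias unit feeds each neuron a constant input of 1, its gradient is simply the layer delta, accumulated here via `colMeans()`. That is also why `layer_up_delta` is wrapped in `array(..., dim = c(1, output_dim))`: `colMeans()` requires at least two dimensions, and on a one-row matrix it just returns that row. A quick illustration:

```r
delta <- c(0.2, -0.1, 0.4)                # layer delta as a plain vector
# colMeans(delta) would fail: 'x' must be an array of at least two dimensions
delta_mat <- array(delta, dim = c(1, 3))  # reshape into a 1-row matrix
colMeans(delta_mat)                       # 0.2 -0.1 0.4, usable as a bias gradient
```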
diff --git a/man/trainr.Rd b/man/trainr.Rd
index 3f1857c..d6306a3 100644
--- a/man/trainr.Rd
+++ b/man/trainr.Rd
@@ -6,7 +6,7 @@
 \usage{
 trainr(Y, X, learningrate, learningrate_decay = 1, momentum = 0,
   hidden_dim = c(10), numepochs = 1, sigmoid = c("logistic", "Gompertz",
-  "tanh"), start_from_end = FALSE)
+  "tanh"), start_from_end = FALSE, use_bias = F)
 }
 \arguments{
 \item{Y}{array of output values, dim 1: samples (must be equal to dim 1 of X), dim 2: time (must be equal to dim 2 of X), dim 3: variables (could be 1 or more, if a matrix, will be coerce to array)}
@@ -26,6 +26,8 @@ trainr(Y, X, learningrate, learningrate_decay = 1, momentum = 0,
 \item{sigmoid}{method to be passed on the to the sigmoid function}
 
 \item{start_from_end}{should the sequence start from the end}
+
+\item{use_bias}{should the network use bias}
 }
 \value{
 a model to be used by the predictr function
diff --git a/tests/testthat/test_rnn.R b/tests/testthat/test_rnn.R
index 1ae07cc..6805103 100644
--- a/tests/testthat/test_rnn.R
+++ b/tests/testthat/test_rnn.R
@@ -25,6 +25,8 @@ model <- trainr(Y=Y,
                 numepochs = 2,
                 start_from_end = TRUE )
 
+set.seed(1) # reset the seed: the RNG state has moved during trainr because of the bias generation; this keeps the test inputs comparable before and after the bias implementation
+
 # create test inputs
 A1 = int2bin( sample(0:127, 7000, replace=TRUE) )
 A2 = int2bin( sample(0:127, 7000, replace=TRUE) )
@@ -36,4 +38,4 @@ A <- array( c(A1,A2), dim=c(dim(A1),2) )
 B <- predictr(model, A)
 
 # inspect the differences
-expect_equal(sum(bin2int(B)), 886211)
+expect_equal(sum(bin2int(B)), 888626)
diff --git a/vignettes/rnn.Rmd b/vignettes/rnn.Rmd
index 3b73d43..78cf166 100644
--- a/vignettes/rnn.Rmd
+++ b/vignettes/rnn.Rmd
@@ -26,7 +26,7 @@ We can view the code of the main `rnn()` function by calling it without the para
 trainr
 ```
 
-As can be seen from the above, the model relies on two other functions that are available trough the `sigmoid` package.
+As can be seen from the above, the model relies on two other functions that are available through the `sigmoid` package.
 
 The first function is `logistic()`, which converts an integer to its sigmoid value.
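End to end, the new flag is opt-in (`use_bias` defaults to `FALSE`), so existing code is unaffected. A sketch mirroring the binary-adder example from the `trainr()` documentation, with bias units enabled (sample sizes are arbitrary):

```r
library(rnn)
set.seed(1)

# create training numbers and their sums
X1 <- sample(0:127, 5000, replace = TRUE)
X2 <- sample(0:127, 5000, replace = TRUE)
Y  <- X1 + X2

# convert to binary and stack the two inputs into a 3D array
X1 <- int2bin(X1)
X2 <- int2bin(X2)
Y  <- int2bin(Y)
X  <- array(c(X1, X2), dim = c(dim(X1), 2))

# train with bias units enabled
model <- trainr(Y = Y, X = X,
                learningrate   = 0.1,
                hidden_dim     = 10,
                numepochs      = 2,
                start_from_end = TRUE,
                use_bias       = TRUE)

# predictr() detects bias_synapse in the model object and applies it
B <- predictr(model, X)
```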