Commit
Merge pull request #5 from DimitriF/master
bias update
bquast committed May 19, 2016
2 parents fd06501 + e7ffc91 commit 29974e6
Showing 5 changed files with 52 additions and 18 deletions.
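In short, this merge threads optional bias terms through the package: `trainr()` gains a `use_bias` argument (default `FALSE`), initializes and updates `bias_synapse` during training, and stores it on the returned model; `predictr()` then detects `model$bias_synapse` and applies it automatically. A minimal usage sketch, with the data setup mirroring `tests/testthat/test_rnn.R` below; the `learningrate` value is an assumed placeholder, not taken from this diff:

```r
# sketch of the new use_bias flag; learningrate = 0.1 is illustrative
library(rnn)
set.seed(1)

X1 <- int2bin(sample(0:127, 7000, replace = TRUE))
X2 <- int2bin(sample(0:127, 7000, replace = TRUE))
Y  <- int2bin(bin2int(X1) + bin2int(X2))    # teach the network binary addition
X  <- array(c(X1, X2), dim = c(dim(X1), 2))

model <- trainr(Y = Y, X = X,
                learningrate   = 0.1,
                hidden_dim     = c(10),
                numepochs      = 2,
                start_from_end = TRUE,
                use_bias       = TRUE)  # new in this merge, defaults to FALSE

B <- predictr(model, X)  # picks up model$bias_synapse on its own
```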
23 changes: 18 additions & 5 deletions R/predictr.R
@@ -64,8 +64,15 @@ predictr <- function(model, X, hidden = FALSE, ...) {

# load neural network weights
time_synapse = model$time_synapse
if("bias_synapse" %in% names(model)){
use_bias = T
bias_synapse = model$bias_synapse
}else{
use_bias = F
}
recurrent_synapse = model$recurrent_synapse
start_from_end = model$start_from_end
sigmoid = model$sigmoid

# extract the network dimensions, only the binary dim
input_dim = dim(time_synapse[[1]])[1]
@@ -108,13 +115,19 @@ predictr <- function(model, X, hidden = FALSE, ...) {

layers <- list()
for(i in seq(length(synapse_dim) - 1)){
if(i == 1){ # first hidden layer, need to take x as input
layers[[i]] <- sigmoid((x%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]]))
} else if(i != length(synapse_dim) - 1 & i != 1){ # hidden layers not linked to the input layer; they depend on the state from the previous time step
layers[[i]] <- sigmoid((layers[[i-1]]%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]]))
if (i == 1) { # first hidden layer, need to take x as input
layers[[i]] <- (x%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]])
} else if (i != length(synapse_dim) - 1 & i != 1){ # hidden layers not linked to the input layer; they depend on the state from the previous time step
layers[[i]] <- (layers[[i-1]]%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]])
} else { # the output layer depends only on the hidden layer below
layers[[i]] <- sigmoid(layers[[i-1]] %*% time_synapse[[i]])
layers[[i]] <- layers[[i-1]] %*% time_synapse[[i]]
}
if(use_bias == T){ # apply the bias if applicable
layers[[i]] <- layers[[i]] + bias_synapse[[i]]
}
# apply the activation function
layers[[i]] <- sigmoid(layers[[i]], method=sigmoid)

# storing
store[[i]][j,position,] = layers[[i]]
if(i != length(synapse_dim) - 1){ # for all hidden layers we need the previous state; the values look duplicated here because they are also kept in the store list
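The net effect on the forward pass, in both `predictr()` and `trainr()`, is that the pre-activation is assembled first, the bias is added when available, and the nonlinearity comes last. A schematic of one layer at one time step, reusing the names from the diff above; `input` and `prev_state` are placeholders for `x`/`layers[[i-1]]` and `layers_values[[i]][dim(layers_values[[i]])[1],]`, so this is not runnable in isolation:

```r
# schematic: one layer i at one time step
z <- input %*% time_synapse[[i]]              # input is x for i == 1, layers[[i-1]] otherwise
if (i != length(synapse_dim) - 1) {           # hidden layers also see their previous state
  z <- z + prev_state %*% recurrent_synapse[[i]]
}
if (use_bias) {                               # bias enters before the activation
  z <- z + bias_synapse[[i]]
}
layers[[i]] <- sigmoid(z, method = sigmoid)   # activation applied last
```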
37 changes: 27 additions & 10 deletions R/trainr.R
@@ -13,6 +13,7 @@
#' @param start_from_end should the sequence start from the end
#' @param learningrate_decay coefficient to apply to the learning rate at each weight iteration
#' @param momentum coefficient of the last weight iteration to keep for faster learning
#' @param use_bias should the network use bias terms
#' @return a model to be used by the predictr function
#' @examples
#' # create training numbers
@@ -38,7 +39,7 @@
#' start_from_end = TRUE )
#'

trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hidden_dim = c(10), numepochs = 1, sigmoid = c('logistic', 'Gompertz', 'tanh'), start_from_end=FALSE) {
trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hidden_dim = c(10), numepochs = 1, sigmoid = c('logistic', 'Gompertz', 'tanh'), start_from_end=FALSE, use_bias = F) {

# find sigmoid
sigmoid <- match.arg(sigmoid)
@@ -74,12 +75,15 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
for(i in seq(length(hidden_dim))){
recurrent_synapse[[i]] <- matrix(runif(n = hidden_dim[i]*hidden_dim[i], min=-1, max=1), nrow=hidden_dim[i])
}
bias_synapse = list() # one bias per unit; for now they are always computed, and use_bias is checked in two if statements: the state calculation and the output object
for(i in seq(length(synapse_dim) - 1)){
bias_synapse[[i]] <- runif(synapse_dim[i+1],min=-0.1,max=0.1)
}

# initialize the update, stored in two lists
time_synapse_update = time_synapse
time_synapse_update = lapply(time_synapse_update,function(x){x*0})
recurrent_synapse_update = recurrent_synapse
recurrent_synapse_update = lapply(recurrent_synapse_update,function(x){x*0})
time_synapse_update = lapply(time_synapse,function(x){x*0})
bias_synapse_update = lapply(bias_synapse,function(x){x*0})
recurrent_synapse_update = lapply(recurrent_synapse,function(x){x*0})

# Storing layers states, filled with 0 for the moment
store <- list()
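For orientation, the shapes set up above, assuming a single hidden layer and the `synapse_dim = c(input_dim, hidden_dim, output_dim)` layout the package uses; a sketch of what could be asserted right after this initialization:

```r
# illustrative shape check for the initialization above (single hidden layer)
stopifnot(
  all(dim(time_synapse[[1]]) == c(input_dim, hidden_dim[1])),           # feed-forward weights
  all(dim(recurrent_synapse[[1]]) == c(hidden_dim[1], hidden_dim[1])),  # hidden-to-hidden
  length(bias_synapse[[1]]) == hidden_dim[1]                            # one bias per unit above
)
```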
@@ -129,12 +133,17 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
layers <- list()
for(i in seq(length(synapse_dim) - 1)){
if (i == 1) { # first hidden layer, need to take x as input
layers[[i]] <- sigmoid((x%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]]), method=sigmoid)
layers[[i]] <- (x%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]])
} else if (i != length(synapse_dim) - 1 & i != 1){ # hidden layers not linked to the input layer; they depend on the state from the previous time step
layers[[i]] <- sigmoid((layers[[i-1]]%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]]), method=sigmoid)
layers[[i]] <- (layers[[i-1]]%*%time_synapse[[i]]) + (layers_values[[i]][dim(layers_values[[i]])[1],] %*% recurrent_synapse[[i]])
} else { # the output layer depends only on the hidden layer below
layers[[i]] <- sigmoid(layers[[i-1]] %*% time_synapse[[i]], method=sigmoid)
layers[[i]] <- layers[[i-1]] %*% time_synapse[[i]]
}
if(use_bias == T){ # apply the bias if applicable
layers[[i]] <- layers[[i]] + bias_synapse[[i]]
}
# apply the activation function
layers[[i]] <- sigmoid(layers[[i]], method=sigmoid)

# storing
store[[i]][j,position,] = layers[[i]]
@@ -165,7 +174,7 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
# input states
x = a[pos_vec_back[position+1],]
# error at output layer
layer_up_delta = layer_2_deltas[dim(layer_2_deltas)[1]-position,]
layer_up_delta = array(layer_2_deltas[dim(layer_2_deltas)[1]-position,],dim=c(1,output_dim)) # explicit array dimensions so that colMeans() works on layer_up_delta for the bias update

for(i in (length(synapse_dim) - 1):1){
if(i != 1){ # need update for time and recurrent synapse
@@ -175,6 +184,7 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
layer_current_delta = (future_layer_delta[[i-1]] %*% t(recurrent_synapse[[i-1]]) + layer_up_delta %*% t(time_synapse[[i]])) *
sigmoid_output_to_derivative(layer_current)
time_synapse_update[[i]] = time_synapse_update[[i]] + matrix(layer_current) %*% layer_up_delta
bias_synapse_update[[i]] = bias_synapse_update[[i]] + colMeans(layer_up_delta)
recurrent_synapse_update[[i-1]] = recurrent_synapse_update[[i-1]] + matrix(prev_layer_current) %*% layer_current_delta
layer_up_delta = layer_current_delta
future_layer_delta[[i-1]] = layer_current_delta
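The `array()` wrapper on `layer_up_delta` is what makes the new bias line work: `colMeans()` needs an object with at least two dimensions, and subsetting a matrix with `[k, ]` drops them to a bare vector. A quick illustration:

```r
delta <- c(0.1, -0.2, 0.3)            # what m[k, ] yields once dims are dropped
# colMeans(delta)                     # error: 'x' must be an array of at least two dimensions
delta <- array(delta, dim = c(1, 3))  # reshape, as the diff does with dim = c(1, output_dim)
colMeans(delta)                       # 0.1 -0.2  0.3: per-unit bias gradient contribution
```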
@@ -188,11 +198,13 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid

# Calculate the real update including learning rate
time_synapse_update = lapply(time_synapse_update,function(x){x* learningrate})
bias_synapse_update = lapply(bias_synapse_update,function(x){x* learningrate})
recurrent_synapse_update = lapply(recurrent_synapse_update,function(x){x* learningrate})

# Applying the update
for(i in seq(length(synapse_dim) - 1)){
time_synapse[[i]] <- time_synapse[[i]] + time_synapse_update[[i]]
bias_synapse[[i]] <- bias_synapse[[i]] + bias_synapse_update[[i]]
}
for(i in seq(length(hidden_dim))){
recurrent_synapse[[i]] <- recurrent_synapse[[i]] + recurrent_synapse_update[[i]]
@@ -203,6 +215,7 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid

# Initializing the update with the momentum
time_synapse_update = lapply(time_synapse_update,function(x){x* momentum})
bias_synapse_update = lapply(bias_synapse_update,function(x){x* momentum})
recurrent_synapse_update = lapply(recurrent_synapse_update,function(x){x* momentum})
}
# update best guess if error is minimal
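Taken together, the cycle for each synapse list (time, bias, recurrent) is: accumulate raw gradients over the pass, scale once by the learning rate, add to the weights, then keep a momentum fraction of the applied update as the seed for the next accumulation. A compact sketch for a single parameter; `grad_step()` and `n_steps` are hypothetical stand-ins:

```r
# schematic of the update cycle; grad_step() and n_steps are hypothetical
update <- 0
for (s in seq_len(n_steps)) {
  update <- update + grad_step(s)  # accumulate the raw gradient
}
update <- update * learningrate    # scale once per pass
W <- W + update                    # apply
update <- update * momentum        # carry a fraction into the next pass
```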
@@ -218,10 +231,14 @@ trainr <- function(Y, X, learningrate, learningrate_decay = 1, momentum = 0, hid
error = error,
store = store,
store_best = store_best,
start_from_end = start_from_end)
start_from_end = start_from_end,
sigmoid = sigmoid)

attr(output, 'error') <- colMeans(error)

if(use_bias == T){ # append bias_synapse if applicable
output$bias_synapse = bias_synapse
}

# return output
return(output)
4 changes: 3 additions & 1 deletion man/trainr.Rd

Some generated files are not rendered by default.

4 changes: 3 additions & 1 deletion tests/testthat/test_rnn.R
@@ -25,6 +25,8 @@ model <- trainr(Y=Y,
numepochs = 2,
start_from_end = TRUE )

set.seed(1) # reset the seed: the RNG state has advanced inside trainr because of the bias generation, so this is needed to compare results before and after the bias implementation

# create test inputs
A1 = int2bin( sample(0:127, 7000, replace=TRUE) )
A2 = int2bin( sample(0:127, 7000, replace=TRUE) )
@@ -36,4 +38,4 @@ A <- array( c(A1,A2), dim=c(dim(A1),2) )
B <- predictr(model, A)

# inspect the differences
expect_equal(sum(bin2int(B)), 886211)
expect_equal(sum(bin2int(B)), 888626)
2 changes: 1 addition & 1 deletion vignettes/rnn.Rmd
@@ -26,7 +26,7 @@ We can view the code of the main `rnn()` function by calling it without the para
trainr
```

As can be seen from the above, the model relies on two other functions that are available trough the `sigmoid` package.
As can be seen from the above, the model relies on two other functions that are available through the `sigmoid` package.

The first function is `logistic()`, which converts a number to its sigmoid value.
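A quick check of that function, assuming the `sigmoid` package is installed:

```r
library(sigmoid)
logistic(0)  # 0.5, since 1 / (1 + exp(-0)) = 0.5
logistic(2)  # ~0.881
```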

