From 617b3a5bc1481cd329d0255e61350bbdb6e1fe34 Mon Sep 17 00:00:00 2001
From: Douglas Bagnall
Date: Fri, 1 Aug 2014 16:51:28 +1200
Subject: [PATCH] Add optional pre-synaptic noise

This adds Gaussian noise after the matrix calculation and before the
non-linearity (i.e. the rectifier).

On test_backprop, the effect seems to be to speed up each generation,
but to converge at a higher entropy than would be obtained without
noise.

Not very well tested outside of test_backprop.
---
 charmodel-classify.c    |  2 +-
 charmodel-helpers.h     |  4 ++--
 charmodel-predict.c     |  8 ++++----
 context-recurse.c       |  6 +++---
 gstclassify.c           | 21 ++++++++++++++++++---
 gstparrot.c             |  5 +++--
 gstparrot.h             |  2 +-
 gstrnnca.c              |  8 ++++----
 gstrnnca.h              |  1 +
 recur-context.h         |  1 +
 recur-nn-helpers.h      | 14 ++++++++++++++
 recur-nn-init.c         | 15 ++++++++++-----
 recur-nn-io.c           | 13 +++++++++----
 recur-nn.c              | 15 +++++++++------
 recur-nn.h              | 10 +++++-----
 test/test_fb_backprop.c |  6 +++---
 test_backprop.c         |  7 ++++++-
 17 files changed, 94 insertions(+), 44 deletions(-)

diff --git a/charmodel-classify.c b/charmodel-classify.c
index 12fe608..fc3e892 100644
--- a/charmodel-classify.c
+++ b/charmodel-classify.c
@@ -93,7 +93,7 @@ rnn_char_classify_epoch(RnnCharClassifier *model, const RnnCharClassifiedChar *t
     RnnCharClassifiedChar t = text[offset];
     RecurNN *n = nets[j];
     rnn_bptt_advance(n);
-    float *answer = one_hot_opinion(net, t.symbol);
+    float *answer = one_hot_opinion(net, t.symbol, net->presynaptic_noise);
     if (t.class != NO_CLASS){
       float *error = n->bptt->o_error;
       ASSUME_ALIGNED(error);
diff --git a/charmodel-helpers.h b/charmodel-helpers.h
index 57b77dc..8bf0ed5 100644
--- a/charmodel-helpers.h
+++ b/charmodel-helpers.h
@@ -11,7 +11,7 @@ capped_log2f(float x){
 }
 
 static inline float*
-one_hot_opinion(RecurNN *net, int hot){
+one_hot_opinion(RecurNN *net, int hot, float presynaptic_noise){
   float *inputs;
   int len;
   if (net->bottom_layer){
@@ -26,6 +26,6 @@ one_hot_opinion(RecurNN *net, int hot){
   //XXX could just set the previous one to zero (i.e. remember it)
   memset(inputs, 0, len * sizeof(float));
   inputs[hot] = 1.0f;
-  return rnn_opinion(net, NULL);
+  return rnn_opinion(net, NULL, presynaptic_noise);
 }
 
diff --git a/charmodel-predict.c b/charmodel-predict.c
index 8361bdb..0bba8c4 100644
--- a/charmodel-predict.c
+++ b/charmodel-predict.c
@@ -17,7 +17,7 @@ This uses the RNN to predict the next character in a text sequence.
 static inline float
 net_error_bptt(RecurNN *net, float *restrict error, int c, int next, int *correct){
   ASSUME_ALIGNED(error);
-  float *answer = one_hot_opinion(net, c);
+  float *answer = one_hot_opinion(net, c, net->presynaptic_noise);
   int winner;
   winner = softmax_best_guess(error, answer, net->output_size);
   *correct = (winner == next);
@@ -29,7 +29,7 @@ static inline int
 guess_next_character(RecurNN *net, int hot, float bias){
   int i;
   float r;
-  float *answer = one_hot_opinion(net, hot);
+  float *answer = one_hot_opinion(net, hot, 0);
   ASSUME_ALIGNED(answer);
   int len = net->output_size;
   if (bias >= 100){
@@ -67,10 +67,10 @@ validate(RecurNN *net, const u8 *text, int len){
   /*skip the first few because state depends too much on previous experience */
   int skip = MIN(len / 10, 5);
   for (i = 0; i < skip; i++){
-    one_hot_opinion(net, text[i]);
+    one_hot_opinion(net, text[i], 0);
   }
   for (; i < len - 1; i++){
-    float *answer = one_hot_opinion(net, text[i]);
+    float *answer = one_hot_opinion(net, text[i], 0);
     softmax(error, answer, n_chars);
     float e = error[text[i + 1]];
     entropy += capped_log2f(e);
diff --git a/context-recurse.c b/context-recurse.c
index 688e778..70dbb46 100644
--- a/context-recurse.c
+++ b/context-recurse.c
@@ -93,7 +93,7 @@ recur_setup_nets(RecurContext *context, const char *log_file)
   if (net == NULL){
     net = rnn_new(RECUR_N_MFCCS + RECUR_N_VIDEO_FEATURES,
         RECUR_N_HIDDEN, RECUR_OUTPUT_SIZE, flags, RECUR_RNG_SEED,
-        log_file, RECUR_BPTT_DEPTH, LEARN_RATE, MOMENTUM);
+        log_file, RECUR_BPTT_DEPTH, LEARN_RATE, PRESYNAPTIC_NOISE, MOMENTUM);
     rnn_randomise_weights_auto(net);
   }
   context->net = net;
@@ -136,7 +136,7 @@ recur_train_nets(RecurContext *context, RecurFrame *src_frame,
       RECUR_INPUT_WIDTH + 2, RECUR_INPUT_HEIGHT + 2,
       t->x - t->scale, t->y - t->scale,
       t->scale * RECUR_RESOLUTION_GAIN);
-  float *answer = rnn_opinion(net, NULL);
+  float *answer = rnn_opinion(net, NULL, net->presynaptic_noise);
   ASSUME_ALIGNED(answer);
   fast_sigmoid_array(answer, answer, net->o_size);
 
@@ -232,7 +232,7 @@ rnn_recursive_opinion(RecurContext *context, int index)
   int i;
   RecurNN **constructors = context->constructors;
   RecurNN *net = constructors[index];
-  float *image = rnn_opinion(net, NULL);
+  float *image = rnn_opinion(net, NULL, 0);
   const int mul = RECUR_RESOLUTION_GAIN * RECUR_RESOLUTION_GAIN;
   int first_child = index * mul + 1;
   if (first_child < RECUR_N_CONSTRUCTORS){
diff --git a/gstclassify.c b/gstclassify.c
index c162850..6f8e244 100644
--- a/gstclassify.c
+++ b/gstclassify.c
@@ -62,6 +62,7 @@ enum
   PROP_CONFIRMATION_LAG,
   PROP_LOAD_NET_NOW,
   PROP_WINDOWS_PER_SECOND,
+  PROP_PRESYNAPTIC_NOISE,
   PROP_LAST
 };
 
@@ -90,6 +91,7 @@ enum
 #define DEFAULT_PROP_WEIGHT_INIT_SCALE 0.0f
 #define DEFAULT_PROP_GENERATION 0
 #define DEFAULT_PROP_WINDOWS_PER_SECOND 0
+#define DEFAULT_PROP_PRESYNAPTIC_NOISE 0
 #define DEFAULT_PROP_CLASSES "01"
 #define DEFAULT_PROP_BPTT_DEPTH 30
 
@@ -562,6 +564,13 @@ gst_classify_class_init (GstClassifyClass * klass)
           DEFAULT_PROP_WEIGHT_INIT_SCALE,
           G_PARAM_WRITABLE | G_PARAM_STATIC_STRINGS));
 
+  g_object_class_install_property (gobject_class, PROP_PRESYNAPTIC_NOISE,
+      g_param_spec_float("presynaptic-noise", "presynaptic-noise",
+          "Add this much noise before the nonlinear transform",
+          0, G_MAXFLOAT,
+          DEFAULT_PROP_PRESYNAPTIC_NOISE,
+          G_PARAM_WRITABLE | G_PARAM_STATIC_STRINGS));
+
   g_object_class_install_property (gobject_class, PROP_GENERATION,
       g_param_spec_uint("generation", "generation",
           "Read the net's training generation",
@@ -913,6 +922,8 @@ create_net(GstClassify *self, int bottom_layer_size,
GST_DEBUG("rng seed %lu", rng_seed); float weight_init_scale = PP_GET_FLOAT(self, PROP_WEIGHT_INIT_SCALE, DEFAULT_PROP_WEIGHT_INIT_SCALE); + float presynaptic_noise = PP_GET_FLOAT(self, PROP_PRESYNAPTIC_NOISE, + DEFAULT_PROP_PRESYNAPTIC_NOISE); int lawnmower = PP_GET_BOOLEAN(self, PROP_LAWN_MOWER, DEFAULT_PROP_LAWN_MOWER); if (lawnmower){ @@ -923,7 +934,7 @@ create_net(GstClassify *self, int bottom_layer_size, } net = rnn_new_with_bottom_layer(n_features, bottom_layer_size, hidden_size, top_layer_size, flags, rng_seed, - NULL, bptt_depth, learn_rate, momentum, 0); + NULL, bptt_depth, learn_rate, momentum, presynaptic_noise, 0); initialise_net(self, net); @@ -1417,6 +1428,9 @@ maybe_set_net_scalar(GstClassify *self, guint prop_id, const GValue *value) case PROP_MOMENTUM: SET_FLOAT(net->bptt->momentum); break; + case PROP_PRESYNAPTIC_NOISE: + SET_FLOAT(net->presynaptic_noise); + break; case PROP_BOTTOM_LEARN_RATE_SCALE: if (net->bottom_layer){ SET_FLOAT(net->bottom_layer->learn_rate_scale); @@ -1515,6 +1529,7 @@ gst_classify_set_property (GObject * object, guint prop_id, const GValue * value case PROP_TOP_LEARN_RATE_SCALE: case PROP_BOTTOM_LEARN_RATE_SCALE: case PROP_LEARN_RATE: + case PROP_PRESYNAPTIC_NOISE: case PROP_MOMENTUM: maybe_set_net_scalar(self, prop_id, value); break; @@ -1789,7 +1804,7 @@ prepare_channel_features(GstClassify *self, s16 *buffer_i, int j){ static inline float train_channel(GstClassify *self, ClassifyChannel *c, int *win_count){ RecurNN *net = c->net; - float *answer = rnn_opinion(net, c->features); + float *answer = rnn_opinion(net, c->features, net->presynaptic_noise); float *error = net->bptt->o_error; float wrongness = 0; for (int i = 0; i < self->n_groups; i++){ @@ -1923,7 +1938,7 @@ emit_opinions(GstClassify *self, GstClockTime pts){ ClassifyChannel *c = prepare_channel_features(self, buffer, j); RecurNN *net = c->net; float *error = net->bptt->o_error; - float *answer = rnn_opinion(net, c->features); + float *answer = rnn_opinion(net, c->features, 0); for (i = 0; i < self->n_groups; i++){ ClassifyClassGroup *g = &self->class_groups[i]; int o = g->offset; diff --git a/gstparrot.c b/gstparrot.c index 38ef0f3..7d0d487 100644 --- a/gstparrot.c +++ b/gstparrot.c @@ -260,7 +260,8 @@ load_or_create_net(GstParrot *self){ if (net == NULL){ net = rnn_new(PARROT_N_FEATURES, self->hidden_size, PARROT_N_FEATURES, PARROT_RNN_FLAGS, PARROT_RNG_SEED, - NULL, PARROT_BPTT_DEPTH, self->learn_rate, MOMENTUM); + NULL, PARROT_BPTT_DEPTH, self->learn_rate, PARROT_PRESYNAPTIC_NOISE, + MOMENTUM); rnn_randomise_weights_auto(net); } else { @@ -453,7 +454,7 @@ possibly_save_net(RecurNN *net, char *filename) static inline float * tanh_opinion(RecurNN *net, float *in){ - float *answer = rnn_opinion(net, in); + float *answer = rnn_opinion(net, in, 0); for (int i = 0; i < net->output_size; i++){ answer[i] = fast_tanhf(answer[i]); } diff --git a/gstparrot.h b/gstparrot.h index fe62bf3..f33f7cf 100644 --- a/gstparrot.h +++ b/gstparrot.h @@ -36,7 +36,7 @@ G_BEGIN_DECLS #define PARROT_MFCC_MAX_FREQ (PARROT_RATE * 0.499) #define PARROT_MFCC_KNEE_FREQ 700 #define PARROT_MFCC_FOCUS_FREQ 0 - +#define PARROT_PRESYNAPTIC_NOISE 0 #define PARROT_EXTRA_FLAGS (RNN_NET_FLAG_BPTT_ADAPTIVE_MIN_ERROR ) diff --git a/gstrnnca.c b/gstrnnca.c index f8eca97..d0a0741 100644 --- a/gstrnnca.c +++ b/gstrnnca.c @@ -36,7 +36,6 @@ enum PROP_MOMENTUM, }; - #define DEFAULT_PROP_PGM_DUMP "" #define DEFAULT_PROP_LOG_FILE "" #define DEFAULT_PROP_OFFSETS RNNCA_DEFAULT_PATTERN @@ -327,7 +326,8 @@ load_or_create_net(GstRnnca 
     int input_size = self->len_Y + self->len_C * 2 + self->len_pos;
     net = rnn_new(input_size, self->hidden_size, 3, RNNCA_RNN_FLAGS,
         RNNCA_RNG_SEED,
-        NULL, RNNCA_BPTT_DEPTH, DEFAULT_LEARN_RATE, self->momentum);
+        NULL, RNNCA_BPTT_DEPTH, DEFAULT_LEARN_RATE,
+        RNNCA_PRESYNAPTIC_NOISE, self->momentum);
     rnn_randomise_weights_auto(net);
     //net->bptt->ho_scale = 0.25;
   }
@@ -697,7 +697,7 @@ train_net(GstRnnca *self, RnncaTrainer *t, RnncaFrame *prev, RnncaFrame *now){
   /*trainers are not on edges, so edge condition doesn't much matter */
   fill_net_inputs(self, net, prev, t->x, t->y, 1);
   float *answer;
-  answer = rnn_opinion(net, NULL);
+  answer = rnn_opinion(net, NULL, net->presynaptic_noise);
   fast_sigmoid_array(answer, answer, 3);
   offset = t->y * RNNCA_WIDTH + t->x;
   GST_DEBUG("x %d, y %d, offset %d", t->x, t->y, offset);
@@ -812,7 +812,7 @@ fill_frame(GstRnnca *self, GstVideoFrame *frame){
     for (x = 0; x < RNNCA_WIDTH; x++){
       RecurNN *net = self->constructors[y * RNNCA_WIDTH + x];
       fill_net_inputs(self, net, self->play_frame, x, y, self->edges);
-      float *answer = rnn_opinion(net, NULL);
+      float *answer = rnn_opinion(net, NULL, 0);
       fast_sigmoid_array(answer, answer, 3);
       GST_LOG("answer gen %d, x %d y %d, %.2g %.2g %.2g", net->generation,
           x, y, answer[0], answer[1], answer[2]);
diff --git a/gstrnnca.h b/gstrnnca.h
index c3b1c7f..9774a85 100644
--- a/gstrnnca.h
+++ b/gstrnnca.h
@@ -22,6 +22,7 @@ G_BEGIN_DECLS
 
 #define RNNCA_BPTT_DEPTH 10
 #define RNNCA_DO_TEMPORAL_LOGGING 0
+#define RNNCA_PRESYNAPTIC_NOISE 0
 
 #define LONG_WALK 0
 
diff --git a/recur-context.h b/recur-context.h
index 2aebc4f..499bd4e 100644
--- a/recur-context.h
+++ b/recur-context.h
@@ -53,6 +53,7 @@ typedef s16 audio_sample;
 #define RECUR_OUTPUT_SIZE (RECUR_OUTPUT_HEIGHT * RECUR_OUTPUT_WIDTH * 3)
 
 #define LEARN_RATE 1e-5
+#define PRESYNAPTIC_NOISE 0
 #define MOMENTUM 0.95
 
 #define RECUR_FQ_LENGTH 16
diff --git a/recur-nn-helpers.h b/recur-nn-helpers.h
index d88923d..fc398d7 100644
--- a/recur-nn-helpers.h
+++ b/recur-nn-helpers.h
@@ -166,4 +166,18 @@ zero_aligned_array(float *array, int size){
 #endif
 }
 
+static inline void
+add_array_noise(rand_ctx *rng, float *array, int len, float deviation){
+  for (int i = 0; i < len; i++){
+    float noise = cheap_gaussian_noise(rng) * deviation;
+    array[i] += noise;
+  }
+}
+
+#define MAYBE_ADD_ARRAY_NOISE(rng, array, len, dev) do { \
+    if (dev) \
+      add_array_noise(rng, array, len, dev); \
+  } while(0)
+
+
 #endif
diff --git a/recur-nn-init.c b/recur-nn-init.c
index fd19630..42f0e46 100644
--- a/recur-nn-init.c
+++ b/recur-nn-init.c
@@ -69,7 +69,7 @@ new_bptt(RecurNN *net, int depth, float learn_rate, float momentum, u32 flags){
 RecurNN *
 rnn_new(uint input_size, uint hidden_size, uint output_size, u32 flags,
     u64 rng_seed, const char *log_file, int bptt_depth, float learn_rate,
-    float momentum){
+    float momentum, float presynaptic_noise){
   RecurNN *net = calloc(1, sizeof(RecurNN));
   float *fm;
   /*sizes */
@@ -89,6 +89,7 @@ rnn_new(uint input_size, uint hidden_size, uint output_size, u32 flags,
   net->ho_size = ho_size;
   net->generation = 0;
   net->flags = flags;
+  net->presynaptic_noise = presynaptic_noise;
   init_rand64_maybe_randomly(&net->rng, rng_seed);
 
   size_t alloc_bytes = (i_size + h_size + o_size) * sizeof(float);
@@ -177,7 +178,7 @@ rnn_new_extra_layer(int input_size, int output_size, int overlap,
 RecurNN *rnn_new_with_bottom_layer(int n_inputs, int r_input_size,
     int hidden_size, int output_size, u32 flags, u64 rng_seed,
     const char *log_file, int bptt_depth, float learn_rate,
-    float momentum, int convolutional_overlap)
+    float momentum, float presynaptic_noise, int convolutional_overlap)
 {
   RecurNN *net;
   if (r_input_size == 0){
@@ -185,12 +186,14 @@ RecurNN *rnn_new_with_bottom_layer(int n_inputs, int r_input_size,
         "due to zero internal size");
     flags &= ~RNN_NET_FLAG_BOTTOM_LAYER;
     net = rnn_new(n_inputs, hidden_size, output_size,
-        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum);
+        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum,
+        presynaptic_noise);
   }
   else {
     flags |= RNN_NET_FLAG_BOTTOM_LAYER;
     net = rnn_new(r_input_size, hidden_size, output_size,
-        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum);
+        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum,
+        presynaptic_noise);
     net->bottom_layer = rnn_new_extra_layer(n_inputs, r_input_size,
         convolutional_overlap, net->flags);
 
@@ -300,7 +303,8 @@ rnn_clone(RecurNN *parent, u32 flags,
     momentum = 0;
   }
   net = rnn_new(parent->input_size, parent->hidden_size, parent->output_size,
-      flags, rng_seed, log_file, bptt_depth, learn_rate, momentum);
+      flags, rng_seed, log_file, bptt_depth, learn_rate, momentum,
+      parent->presynaptic_noise);
 
   if (parent->bptt && (flags & RNN_NET_FLAG_OWN_BPTT)){
     net->bptt->momentum_weight = parent->bptt->momentum_weight;
@@ -324,6 +328,7 @@ rnn_clone(RecurNN *parent, u32 flags,
   /*for now, the bottom layers can be shared */
   net->bottom_layer = parent->bottom_layer;
   net->generation = parent->generation;
+  net->presynaptic_noise = parent->presynaptic_noise;
   return net;
 }
 
diff --git a/recur-nn-io.c b/recur-nn-io.c
index 4f9ba68..126da1c 100644
--- a/recur-nn-io.c
+++ b/recur-nn-io.c
@@ -38,8 +38,9 @@ rnn_save_net(RecurNN *net, const char *filename, int backup){
    * 6: doesn't save BPTT training arrays (e.g. momentum) or hidden state
    * 7: includes net->clockwork_cycles
    * 8: drops net->clockwork_cycles
+   * 9: adds net->presynaptic_noise
    */
-  const int version = 8;
+  const int version = 9;
   cdb_make_add(&cdbm, FORMAT_VERSION, strlen(FORMAT_VERSION), &version, sizeof(version));
 
 #define SAVE_SCALAR(obj, attr) do { \
@@ -81,6 +82,7 @@ rnn_save_net(RecurNN *net, const char *filename, int backup){
   SAVE_SCALAR(net, ho_size);
   SAVE_SCALAR(net, generation);
   SAVE_SCALAR(net, flags);
+  SAVE_SCALAR(net, presynaptic_noise);
   SAVE_SCALAR(net, rng); /* a struct, should work? */
 
   SAVE_ARRAY(net, ih_weights, net->ih_size);
@@ -203,6 +205,7 @@ rnn_load_net(const char *filename){
   READ_SCALAR(net, rng);
   READ_SCALAR(net, generation);
   READ_SCALAR(net, flags);
+  READ_SCALAR_IF_VERSION_ELSE_DEFAULT(net, presynaptic_noise, 9, 0);
 
   if (tmpnet.flags & RNN_NET_FLAG_OWN_BPTT){
     READ_SCALAR(bptt, depth);
@@ -229,12 +232,13 @@ rnn_load_net(const char *filename){
     net = rnn_new_with_bottom_layer(tmpbl.input_size, tmpbl.output_size,
         tmpnet.hidden_size, tmpnet.output_size, tmpnet.flags, 0,
         NULL, tmpbptt.depth, tmpbptt.learn_rate, tmpbptt.momentum,
-        tmpbl.overlap);
+        tmpnet.presynaptic_noise, tmpbl.overlap);
   }
   else {
     net = rnn_new(tmpnet.input_size, tmpnet.hidden_size,
-        tmpnet.output_size, tmpnet.flags, 0, NULL,
-        tmpbptt.depth, tmpbptt.learn_rate, tmpbptt.momentum);
+        tmpnet.output_size, tmpnet.flags, 0, NULL,
+        tmpbptt.depth, tmpbptt.learn_rate, tmpbptt.momentum,
+        tmpnet.presynaptic_noise);
   }
   bptt = net->bptt;
   bottom_layer = net->bottom_layer;
@@ -272,6 +276,7 @@ rnn_load_net(const char *filename){
 
   CHECK_SCALAR(net, tmpnet, generation);
   CHECK_SCALAR(net, tmpnet, flags);
+  CHECK_SCALAR(net, tmpnet, presynaptic_noise);
 
   if (bptt){
     CHECK_SCALAR(bptt, tmpbptt, depth);
diff --git a/recur-nn.c b/recur-nn.c
index 18cc3fc..4d88994 100644
--- a/recur-nn.c
+++ b/recur-nn.c
@@ -80,7 +80,7 @@ maybe_scale_inputs(RecurNN *net){
 }
 
 float *
-rnn_opinion(RecurNN *net, const float *restrict inputs){
+rnn_opinion(RecurNN *net, const float *restrict inputs, float presynaptic_noise){
   /*If inputs is NULL, assume the inputs have already been set.*/
   float *restrict hiddens = net->hidden_layer;
   ASSUME_ALIGNED(hiddens);
@@ -93,6 +93,8 @@ rnn_opinion(RecurNN *net, const float *restrict inputs){
     }
     calculate_interlayer(layer->inputs, layer->i_size,
         layer->outputs, layer->o_size, layer->weights);
+    MAYBE_ADD_ARRAY_NOISE(&net->rng, layer->outputs + 1, net->input_size - 1,
+        presynaptic_noise);
     memcpy(net->real_inputs, layer->outputs, net->input_size * sizeof(float));
   }
   else if (inputs){
@@ -102,7 +104,7 @@ rnn_opinion(RecurNN *net, const float *restrict inputs){
 
   /*copy in hiddens */
   memcpy(net->input_layer, hiddens, INPUT_OFFSET(net) * sizeof(float));
-  /*bias, possibly unnecessary because it may not get overwritten */
+  /*bias, possibly unnecessary because it probably doesn't get overwritten */
   net->input_layer[0] = 1.0f;
 
   /* in emergencies, clamp the scale of the input vector */
@@ -110,6 +112,10 @@ rnn_opinion(RecurNN *net, const float *restrict inputs){
 
   calculate_interlayer(net->input_layer, net->i_size, hiddens, net->h_size,
       net->ih_weights);
+
+  MAYBE_ADD_ARRAY_NOISE(&net->rng, net->hidden_layer + 1, net->h_size - 1,
+      presynaptic_noise);
+
   for (int i = 1; i < net->h_size; i++){
     float h = hiddens[i] - RNN_HIDDEN_PENALTY;
     hiddens[i] = (h > 0.0f) ? h : 0.0f;
@@ -620,10 +626,7 @@ weight_noise(rand_ctx *rng, float *weights, int width, int stride, int height,
     float deviation){
   for (int y = 0; y < height; y++){
     float *row = weights + y * stride;
-    for (int x = 0; x < width; x++){
-      float noise = cheap_gaussian_noise(rng) * deviation;
-      row[x] += noise;
-    }
+    add_array_noise(rng, row, width, deviation);
   }
 }
 
diff --git a/recur-nn.h b/recur-nn.h
index d13d5eb..044453f 100644
--- a/recur-nn.h
+++ b/recur-nn.h
@@ -162,6 +162,7 @@ struct _RecurNN {
   RecurExtraLayer *bottom_layer;
   char *metadata;
   u32 generation;
+  float presynaptic_noise;
 };
 
 struct _RecurNNBPTT {
@@ -237,10 +238,9 @@ struct RecurInitialisationParameters {
 
 RecurNN * rnn_new(uint input_size, uint hidden_size, uint output_size,
     u32 flags, u64 rng_seed, const char *log_file, int depth, float learn_rate,
-    float momentum);
+    float momentum, float presynaptic_noise);
 
-RecurNN * rnn_clone(RecurNN *parent, u32 flags,
-    u64 rng_seed, const char *log_file);
+RecurNN * rnn_clone(RecurNN *parent, u32 flags, u64 rng_seed, const char *log_file);
 
 RecurExtraLayer *rnn_new_extra_layer(int input_size, int output_size,
     int overlap, u32 flags);
@@ -248,7 +248,7 @@ RecurExtraLayer *rnn_new_extra_layer(int input_size, int output_size, int overla
 RecurNN *rnn_new_with_bottom_layer(int n_inputs, int r_input_size,
     int hidden_size, int output_size, u32 flags, u64 rng_seed,
     const char *log_file, int bptt_depth, float learn_rate,
-    float momentum, int convolutional_overlap);
+    float momentum, float presynaptic_noise, int convolutional_overlap);
 
 void rnn_set_log_file(RecurNN *net, const char * log_file,
     int append_dont_truncate);
@@ -268,7 +268,7 @@ void rnn_delete_net(RecurNN *net);
 RecurNN ** rnn_new_training_set(RecurNN *prototype, int n_nets);
 void rnn_delete_training_set(RecurNN **nets, int n_nets, int leave_prototype);
 
-float *rnn_opinion(RecurNN *net, const float *inputs);
+float *rnn_opinion(RecurNN *net, const float *inputs, float presynaptic_noise);
 
 void rnn_multi_pgm_dump(RecurNN *net, const char *dumpees,
     const char *basename);
diff --git a/test/test_fb_backprop.c b/test/test_fb_backprop.c
index 861fb07..cc10f9f 100644
--- a/test/test_fb_backprop.c
+++ b/test/test_fb_backprop.c
@@ -42,7 +42,7 @@ load_char_input(RecurNN *net, int c){
 static inline float
 net_error_bptt(RecurNN *net, float *error, int c, int next){
   load_char_input(net, c);
-  float *answer = rnn_opinion(net, NULL);
+  float *answer = rnn_opinion(net, NULL, net->presynaptic_noise);
   error[0] = (next & 1) - (answer[0] > 0);
   error[1] = (!!(next & 2)) - (answer[1] > 0);
   return (fabsf(error[0]) + fabsf(error[1])) * 0.5;
@@ -60,7 +60,7 @@ sgd_one(RecurNN *net, const int current, const int next, uint batch_size){
 static inline int
 char_opinion(RecurNN *net, int c){
   load_char_input(net, c);
-  float * answer = rnn_opinion(net, NULL);
+  float * answer = rnn_opinion(net, NULL, 0);
   int a = ((answer[1] > 0) << 1) | (answer[0] > 0);
   return a;
 }
@@ -132,7 +132,7 @@ main(void){
   feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
   RecurNN *net = rnn_new(INPUT_SIZE, HIDDEN_SIZE, INPUT_SIZE,
       RNN_NET_FLAG_STANDARD,
-      1, NET_LOG_FILE, BPTT_DEPTH, LEARN_RATE, MOMENTUM);
+      1, NET_LOG_FILE, BPTT_DEPTH, LEARN_RATE, 0, MOMENTUM);
   rnn_randomise_weights_auto(net);
   START_TIMER(epoch);
   epoch(net, 5000000);
diff --git a/test_backprop.c b/test_backprop.c
index 0f9144f..2310d66 100644
--- a/test_backprop.c
+++ b/test_backprop.c
@@ -100,6 +100,7 @@ Because of ccan/opt, --help will tell you something.
 #define DEFAULT_FIND_ALPHABET_THRESHOLD 0
 #define DEFAULT_FIND_ALPHABET_DIGIT_ADJUST 1.0
 #define DEFAULT_FIND_ALPHABET_ALPHA_ADJUST 1.0
+#define DEFAULT_PRESYNAPTIC_NOISE 0.0f
 
 #define BELOW_QUIET_LEVEL(quiet) if (opt_quiet < quiet)
 
@@ -168,6 +169,7 @@ static bool opt_collapse_space = DEFAULT_COLLAPSE_SPACE;
 static double opt_find_alphabet_threshold = DEFAULT_FIND_ALPHABET_THRESHOLD;
 static double opt_find_alphabet_digit_adjust = DEFAULT_FIND_ALPHABET_DIGIT_ADJUST;
 static double opt_find_alphabet_alpha_adjust = DEFAULT_FIND_ALPHABET_ALPHA_ADJUST;
+static float opt_presynaptic_noise = DEFAULT_PRESYNAPTIC_NOISE;
 
 #define IN_RANGE_01(x) (((x) >= 0.0f) && ((x) <= 1.0f))
 
@@ -315,6 +317,9 @@ static struct opt_table options[] = {
       &opt_find_alphabet_digit_adjust, "adjust digit frequency for alphabet calculations"),
   OPT_WITH_ARG("--find-alphabet-alpha-adjust", opt_set_doubleval, opt_show_doubleval,
       &opt_find_alphabet_alpha_adjust, "adjust letter frequency for alphabet calculation"),
+  OPT_WITH_ARG("--presynaptic-noise", opt_set_floatval, opt_show_floatval,
+      &opt_presynaptic_noise, "deviation of noise to add before non-linear transform"),
+
   OPT_WITHOUT_ARG("-h|--help", opt_usage_and_exit,
       ": Rnn modelling of text at the character level",
@@ -431,7 +436,7 @@ load_or_create_net(struct RnnCharMetadata *m, int alpha_len, int reload){
     net = rnn_new_with_bottom_layer(input_size, opt_bottom_layer,
         opt_hidden_size, output_size, flags, opt_rng_seed,
         opt_logfile, opt_bptt_depth, opt_learn_rate,
-        opt_momentum, 0);
+        opt_momentum, opt_presynaptic_noise, 0);
     initialise_net(net);
     net->bptt->momentum_weight = opt_momentum_weight;
     net->metadata = strdup(metadata);
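
Note (not part of the patch): below is a minimal standalone sketch of the
technique the patch wires into rnn_opinion(), for readers who don't want to
trace the diff. The gaussian_noise() helper and the toy pre-activation
values are hypothetical: gaussian_noise() is a Box-Muller stand-in for
recur's cheap_gaussian_noise(), and the real code takes a rand_ctx, skips
the bias element (array + 1), and subtracts RNN_HIDDEN_PENALTY before
rectifying.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* zero-mean, unit-variance noise; a Box-Muller stand-in for
   cheap_gaussian_noise() */
static float
gaussian_noise(void){
  float u1 = ((float)rand() + 1.0f) / ((float)RAND_MAX + 2.0f);
  float u2 = ((float)rand() + 1.0f) / ((float)RAND_MAX + 2.0f);
  return sqrtf(-2.0f * logf(u1)) * cosf(6.2831853f * u2);
}

/* the patch's add_array_noise(), minus the rand_ctx plumbing */
static void
add_array_noise(float *array, int len, float deviation){
  for (int i = 0; i < len; i++){
    array[i] += gaussian_noise() * deviation;
  }
}

int
main(void){
  /* made-up pre-activations: the hidden layer just after the
     input-to-hidden matrix product, before the rectifier */
  float hiddens[4] = {0.8f, -0.3f, 0.05f, 2.1f};
  float presynaptic_noise = 0.1f;  /* the new tunable deviation */

  /* a zero deviation keeps the old noiseless path, exactly as
     MAYBE_ADD_ARRAY_NOISE's "if (dev)" guard does */
  if (presynaptic_noise){
    add_array_noise(hiddens, 4, presynaptic_noise);
  }
  /* the rectifier then runs on the noised pre-activations */
  for (int i = 0; i < 4; i++){
    hiddens[i] = (hiddens[i] > 0.0f) ? hiddens[i] : 0.0f;
    printf("h[%d] = %g\n", i, hiddens[i]);
  }
  return 0;
}

Since every new constant and default in the patch (PRESYNAPTIC_NOISE,
PARROT_PRESYNAPTIC_NOISE, RNNCA_PRESYNAPTIC_NOISE, the gstclassify and
test_backprop defaults) is 0, the patch is behaviour-preserving until the
new knob is turned, and inference-time callers pass 0 explicitly to keep
their opinions deterministic.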