From 617b3a5bc1481cd329d0255e61350bbdb6e1fe34 Mon Sep 17 00:00:00 2001
From: Douglas Bagnall
Date: Fri, 1 Aug 2014 16:51:28 +1200
Subject: [PATCH] Add optional pre-synaptic noise

This adds Gaussian noise after the matrix calculation and before the
non-linearity (i.e. the rectifier).

On test_backprop, the effect seems to be to speed up each generation,
but to converge at a higher entropy than would be obtained without
noise.

Not very well tested outside of test_backprop.
---
 charmodel-classify.c    |  2 +-
 charmodel-helpers.h     |  4 ++--
 charmodel-predict.c     |  8 ++++----
 context-recurse.c       |  6 +++---
 gstclassify.c           | 21 ++++++++++++++++++---
 gstparrot.c             |  5 +++--
 gstparrot.h             |  2 +-
 gstrnnca.c              |  8 ++++----
 gstrnnca.h              |  1 +
 recur-context.h         |  1 +
 recur-nn-helpers.h      | 14 ++++++++++++++
 recur-nn-init.c         | 15 ++++++++++-----
 recur-nn-io.c           | 13 +++++++++----
 recur-nn.c              | 15 +++++++++------
 recur-nn.h              | 10 +++++-----
 test/test_fb_backprop.c |  6 +++---
 test_backprop.c         |  7 ++++++-
 17 files changed, 94 insertions(+), 44 deletions(-)

diff --git a/charmodel-classify.c b/charmodel-classify.c
index 12fe608..fc3e892 100644
--- a/charmodel-classify.c
+++ b/charmodel-classify.c
@@ -93,7 +93,7 @@ rnn_char_classify_epoch(RnnCharClassifier *model, const RnnCharClassifiedChar *t
     RnnCharClassifiedChar t = text[offset];
     RecurNN *n = nets[j];
     rnn_bptt_advance(n);
-    float *answer = one_hot_opinion(net, t.symbol);
+    float *answer = one_hot_opinion(net, t.symbol, net->presynaptic_noise);
     if (t.class != NO_CLASS){
       float *error = n->bptt->o_error;
       ASSUME_ALIGNED(error);
diff --git a/charmodel-helpers.h b/charmodel-helpers.h
index 57b77dc..8bf0ed5 100644
--- a/charmodel-helpers.h
+++ b/charmodel-helpers.h
@@ -11,7 +11,7 @@ capped_log2f(float x){
 }
 
 static inline float*
-one_hot_opinion(RecurNN *net, int hot){
+one_hot_opinion(RecurNN *net, int hot, float presynaptic_noise){
   float *inputs;
   int len;
   if (net->bottom_layer){
@@ -26,6 +26,6 @@ one_hot_opinion(RecurNN *net, int hot){
   //XXX could just set the previous one to zero (i.e. remember it)
   memset(inputs, 0, len * sizeof(float));
   inputs[hot] = 1.0f;
-  return rnn_opinion(net, NULL);
+  return rnn_opinion(net, NULL, presynaptic_noise);
 }
 
diff --git a/charmodel-predict.c b/charmodel-predict.c
index 8361bdb..0bba8c4 100644
--- a/charmodel-predict.c
+++ b/charmodel-predict.c
@@ -17,7 +17,7 @@ This uses the RNN to predict the next character in a text sequence.
 static inline float
 net_error_bptt(RecurNN *net, float *restrict error, int c, int next, int *correct){
   ASSUME_ALIGNED(error);
-  float *answer = one_hot_opinion(net, c);
+  float *answer = one_hot_opinion(net, c, net->presynaptic_noise);
   int winner;
   winner = softmax_best_guess(error, answer, net->output_size);
   *correct = (winner == next);
@@ -29,7 +29,7 @@ static inline int
 guess_next_character(RecurNN *net, int hot, float bias){
   int i;
   float r;
-  float *answer = one_hot_opinion(net, hot);
+  float *answer = one_hot_opinion(net, hot, 0);
   ASSUME_ALIGNED(answer);
   int len = net->output_size;
   if (bias >= 100){
@@ -67,10 +67,10 @@ validate(RecurNN *net, const u8 *text, int len){
   /*skip the first few because state depends too much on previous experience */
   int skip = MIN(len / 10, 5);
   for (i = 0; i < skip; i++){
-    one_hot_opinion(net, text[i]);
+    one_hot_opinion(net, text[i], 0);
   }
   for (; i < len - 1; i++){
-    float *answer = one_hot_opinion(net, text[i]);
+    float *answer = one_hot_opinion(net, text[i], 0);
     softmax(error, answer, n_chars);
     float e = error[text[i + 1]];
     entropy += capped_log2f(e);
diff --git a/context-recurse.c b/context-recurse.c
index 688e778..70dbb46 100644
--- a/context-recurse.c
+++ b/context-recurse.c
@@ -93,7 +93,7 @@ recur_setup_nets(RecurContext *context, const char *log_file)
   if (net == NULL){
     net = rnn_new(RECUR_N_MFCCS + RECUR_N_VIDEO_FEATURES,
         RECUR_N_HIDDEN, RECUR_OUTPUT_SIZE, flags, RECUR_RNG_SEED,
-        log_file, RECUR_BPTT_DEPTH, LEARN_RATE, MOMENTUM);
+        log_file, RECUR_BPTT_DEPTH, LEARN_RATE, PRESYNAPTIC_NOISE, MOMENTUM);
     rnn_randomise_weights_auto(net);
   }
   context->net = net;
@@ -136,7 +136,7 @@ recur_train_nets(RecurContext *context, RecurFrame *src_frame,
       RECUR_INPUT_WIDTH + 2, RECUR_INPUT_HEIGHT + 2,
       t->x - t->scale, t->y - t->scale,
       t->scale * RECUR_RESOLUTION_GAIN);
-  float *answer = rnn_opinion(net, NULL);
+  float *answer = rnn_opinion(net, NULL, net->presynaptic_noise);
   ASSUME_ALIGNED(answer);
   fast_sigmoid_array(answer, answer, net->o_size);
 
@@ -232,7 +232,7 @@ rnn_recursive_opinion(RecurContext *context, int index)
   int i;
   RecurNN **constructors = context->constructors;
   RecurNN *net = constructors[index];
-  float *image = rnn_opinion(net, NULL);
+  float *image = rnn_opinion(net, NULL, 0);
   const int mul = RECUR_RESOLUTION_GAIN * RECUR_RESOLUTION_GAIN;
   int first_child = index * mul + 1;
   if (first_child < RECUR_N_CONSTRUCTORS){
diff --git a/gstclassify.c b/gstclassify.c
index c162850..6f8e244 100644
--- a/gstclassify.c
+++ b/gstclassify.c
@@ -62,6 +62,7 @@ enum
   PROP_CONFIRMATION_LAG,
   PROP_LOAD_NET_NOW,
   PROP_WINDOWS_PER_SECOND,
+  PROP_PRESYNAPTIC_NOISE,
   PROP_LAST
 };
 
@@ -90,6 +91,7 @@ enum
 #define DEFAULT_PROP_WEIGHT_INIT_SCALE 0.0f
 #define DEFAULT_PROP_GENERATION 0
 #define DEFAULT_PROP_WINDOWS_PER_SECOND 0
+#define DEFAULT_PROP_PRESYNAPTIC_NOISE 0
 #define DEFAULT_PROP_CLASSES "01"
 #define DEFAULT_PROP_BPTT_DEPTH 30
 
@@ -562,6 +564,13 @@ gst_classify_class_init (GstClassifyClass * klass)
           DEFAULT_PROP_WEIGHT_INIT_SCALE,
           G_PARAM_WRITABLE | G_PARAM_STATIC_STRINGS));
 
+  g_object_class_install_property (gobject_class, PROP_PRESYNAPTIC_NOISE,
+      g_param_spec_float("presynaptic-noise", "presynaptic-noise",
+          "Add this much noise before the nonlinear transform",
+          0, G_MAXFLOAT,
+          DEFAULT_PROP_PRESYNAPTIC_NOISE,
+          G_PARAM_WRITABLE | G_PARAM_STATIC_STRINGS));
+
   g_object_class_install_property (gobject_class, PROP_GENERATION,
       g_param_spec_uint("generation", "generation",
           "Read the net's training generation",
@@ -913,6 +922,8 @@ create_net(GstClassify *self, int bottom_layer_size,
GST_DEBUG("rng seed %lu", rng_seed); float weight_init_scale = PP_GET_FLOAT(self, PROP_WEIGHT_INIT_SCALE, DEFAULT_PROP_WEIGHT_INIT_SCALE); + float presynaptic_noise = PP_GET_FLOAT(self, PROP_PRESYNAPTIC_NOISE, + DEFAULT_PROP_PRESYNAPTIC_NOISE); int lawnmower = PP_GET_BOOLEAN(self, PROP_LAWN_MOWER, DEFAULT_PROP_LAWN_MOWER); if (lawnmower){ @@ -923,7 +934,7 @@ create_net(GstClassify *self, int bottom_layer_size, } net = rnn_new_with_bottom_layer(n_features, bottom_layer_size, hidden_size, top_layer_size, flags, rng_seed, - NULL, bptt_depth, learn_rate, momentum, 0); + NULL, bptt_depth, learn_rate, momentum, presynaptic_noise, 0); initialise_net(self, net); @@ -1417,6 +1428,9 @@ maybe_set_net_scalar(GstClassify *self, guint prop_id, const GValue *value) case PROP_MOMENTUM: SET_FLOAT(net->bptt->momentum); break; + case PROP_PRESYNAPTIC_NOISE: + SET_FLOAT(net->presynaptic_noise); + break; case PROP_BOTTOM_LEARN_RATE_SCALE: if (net->bottom_layer){ SET_FLOAT(net->bottom_layer->learn_rate_scale); @@ -1515,6 +1529,7 @@ gst_classify_set_property (GObject * object, guint prop_id, const GValue * value case PROP_TOP_LEARN_RATE_SCALE: case PROP_BOTTOM_LEARN_RATE_SCALE: case PROP_LEARN_RATE: + case PROP_PRESYNAPTIC_NOISE: case PROP_MOMENTUM: maybe_set_net_scalar(self, prop_id, value); break; @@ -1789,7 +1804,7 @@ prepare_channel_features(GstClassify *self, s16 *buffer_i, int j){ static inline float train_channel(GstClassify *self, ClassifyChannel *c, int *win_count){ RecurNN *net = c->net; - float *answer = rnn_opinion(net, c->features); + float *answer = rnn_opinion(net, c->features, net->presynaptic_noise); float *error = net->bptt->o_error; float wrongness = 0; for (int i = 0; i < self->n_groups; i++){ @@ -1923,7 +1938,7 @@ emit_opinions(GstClassify *self, GstClockTime pts){ ClassifyChannel *c = prepare_channel_features(self, buffer, j); RecurNN *net = c->net; float *error = net->bptt->o_error; - float *answer = rnn_opinion(net, c->features); + float *answer = rnn_opinion(net, c->features, 0); for (i = 0; i < self->n_groups; i++){ ClassifyClassGroup *g = &self->class_groups[i]; int o = g->offset; diff --git a/gstparrot.c b/gstparrot.c index 38ef0f3..7d0d487 100644 --- a/gstparrot.c +++ b/gstparrot.c @@ -260,7 +260,8 @@ load_or_create_net(GstParrot *self){ if (net == NULL){ net = rnn_new(PARROT_N_FEATURES, self->hidden_size, PARROT_N_FEATURES, PARROT_RNN_FLAGS, PARROT_RNG_SEED, - NULL, PARROT_BPTT_DEPTH, self->learn_rate, MOMENTUM); + NULL, PARROT_BPTT_DEPTH, self->learn_rate, PARROT_PRESYNAPTIC_NOISE, + MOMENTUM); rnn_randomise_weights_auto(net); } else { @@ -453,7 +454,7 @@ possibly_save_net(RecurNN *net, char *filename) static inline float * tanh_opinion(RecurNN *net, float *in){ - float *answer = rnn_opinion(net, in); + float *answer = rnn_opinion(net, in, 0); for (int i = 0; i < net->output_size; i++){ answer[i] = fast_tanhf(answer[i]); } diff --git a/gstparrot.h b/gstparrot.h index fe62bf3..f33f7cf 100644 --- a/gstparrot.h +++ b/gstparrot.h @@ -36,7 +36,7 @@ G_BEGIN_DECLS #define PARROT_MFCC_MAX_FREQ (PARROT_RATE * 0.499) #define PARROT_MFCC_KNEE_FREQ 700 #define PARROT_MFCC_FOCUS_FREQ 0 - +#define PARROT_PRESYNAPTIC_NOISE 0 #define PARROT_EXTRA_FLAGS (RNN_NET_FLAG_BPTT_ADAPTIVE_MIN_ERROR ) diff --git a/gstrnnca.c b/gstrnnca.c index f8eca97..d0a0741 100644 --- a/gstrnnca.c +++ b/gstrnnca.c @@ -36,7 +36,6 @@ enum PROP_MOMENTUM, }; - #define DEFAULT_PROP_PGM_DUMP "" #define DEFAULT_PROP_LOG_FILE "" #define DEFAULT_PROP_OFFSETS RNNCA_DEFAULT_PATTERN @@ -327,7 +326,8 @@ load_or_create_net(GstRnnca 
     int input_size = self->len_Y + self->len_C * 2 + self->len_pos;
     net = rnn_new(input_size, self->hidden_size, 3, RNNCA_RNN_FLAGS,
         RNNCA_RNG_SEED,
-        NULL, RNNCA_BPTT_DEPTH, DEFAULT_LEARN_RATE, self->momentum);
+        NULL, RNNCA_BPTT_DEPTH, DEFAULT_LEARN_RATE,
+        RNNCA_PRESYNAPTIC_NOISE, self->momentum);
     rnn_randomise_weights_auto(net);
     //net->bptt->ho_scale = 0.25;
   }
@@ -697,7 +697,7 @@ train_net(GstRnnca *self, RnncaTrainer *t, RnncaFrame *prev, RnncaFrame *now){
   /*trainers are not on edges, so edge condition doesn't much matter */
   fill_net_inputs(self, net, prev, t->x, t->y, 1);
   float *answer;
-  answer = rnn_opinion(net, NULL);
+  answer = rnn_opinion(net, NULL, net->presynaptic_noise);
   fast_sigmoid_array(answer, answer, 3);
   offset = t->y * RNNCA_WIDTH + t->x;
   GST_DEBUG("x %d, y %d, offset %d", t->x, t->y, offset);
@@ -812,7 +812,7 @@ fill_frame(GstRnnca *self, GstVideoFrame *frame){
     for (x = 0; x < RNNCA_WIDTH; x++){
       RecurNN *net = self->constructors[y * RNNCA_WIDTH + x];
       fill_net_inputs(self, net, self->play_frame, x, y, self->edges);
-      float *answer = rnn_opinion(net, NULL);
+      float *answer = rnn_opinion(net, NULL, 0);
       fast_sigmoid_array(answer, answer, 3);
       GST_LOG("answer gen %d, x %d y %d, %.2g %.2g %.2g", net->generation,
           x, y, answer[0], answer[1], answer[2]);
diff --git a/gstrnnca.h b/gstrnnca.h
index c3b1c7f..9774a85 100644
--- a/gstrnnca.h
+++ b/gstrnnca.h
@@ -22,6 +22,7 @@ G_BEGIN_DECLS
 
 #define RNNCA_BPTT_DEPTH 10
 #define RNNCA_DO_TEMPORAL_LOGGING 0
+#define RNNCA_PRESYNAPTIC_NOISE 0
 
 #define LONG_WALK 0
 
diff --git a/recur-context.h b/recur-context.h
index 2aebc4f..499bd4e 100644
--- a/recur-context.h
+++ b/recur-context.h
@@ -53,6 +53,7 @@ typedef s16 audio_sample;
 #define RECUR_OUTPUT_SIZE (RECUR_OUTPUT_HEIGHT * RECUR_OUTPUT_WIDTH * 3)
 
 #define LEARN_RATE 1e-5
+#define PRESYNAPTIC_NOISE 0
 #define MOMENTUM 0.95
 
 #define RECUR_FQ_LENGTH 16
diff --git a/recur-nn-helpers.h b/recur-nn-helpers.h
index d88923d..fc398d7 100644
--- a/recur-nn-helpers.h
+++ b/recur-nn-helpers.h
@@ -166,4 +166,18 @@ zero_aligned_array(float *array, int size){
 #endif
 }
 
+static inline void
+add_array_noise(rand_ctx *rng, float *array, int len, float deviation){
+  for (int i = 0; i < len; i++){
+    float noise = cheap_gaussian_noise(rng) * deviation;
+    array[i] += noise;
+  }
+}
+
+#define MAYBE_ADD_ARRAY_NOISE(rng, array, len, dev) do { \
+    if (dev) \
+      add_array_noise(rng, array, len, dev); \
+  } while(0)
+
+
 #endif
diff --git a/recur-nn-init.c b/recur-nn-init.c
index fd19630..42f0e46 100644
--- a/recur-nn-init.c
+++ b/recur-nn-init.c
@@ -69,7 +69,7 @@ new_bptt(RecurNN *net, int depth, float learn_rate, float momentum, u32 flags){
 RecurNN *
 rnn_new(uint input_size, uint hidden_size, uint output_size, u32 flags,
     u64 rng_seed, const char *log_file, int bptt_depth, float learn_rate,
-    float momentum){
+    float momentum, float presynaptic_noise){
   RecurNN *net = calloc(1, sizeof(RecurNN));
   float *fm;
   /*sizes */
@@ -89,6 +89,7 @@ rnn_new(uint input_size, uint hidden_size, uint output_size, u32 flags,
   net->ho_size = ho_size;
   net->generation = 0;
   net->flags = flags;
+  net->presynaptic_noise = presynaptic_noise;
   init_rand64_maybe_randomly(&net->rng, rng_seed);
 
   size_t alloc_bytes = (i_size + h_size + o_size) * sizeof(float);
@@ -177,7 +178,7 @@ rnn_new_extra_layer(int input_size, int output_size, int overlap,
 RecurNN *rnn_new_with_bottom_layer(int n_inputs, int r_input_size,
     int hidden_size, int output_size, u32 flags, u64 rng_seed,
     const char *log_file, int bptt_depth, float learn_rate,
-    float momentum, int convolutional_overlap)
+    float momentum, float presynaptic_noise, int convolutional_overlap)
 {
   RecurNN *net;
   if (r_input_size == 0){
@@ -185,12 +186,14 @@ RecurNN *rnn_new_with_bottom_layer(int n_inputs, int r_input_size,
         "due to zero internal size");
     flags &= ~RNN_NET_FLAG_BOTTOM_LAYER;
     net = rnn_new(n_inputs, hidden_size, output_size,
-        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum);
+        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum,
+        presynaptic_noise);
   }
   else {
     flags |= RNN_NET_FLAG_BOTTOM_LAYER;
     net = rnn_new(r_input_size, hidden_size, output_size,
-        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum);
+        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum,
+        presynaptic_noise);
     net->bottom_layer = rnn_new_extra_layer(n_inputs, r_input_size,
         convolutional_overlap, net->flags);
 
@@ -300,7 +303,8 @@ rnn_clone(RecurNN *parent, u32 flags,
     momentum = 0;
   }
   net = rnn_new(parent->input_size, parent->hidden_size, parent->output_size,
-      flags, rng_seed, log_file, bptt_depth, learn_rate, momentum);
+      flags, rng_seed, log_file, bptt_depth, learn_rate, momentum,
+      parent->presynaptic_noise);
 
   if (parent->bptt && (flags & RNN_NET_FLAG_OWN_BPTT)){
     net->bptt->momentum_weight = parent->bptt->momentum_weight;
@@ -324,6 +328,7 @@ rnn_clone(RecurNN *parent, u32 flags,
   /*for now, the bottom layers can be shared */
   net->bottom_layer = parent->bottom_layer;
   net->generation = parent->generation;
+  net->presynaptic_noise = parent->presynaptic_noise;
   return net;
 }
 
diff --git a/recur-nn-io.c b/recur-nn-io.c
index 4f9ba68..126da1c 100644
--- a/recur-nn-io.c
+++ b/recur-nn-io.c
@@ -38,8 +38,9 @@ rnn_save_net(RecurNN *net, const char *filename, int backup){
    * 6: doesn't save BPTT training arrays (e.g. momentum) or hidden state
    * 7: includes net->clockwork_cycles
    * 8: drops net->clockwork_cycles
+   * 9: adds net->presynaptic_noise
    */
-  const int version = 8;
+  const int version = 9;
   cdb_make_add(&cdbm, FORMAT_VERSION, strlen(FORMAT_VERSION), &version, sizeof(version));
 
 #define SAVE_SCALAR(obj, attr) do { \
@@ -81,6 +82,7 @@ rnn_save_net(RecurNN *net, const char *filename, int backup){
   SAVE_SCALAR(net, ho_size);
   SAVE_SCALAR(net, generation);
   SAVE_SCALAR(net, flags);
+  SAVE_SCALAR(net, presynaptic_noise);
   SAVE_SCALAR(net, rng); /* a struct, should work? */
 
   SAVE_ARRAY(net, ih_weights, net->ih_size);
@@ -203,6 +205,7 @@ rnn_load_net(const char *filename){
   READ_SCALAR(net, rng);
   READ_SCALAR(net, generation);
   READ_SCALAR(net, flags);
+  READ_SCALAR_IF_VERSION_ELSE_DEFAULT(net, presynaptic_noise, 9, 0);
 
   if (tmpnet.flags & RNN_NET_FLAG_OWN_BPTT){
     READ_SCALAR(bptt, depth);
@@ -229,12 +232,13 @@ rnn_load_net(const char *filename){
     net = rnn_new_with_bottom_layer(tmpbl.input_size, tmpbl.output_size,
         tmpnet.hidden_size, tmpnet.output_size, tmpnet.flags, 0,
         NULL, tmpbptt.depth, tmpbptt.learn_rate, tmpbptt.momentum,
-        tmpbl.overlap);
+        tmpnet.presynaptic_noise, tmpbl.overlap);
   }
   else {
     net = rnn_new(tmpnet.input_size, tmpnet.hidden_size,
-        tmpnet.output_size, tmpnet.flags, 0, NULL,
-        tmpbptt.depth, tmpbptt.learn_rate, tmpbptt.momentum);
+        tmpnet.output_size, tmpnet.flags, 0, NULL,
+        tmpbptt.depth, tmpbptt.learn_rate, tmpbptt.momentum,
+        tmpnet.presynaptic_noise);
   }
   bptt = net->bptt;
   bottom_layer = net->bottom_layer;
@@ -272,6 +276,7 @@ rnn_load_net(const char *filename){
 
   CHECK_SCALAR(net, tmpnet, generation);
   CHECK_SCALAR(net, tmpnet, flags);
+  CHECK_SCALAR(net, tmpnet, presynaptic_noise);
 
   if (bptt){
     CHECK_SCALAR(bptt, tmpbptt, depth);
diff --git a/recur-nn.c b/recur-nn.c
index 18cc3fc..4d88994 100644
--- a/recur-nn.c
+++ b/recur-nn.c
@@ -80,7 +80,7 @@ maybe_scale_inputs(RecurNN *net){
 }
 
 float *
-rnn_opinion(RecurNN *net, const float *restrict inputs){
+rnn_opinion(RecurNN *net, const float *restrict inputs, float presynaptic_noise){
   /*If inputs is NULL, assume the inputs have already been set.*/
   float *restrict hiddens = net->hidden_layer;
   ASSUME_ALIGNED(hiddens);
@@ -93,6 +93,8 @@ rnn_opinion(RecurNN *net, const float *restrict inputs){
     }
     calculate_interlayer(layer->inputs, layer->i_size,
         layer->outputs, layer->o_size, layer->weights);
+    MAYBE_ADD_ARRAY_NOISE(&net->rng, layer->outputs + 1, net->input_size - 1,
+        presynaptic_noise);
     memcpy(net->real_inputs, layer->outputs, net->input_size * sizeof(float));
   }
   else if (inputs){
@@ -102,7 +104,7 @@ rnn_opinion(RecurNN *net, const float *restrict inputs){
 
   /*copy in hiddens */
   memcpy(net->input_layer, hiddens, INPUT_OFFSET(net) * sizeof(float));
-  /*bias, possibly unnecessary because it may not get overwritten */
+  /*bias, possibly unnecessary because it probably doesn't get overwritten */
   net->input_layer[0] = 1.0f;
 
   /* in emergencies, clamp the scale of the input vector */
@@ -110,6 +112,10 @@ rnn_opinion(RecurNN *net, const float *restrict inputs){
 
   calculate_interlayer(net->input_layer, net->i_size, hiddens, net->h_size,
       net->ih_weights);
+
+  MAYBE_ADD_ARRAY_NOISE(&net->rng, net->hidden_layer + 1, net->h_size - 1,
+      presynaptic_noise);
+
   for (int i = 1; i < net->h_size; i++){
     float h = hiddens[i] - RNN_HIDDEN_PENALTY;
     hiddens[i] = (h > 0.0f) ? h : 0.0f;
@@ -620,10 +626,7 @@ weight_noise(rand_ctx *rng, float *weights, int width, int stride, int height,
     float deviation){
   for (int y = 0; y < height; y++){
     float *row = weights + y * stride;
-    for (int x = 0; x < width; x++){
-      float noise = cheap_gaussian_noise(rng) * deviation;
-      row[x] += noise;
-    }
+    add_array_noise(rng, row, width, deviation);
   }
 }
 
diff --git a/recur-nn.h b/recur-nn.h
index d13d5eb..044453f 100644
--- a/recur-nn.h
+++ b/recur-nn.h
@@ -162,6 +162,7 @@ struct _RecurNN {
   RecurExtraLayer *bottom_layer;
   char *metadata;
   u32 generation;
+  float presynaptic_noise;
 };
 
 struct _RecurNNBPTT {
@@ -237,10 +238,9 @@ struct RecurInitialisationParameters {
 
 RecurNN * rnn_new(uint input_size, uint hidden_size, uint output_size,
     u32 flags, u64 rng_seed, const char *log_file, int depth, float learn_rate,
-    float momentum);
+    float momentum, float presynaptic_noise);
 
-RecurNN * rnn_clone(RecurNN *parent, u32 flags,
-    u64 rng_seed, const char *log_file);
+RecurNN * rnn_clone(RecurNN *parent, u32 flags, u64 rng_seed, const char *log_file);
 
 RecurExtraLayer *rnn_new_extra_layer(int input_size, int output_size,
     int overlap, u32 flags);
@@ -248,7 +248,7 @@ RecurExtraLayer *rnn_new_extra_layer(int input_size, int output_size, int overla
 RecurNN *rnn_new_with_bottom_layer(int n_inputs, int r_input_size,
     int hidden_size, int output_size, u32 flags, u64 rng_seed,
     const char *log_file, int bptt_depth, float learn_rate,
-    float momentum, int convolutional_overlap);
+    float momentum, float presynaptic_noise, int convolutional_overlap);
 
 void rnn_set_log_file(RecurNN *net, const char * log_file,
     int append_dont_truncate);
@@ -268,7 +268,7 @@ void rnn_delete_net(RecurNN *net);
 RecurNN ** rnn_new_training_set(RecurNN *prototype, int n_nets);
 void rnn_delete_training_set(RecurNN **nets, int n_nets, int leave_prototype);
 
-float *rnn_opinion(RecurNN *net, const float *inputs);
+float *rnn_opinion(RecurNN *net, const float *inputs, float presynaptic_noise);
 
 void rnn_multi_pgm_dump(RecurNN *net, const char *dumpees,
     const char *basename);
diff --git a/test/test_fb_backprop.c b/test/test_fb_backprop.c
index 861fb07..cc10f9f 100644
--- a/test/test_fb_backprop.c
+++ b/test/test_fb_backprop.c
@@ -42,7 +42,7 @@ load_char_input(RecurNN *net, int c){
 static inline float
 net_error_bptt(RecurNN *net, float *error, int c, int next){
   load_char_input(net, c);
-  float *answer = rnn_opinion(net, NULL);
+  float *answer = rnn_opinion(net, NULL, net->presynaptic_noise);
   error[0] = (next & 1) - (answer[0] > 0);
   error[1] = (!!(next & 2)) - (answer[1] > 0);
   return (fabsf(error[0]) + fabsf(error[1])) * 0.5;
@@ -60,7 +60,7 @@ sgd_one(RecurNN *net, const int current, const int next, uint batch_size){
 static inline int
 char_opinion(RecurNN *net, int c){
   load_char_input(net, c);
-  float * answer = rnn_opinion(net, NULL);
+  float * answer = rnn_opinion(net, NULL, 0);
   int a = ((answer[1] > 0) << 1) | (answer[0] > 0);
   return a;
 }
@@ -132,7 +132,7 @@ main(void){
   feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
   RecurNN *net = rnn_new(INPUT_SIZE, HIDDEN_SIZE, INPUT_SIZE,
       RNN_NET_FLAG_STANDARD,
-      1, NET_LOG_FILE, BPTT_DEPTH, LEARN_RATE, MOMENTUM);
+      1, NET_LOG_FILE, BPTT_DEPTH, LEARN_RATE, 0, MOMENTUM);
   rnn_randomise_weights_auto(net);
   START_TIMER(epoch);
   epoch(net, 5000000);
diff --git a/test_backprop.c b/test_backprop.c
index 0f9144f..2310d66 100644
--- a/test_backprop.c
+++ b/test_backprop.c
@@ -100,6 +100,7 @@ Because of ccan/opt, --help will tell you something.
 #define DEFAULT_FIND_ALPHABET_THRESHOLD 0
 #define DEFAULT_FIND_ALPHABET_DIGIT_ADJUST 1.0
 #define DEFAULT_FIND_ALPHABET_ALPHA_ADJUST 1.0
+#define DEFAULT_PRESYNAPTIC_NOISE 0.0f
 
 #define BELOW_QUIET_LEVEL(quiet) if (opt_quiet < quiet)
 
@@ -168,6 +169,7 @@ static bool opt_collapse_space = DEFAULT_COLLAPSE_SPACE;
 static double opt_find_alphabet_threshold = DEFAULT_FIND_ALPHABET_THRESHOLD;
 static double opt_find_alphabet_digit_adjust = DEFAULT_FIND_ALPHABET_DIGIT_ADJUST;
 static double opt_find_alphabet_alpha_adjust = DEFAULT_FIND_ALPHABET_ALPHA_ADJUST;
+static float opt_presynaptic_noise = DEFAULT_PRESYNAPTIC_NOISE;
 
 #define IN_RANGE_01(x) (((x) >= 0.0f) && ((x) <= 1.0f))
 
@@ -315,6 +317,9 @@ static struct opt_table options[] = {
       &opt_find_alphabet_digit_adjust, "adjust digit frequency for alphabet calculations"),
   OPT_WITH_ARG("--find-alphabet-alpha-adjust", opt_set_doubleval, opt_show_doubleval,
       &opt_find_alphabet_alpha_adjust, "adjust letter frequency for alphabet calculation"),
+  OPT_WITH_ARG("--presynaptic-noise", opt_set_floatval, opt_show_floatval,
+      &opt_presynaptic_noise, "deviation of noise to add before non-linear transform"),
+
   OPT_WITHOUT_ARG("-h|--help", opt_usage_and_exit,
       ": Rnn modelling of text at the character level",
@@ -431,7 +436,7 @@ load_or_create_net(struct RnnCharMetadata *m, int alpha_len, int reload){
     net = rnn_new_with_bottom_layer(input_size, opt_bottom_layer,
         opt_hidden_size, output_size, flags, opt_rng_seed,
         opt_logfile, opt_bptt_depth, opt_learn_rate,
-        opt_momentum, 0);
+        opt_momentum, opt_presynaptic_noise, 0);
     initialise_net(net);
     net->bptt->momentum_weight = opt_momentum_weight;
     net->metadata = strdup(metadata);
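
Note (not part of the patch): below is a minimal standalone sketch of the
technique the patch wires into rnn_opinion(), for readers who don't want to
trace the diff. The gaussian_noise() helper and the toy pre-activation
values are hypothetical: gaussian_noise() is a Box-Muller stand-in for
recur's cheap_gaussian_noise(), and the real code takes a rand_ctx, skips
the bias element (array + 1), and subtracts RNN_HIDDEN_PENALTY before
rectifying.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* zero-mean, unit-variance noise; a Box-Muller stand-in for
   cheap_gaussian_noise() */
static float
gaussian_noise(void){
  float u1 = ((float)rand() + 1.0f) / ((float)RAND_MAX + 2.0f);
  float u2 = ((float)rand() + 1.0f) / ((float)RAND_MAX + 2.0f);
  return sqrtf(-2.0f * logf(u1)) * cosf(6.2831853f * u2);
}

/* the patch's add_array_noise(), minus the rand_ctx plumbing */
static void
add_array_noise(float *array, int len, float deviation){
  for (int i = 0; i < len; i++){
    array[i] += gaussian_noise() * deviation;
  }
}

int
main(void){
  /* made-up pre-activations: the hidden layer just after the
     input-to-hidden matrix product, before the rectifier */
  float hiddens[4] = {0.8f, -0.3f, 0.05f, 2.1f};
  float presynaptic_noise = 0.1f;  /* the new tunable deviation */

  /* a zero deviation keeps the old noiseless path, exactly as
     MAYBE_ADD_ARRAY_NOISE's "if (dev)" guard does */
  if (presynaptic_noise){
    add_array_noise(hiddens, 4, presynaptic_noise);
  }
  /* the rectifier then runs on the noised pre-activations */
  for (int i = 0; i < 4; i++){
    hiddens[i] = (hiddens[i] > 0.0f) ? hiddens[i] : 0.0f;
    printf("h[%d] = %g\n", i, hiddens[i]);
  }
  return 0;
}

Since every new constant and default in the patch (PRESYNAPTIC_NOISE,
PARROT_PRESYNAPTIC_NOISE, RNNCA_PRESYNAPTIC_NOISE, the gstclassify and
test_backprop defaults) is 0, the patch is behaviour-preserving until the
new knob is turned, and inference-time callers pass 0 explicitly to keep
their opinions deterministic.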