Add optional pre-synaptic noise
This adds Gaussian noise after the matrix calculation and before the
non-linearity (i.e. the rectifier). On test_backprop, the effect seems to
be to speed up each generation, but to converge at a higher entropy than
would be obtained without noise.

Not very well tested outside of test_backprop.
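In outline, each forward pass computes the weighted sums, optionally perturbs them, and only then applies the rectifier. A minimal sketch of the idea, with illustrative names (the real forward pass is in rnn_opinion, which now takes the noise deviation as an extra argument; cheap_gaussian_noise is the RNG helper used by the new code in recur-nn-helpers.h below):

/* sketch: one hidden unit's forward step with optional pre-synaptic noise */
for (int i = 0; i < hidden_size; i++){
  float sum = 0.0f;
  for (int j = 0; j < input_size; j++){
    sum += weights[i * input_size + j] * inputs[j];   /* matrix calculation */
  }
  if (presynaptic_noise){
    sum += cheap_gaussian_noise(&net->rng) * presynaptic_noise;
  }
  hiddens[i] = (sum > 0.0f) ? sum : 0.0f;             /* the rectifier */
}

Note the pattern across the call sites below: training paths pass net->presynaptic_noise, while inference and validation paths pass 0, so the perturbation only affects learning.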
douglasbagnall committed Aug 1, 2014
1 parent 5b8cf24 commit 617b3a5
Showing 17 changed files with 94 additions and 44 deletions.
2 changes: 1 addition & 1 deletion charmodel-classify.c
@@ -93,7 +93,7 @@ rnn_char_classify_epoch(RnnCharClassifier *model, const RnnCharClassifiedChar *t
RnnCharClassifiedChar t = text[offset];
RecurNN *n = nets[j];
rnn_bptt_advance(n);
-  float *answer = one_hot_opinion(net, t.symbol);
+  float *answer = one_hot_opinion(net, t.symbol, net->presynaptic_noise);
if (t.class != NO_CLASS){
float *error = n->bptt->o_error;
ASSUME_ALIGNED(error);
4 changes: 2 additions & 2 deletions charmodel-helpers.h
@@ -11,7 +11,7 @@ capped_log2f(float x){
}

static inline float*
-one_hot_opinion(RecurNN *net, int hot){
+one_hot_opinion(RecurNN *net, int hot, float presynaptic_noise){
float *inputs;
int len;
if (net->bottom_layer){
@@ -26,6 +26,6 @@ one_hot_opinion(RecurNN *net, int hot){
//XXX could just set the previous one to zero (i.e. remember it)
memset(inputs, 0, len * sizeof(float));
inputs[hot] = 1.0f;
-  return rnn_opinion(net, NULL);
+  return rnn_opinion(net, NULL, presynaptic_noise);
}

8 changes: 4 additions & 4 deletions charmodel-predict.c
@@ -17,7 +17,7 @@ This uses the RNN to predict the next character in a text sequence.
static inline float
net_error_bptt(RecurNN *net, float *restrict error, int c, int next, int *correct){
ASSUME_ALIGNED(error);
-  float *answer = one_hot_opinion(net, c);
+  float *answer = one_hot_opinion(net, c, net->presynaptic_noise);
int winner;
winner = softmax_best_guess(error, answer, net->output_size);
*correct = (winner == next);
@@ -29,7 +29,7 @@ static inline int
guess_next_character(RecurNN *net, int hot, float bias){
int i;
float r;
-  float *answer = one_hot_opinion(net, hot);
+  float *answer = one_hot_opinion(net, hot, 0);
ASSUME_ALIGNED(answer);
int len = net->output_size;
if (bias >= 100){
@@ -67,10 +67,10 @@ validate(RecurNN *net, const u8 *text, int len){
/*skip the first few because state depends too much on previous experience */
int skip = MIN(len / 10, 5);
for (i = 0; i < skip; i++){
-    one_hot_opinion(net, text[i]);
+    one_hot_opinion(net, text[i], 0);
}
for (; i < len - 1; i++){
-    float *answer = one_hot_opinion(net, text[i]);
+    float *answer = one_hot_opinion(net, text[i], 0);
softmax(error, answer, n_chars);
float e = error[text[i + 1]];
entropy += capped_log2f(e);
6 changes: 3 additions & 3 deletions context-recurse.c
@@ -93,7 +93,7 @@ recur_setup_nets(RecurContext *context, const char *log_file)
if (net == NULL){
net = rnn_new(RECUR_N_MFCCS + RECUR_N_VIDEO_FEATURES,
RECUR_N_HIDDEN, RECUR_OUTPUT_SIZE, flags, RECUR_RNG_SEED,
-      log_file, RECUR_BPTT_DEPTH, LEARN_RATE, MOMENTUM);
+      log_file, RECUR_BPTT_DEPTH, LEARN_RATE, MOMENTUM, PRESYNAPTIC_NOISE);
rnn_randomise_weights_auto(net);
}
context->net = net;
@@ -136,7 +136,7 @@ recur_train_nets(RecurContext *context, RecurFrame *src_frame,
RECUR_INPUT_WIDTH + 2, RECUR_INPUT_HEIGHT + 2, t->x - t->scale, t->y - t->scale,
t->scale * RECUR_RESOLUTION_GAIN);

-  float *answer = rnn_opinion(net, NULL);
+  float *answer = rnn_opinion(net, NULL, net->presynaptic_noise);
ASSUME_ALIGNED(answer);
fast_sigmoid_array(answer, answer, net->o_size);

@@ -232,7 +232,7 @@ rnn_recursive_opinion(RecurContext *context, int index)
int i;
RecurNN **constructors = context->constructors;
RecurNN *net = constructors[index];
-  float *image = rnn_opinion(net, NULL);
+  float *image = rnn_opinion(net, NULL, 0);
const int mul = RECUR_RESOLUTION_GAIN * RECUR_RESOLUTION_GAIN;
int first_child = index * mul + 1;
if (first_child < RECUR_N_CONSTRUCTORS){
21 changes: 18 additions & 3 deletions gstclassify.c
@@ -62,6 +62,7 @@ enum
PROP_CONFIRMATION_LAG,
PROP_LOAD_NET_NOW,
PROP_WINDOWS_PER_SECOND,
+  PROP_PRESYNAPTIC_NOISE,

PROP_LAST
};
@@ -90,6 +91,7 @@ enum
#define DEFAULT_PROP_WEIGHT_INIT_SCALE 0.0f
#define DEFAULT_PROP_GENERATION 0
#define DEFAULT_PROP_WINDOWS_PER_SECOND 0
+#define DEFAULT_PROP_PRESYNAPTIC_NOISE 0

#define DEFAULT_PROP_CLASSES "01"
#define DEFAULT_PROP_BPTT_DEPTH 30
@@ -562,6 +564,13 @@ gst_classify_class_init (GstClassifyClass * klass)
DEFAULT_PROP_WEIGHT_INIT_SCALE,
G_PARAM_WRITABLE | G_PARAM_STATIC_STRINGS));

+  g_object_class_install_property (gobject_class, PROP_PRESYNAPTIC_NOISE,
+      g_param_spec_float("presynaptic-noise", "presynaptic-noise",
+          "Add this much noise before the nonlinear transform",
+          0, G_MAXFLOAT,
+          DEFAULT_PROP_PRESYNAPTIC_NOISE,
+          G_PARAM_WRITABLE | G_PARAM_STATIC_STRINGS));
+
g_object_class_install_property (gobject_class, PROP_GENERATION,
g_param_spec_uint("generation", "generation",
"Read the net's training generation",
@@ -913,6 +922,8 @@ create_net(GstClassify *self, int bottom_layer_size,
GST_DEBUG("rng seed %lu", rng_seed);
float weight_init_scale = PP_GET_FLOAT(self, PROP_WEIGHT_INIT_SCALE,
DEFAULT_PROP_WEIGHT_INIT_SCALE);
+  float presynaptic_noise = PP_GET_FLOAT(self, PROP_PRESYNAPTIC_NOISE,
+      DEFAULT_PROP_PRESYNAPTIC_NOISE);

int lawnmower = PP_GET_BOOLEAN(self, PROP_LAWN_MOWER, DEFAULT_PROP_LAWN_MOWER);
if (lawnmower){
@@ -923,7 +934,7 @@
}
net = rnn_new_with_bottom_layer(n_features, bottom_layer_size, hidden_size,
top_layer_size, flags, rng_seed,
-      NULL, bptt_depth, learn_rate, momentum, 0);
+      NULL, bptt_depth, learn_rate, momentum, presynaptic_noise, 0);

initialise_net(self, net);

@@ -1417,6 +1428,9 @@ maybe_set_net_scalar(GstClassify *self, guint prop_id, const GValue *value)
case PROP_MOMENTUM:
SET_FLOAT(net->bptt->momentum);
break;
+    case PROP_PRESYNAPTIC_NOISE:
+      SET_FLOAT(net->presynaptic_noise);
+      break;
case PROP_BOTTOM_LEARN_RATE_SCALE:
if (net->bottom_layer){
SET_FLOAT(net->bottom_layer->learn_rate_scale);
@@ -1515,6 +1529,7 @@ gst_classify_set_property (GObject * object, guint prop_id, const GValue * value
case PROP_TOP_LEARN_RATE_SCALE:
case PROP_BOTTOM_LEARN_RATE_SCALE:
case PROP_LEARN_RATE:
+    case PROP_PRESYNAPTIC_NOISE:
case PROP_MOMENTUM:
maybe_set_net_scalar(self, prop_id, value);
break;
@@ -1789,7 +1804,7 @@ prepare_channel_features(GstClassify *self, s16 *buffer_i, int j){
static inline float
train_channel(GstClassify *self, ClassifyChannel *c, int *win_count){
RecurNN *net = c->net;
-  float *answer = rnn_opinion(net, c->features);
+  float *answer = rnn_opinion(net, c->features, net->presynaptic_noise);
float *error = net->bptt->o_error;
float wrongness = 0;
for (int i = 0; i < self->n_groups; i++){
@@ -1923,7 +1938,7 @@ emit_opinions(GstClassify *self, GstClockTime pts){
ClassifyChannel *c = prepare_channel_features(self, buffer, j);
RecurNN *net = c->net;
float *error = net->bptt->o_error;
-    float *answer = rnn_opinion(net, c->features);
+    float *answer = rnn_opinion(net, c->features, 0);
for (i = 0; i < self->n_groups; i++){
ClassifyClassGroup *g = &self->class_groups[i];
int o = g->offset;
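With the property installed, the noise level can be adjusted on a live element like any other GObject property. A hypothetical call (the classifier variable is illustrative):

/* hypothetical: set the noise deviation on an existing classify element */
g_object_set(G_OBJECT(classifier), "presynaptic-noise", 0.1f, NULL);

Since the param spec is G_PARAM_WRITABLE without G_PARAM_READABLE, the value can be set but not read back.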
5 changes: 3 additions & 2 deletions gstparrot.c
@@ -260,7 +260,8 @@ load_or_create_net(GstParrot *self){
if (net == NULL){
net = rnn_new(PARROT_N_FEATURES, self->hidden_size,
PARROT_N_FEATURES, PARROT_RNN_FLAGS, PARROT_RNG_SEED,
-      NULL, PARROT_BPTT_DEPTH, self->learn_rate, MOMENTUM);
+      NULL, PARROT_BPTT_DEPTH, self->learn_rate, MOMENTUM,
+      PARROT_PRESYNAPTIC_NOISE);
rnn_randomise_weights_auto(net);
}
else {
@@ -453,7 +454,7 @@ possibly_save_net(RecurNN *net, char *filename)

static inline float *
tanh_opinion(RecurNN *net, float *in){
-  float *answer = rnn_opinion(net, in);
+  float *answer = rnn_opinion(net, in, 0);
for (int i = 0; i < net->output_size; i++){
answer[i] = fast_tanhf(answer[i]);
}
2 changes: 1 addition & 1 deletion gstparrot.h
@@ -36,7 +36,7 @@ G_BEGIN_DECLS
#define PARROT_MFCC_MAX_FREQ (PARROT_RATE * 0.499)
#define PARROT_MFCC_KNEE_FREQ 700
#define PARROT_MFCC_FOCUS_FREQ 0
-
+#define PARROT_PRESYNAPTIC_NOISE 0

#define PARROT_EXTRA_FLAGS (RNN_NET_FLAG_BPTT_ADAPTIVE_MIN_ERROR )

8 changes: 4 additions & 4 deletions gstrnnca.c
@@ -36,7 +36,6 @@ enum
PROP_MOMENTUM,
};

-
#define DEFAULT_PROP_PGM_DUMP ""
#define DEFAULT_PROP_LOG_FILE ""
#define DEFAULT_PROP_OFFSETS RNNCA_DEFAULT_PATTERN
@@ -327,7 +326,8 @@ load_or_create_net(GstRnnca *self){
int input_size = self->len_Y + self->len_C * 2 + self->len_pos;
net = rnn_new(input_size, self->hidden_size, 3,
RNNCA_RNN_FLAGS, RNNCA_RNG_SEED,
-      NULL, RNNCA_BPTT_DEPTH, DEFAULT_LEARN_RATE, self->momentum);
+      NULL, RNNCA_BPTT_DEPTH, DEFAULT_LEARN_RATE, self->momentum,
+      RNNCA_PRESYNAPTIC_NOISE);
rnn_randomise_weights_auto(net);
//net->bptt->ho_scale = 0.25;
}
@@ -697,7 +697,7 @@ train_net(GstRnnca *self, RnncaTrainer *t, RnncaFrame *prev, RnncaFrame *now){
/*trainers are not on edges, so edge condition doesn't much matter */
fill_net_inputs(self, net, prev, t->x, t->y, 1);
float *answer;
-  answer = rnn_opinion(net, NULL);
+  answer = rnn_opinion(net, NULL, net->presynaptic_noise);
fast_sigmoid_array(answer, answer, 3);
offset = t->y * RNNCA_WIDTH + t->x;
GST_DEBUG("x %d, y %d, offset %d", t->x, t->y, offset);
@@ -812,7 +812,7 @@ fill_frame(GstRnnca *self, GstVideoFrame *frame){
for (x = 0; x < RNNCA_WIDTH; x++){
RecurNN *net = self->constructors[y * RNNCA_WIDTH + x];
fill_net_inputs(self, net, self->play_frame, x, y, self->edges);
-      float *answer = rnn_opinion(net, NULL);
+      float *answer = rnn_opinion(net, NULL, 0);
fast_sigmoid_array(answer, answer, 3);
GST_LOG("answer gen %d, x %d y %d, %.2g %.2g %.2g",
net->generation, x, y, answer[0], answer[1], answer[2]);
1 change: 1 addition & 0 deletions gstrnnca.h
@@ -22,6 +22,7 @@ G_BEGIN_DECLS
#define RNNCA_BPTT_DEPTH 10

#define RNNCA_DO_TEMPORAL_LOGGING 0
+#define RNNCA_PRESYNAPTIC_NOISE 0

#define LONG_WALK 0

1 change: 1 addition & 0 deletions recur-context.h
@@ -53,6 +53,7 @@ typedef s16 audio_sample;
#define RECUR_OUTPUT_SIZE (RECUR_OUTPUT_HEIGHT * RECUR_OUTPUT_WIDTH * 3)

#define LEARN_RATE 1e-5
+#define PRESYNAPTIC_NOISE 0
#define MOMENTUM 0.95

#define RECUR_FQ_LENGTH 16
14 changes: 14 additions & 0 deletions recur-nn-helpers.h
@@ -166,4 +166,18 @@ zero_aligned_array(float *array, int size){
#endif
}

+static inline void
+add_array_noise(rand_ctx *rng, float *array, int len, float deviation){
+  for (int i = 0; i < len; i++){
+    float noise = cheap_gaussian_noise(rng) * deviation;
+    array[i] += noise;
+  }
+}
+
+#define MAYBE_ADD_ARRAY_NOISE(rng, array, len, dev) do {   \
+    if (dev)                                               \
+      add_array_noise(rng, array, len, dev);               \
+  } while(0)
+
+
#endif
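Because the macro tests dev before calling, a deviation of exactly zero (the default in every header this commit touches) skips the loop entirely. A hypothetical call site, assuming the pre-activation sums sit in an array h_sums just after the matrix multiply:

/* hypothetical: perturb pre-activation sums before the rectifier */
MAYBE_ADD_ARRAY_NOISE(&net->rng, h_sums, net->hidden_size,
                      net->presynaptic_noise);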
15 changes: 10 additions & 5 deletions recur-nn-init.c
@@ -69,7 +69,7 @@ new_bptt(RecurNN *net, int depth, float learn_rate, float momentum, u32 flags){
RecurNN *
rnn_new(uint input_size, uint hidden_size, uint output_size, u32 flags,
u64 rng_seed, const char *log_file, int bptt_depth, float learn_rate,
-    float momentum){
+    float momentum, float presynaptic_noise){
RecurNN *net = calloc(1, sizeof(RecurNN));
float *fm;
/*sizes */
@@ -89,6 +89,7 @@ rnn_new(uint input_size, uint hidden_size, uint output_size, u32 flags,
net->ho_size = ho_size;
net->generation = 0;
net->flags = flags;
+  net->presynaptic_noise = presynaptic_noise;
init_rand64_maybe_randomly(&net->rng, rng_seed);

size_t alloc_bytes = (i_size + h_size + o_size) * sizeof(float);
@@ -177,20 +178,22 @@ rnn_new_extra_layer(int input_size, int output_size, int overlap,
RecurNN *rnn_new_with_bottom_layer(int n_inputs, int r_input_size,
int hidden_size, int output_size, u32 flags, u64 rng_seed,
const char *log_file, int bptt_depth, float learn_rate,
-    float momentum, int convolutional_overlap)
+    float momentum, float presynaptic_noise, int convolutional_overlap)
{
RecurNN *net;
if (r_input_size == 0){
MAYBE_DEBUG("rnn_new_with_bottom_layer returning bottomless net, "
"due to zero internal size");
flags &= ~RNN_NET_FLAG_BOTTOM_LAYER;
net = rnn_new(n_inputs, hidden_size, output_size,
-        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum);
+        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum,
+        presynaptic_noise);
}
else {
flags |= RNN_NET_FLAG_BOTTOM_LAYER;
net = rnn_new(r_input_size, hidden_size, output_size,
-        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum);
+        flags, rng_seed, log_file, bptt_depth, learn_rate, momentum,
+        presynaptic_noise);

net->bottom_layer = rnn_new_extra_layer(n_inputs, r_input_size,
convolutional_overlap, net->flags);
@@ -300,7 +303,8 @@ rnn_clone(RecurNN *parent, u32 flags,
momentum = 0;
}
net = rnn_new(parent->input_size, parent->hidden_size, parent->output_size,
-      flags, rng_seed, log_file, bptt_depth, learn_rate, momentum);
+      flags, rng_seed, log_file, bptt_depth, learn_rate, momentum,
+      parent->presynaptic_noise);

if (parent->bptt && (flags & RNN_NET_FLAG_OWN_BPTT)){
net->bptt->momentum_weight = parent->bptt->momentum_weight;
@@ -324,6 +328,7 @@
/*for now, the bottom layers can be shared */
net->bottom_layer = parent->bottom_layer;
net->generation = parent->generation;
+  net->presynaptic_noise = parent->presynaptic_noise;
return net;
}

13 changes: 9 additions & 4 deletions recur-nn-io.c
@@ -38,8 +38,9 @@ rnn_save_net(RecurNN *net, const char *filename, int backup){
* 6: doesn't save BPTT training arrays (e.g. momentum) or hidden state
* 7: includes net->clockwork_cycles
* 8: drops net->clockwork_cycles
+ * 9: add net->presynaptic_noise
*/
-  const int version = 8;
+  const int version = 9;
cdb_make_add(&cdbm, FORMAT_VERSION, strlen(FORMAT_VERSION), &version, sizeof(version));

#define SAVE_SCALAR(obj, attr) do { \
@@ -81,6 +82,7 @@
SAVE_SCALAR(net, ho_size);
SAVE_SCALAR(net, generation);
SAVE_SCALAR(net, flags);
+  SAVE_SCALAR(net, presynaptic_noise);
SAVE_SCALAR(net, rng); /* a struct, should work? */

SAVE_ARRAY(net, ih_weights, net->ih_size);
@@ -203,6 +205,7 @@ rnn_load_net(const char *filename){
READ_SCALAR(net, rng);
READ_SCALAR(net, generation);
READ_SCALAR(net, flags);
+  READ_SCALAR_IF_VERSION_ELSE_DEFAULT(net, presynaptic_noise, 9, 0);

if (tmpnet.flags & RNN_NET_FLAG_OWN_BPTT){
READ_SCALAR(bptt, depth);
@@ -229,12 +232,13 @@
net = rnn_new_with_bottom_layer(tmpbl.input_size, tmpbl.output_size,
tmpnet.hidden_size, tmpnet.output_size, tmpnet.flags, 0, NULL,
tmpbptt.depth, tmpbptt.learn_rate, tmpbptt.momentum,
-        tmpbl.overlap);
+        tmpnet.presynaptic_noise, tmpbl.overlap);
}
else {
net = rnn_new(tmpnet.input_size, tmpnet.hidden_size,
-        tmpnet.output_size, tmpnet.flags, 0, NULL,
-        tmpbptt.depth, tmpbptt.learn_rate, tmpbptt.momentum);
+        tmpnet.output_size, tmpnet.flags, 0, NULL,
+        tmpbptt.depth, tmpbptt.learn_rate, tmpbptt.momentum,
+        tmpnet.presynaptic_noise);
}
bptt = net->bptt;
bottom_layer = net->bottom_layer;
@@ -272,6 +276,7 @@

CHECK_SCALAR(net, tmpnet, generation);
CHECK_SCALAR(net, tmpnet, flags);
+  CHECK_SCALAR(net, tmpnet, presynaptic_noise);

if (bptt){
CHECK_SCALAR(bptt, tmpbptt, depth);
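The version bump together with READ_SCALAR_IF_VERSION_ELSE_DEFAULT keeps older saved nets loadable: files written before format version 9 get presynaptic_noise = 0. Presumably the macro expands to something along these lines (a sketch; the real macro lives elsewhere in recur-nn-io.c):

/* hypothetical expansion: read the key only from new-enough files,
 * otherwise use the supplied default */
if (version >= 9){
  READ_SCALAR(net, presynaptic_noise);
}
else {
  net->presynaptic_noise = 0;
}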
