layer.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "layer.h"
#include "linearalgebra.h"
#include "util.h"
#define SAFE_DELETE(p) if (p != NULL) { free(p); }
/* Fill a rows x cols matrix with uniform random values in [0, 1]. */
void rand_init_2(double** d, int rows, int cols) {
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            d[i][j] = (double)(rand()) / RAND_MAX;
        }
    }
}
/* Fill a vector of length n with uniform random values in [0, 1]. */
void rand_init_1(double* d, int n) {
    for (int i = 0; i < n; i++) {
        d[i] = (double)(rand()) / RAND_MAX;
    }
}
/* Allocate and initialize a fully connected layer with `nodes` outputs and `input_nodes` inputs. */
Layer* create_layer(int nodes, int input_nodes, ActivateType activate) {
    Layer* layer = (Layer*)malloc(sizeof(Layer));
    layer->nodes = nodes;
    layer->input_nodes = input_nodes;
    layer->weights = create_2d_array(nodes, input_nodes);
    layer->gradient = create_2d_array(nodes, input_nodes);
    layer->batch_gradient = create_2d_array(nodes, input_nodes);
    layer->bias = (double*)malloc(nodes * sizeof(double));
    layer->gradient_bias = NULL; /* just keep a null pointer; it will be set during optimization */
    layer->batch_gradient_bias = (double*)malloc(nodes * sizeof(double));
    layer->z = (double*)malloc(nodes * sizeof(double));
    layer->a = (double*)malloc(nodes * sizeof(double));
    layer->delta = (double*)malloc(nodes * sizeof(double));
    zero_matrix_2(layer->batch_gradient, nodes, input_nodes);
    zero_matrix_1(layer->batch_gradient_bias, nodes);
    rand_init_2(layer->weights, nodes, input_nodes);
    rand_init_1(layer->bias, nodes);
    (void)activate; /* currently unused: forward_layer always applies the sigmoid */
    return layer;
}
void destroy_layer(Layer* layer) {
    if (layer != NULL) {
        destroy_2d_array(layer->weights, layer->nodes);
        destroy_2d_array(layer->gradient, layer->nodes);
        destroy_2d_array(layer->batch_gradient, layer->nodes);
        SAFE_DELETE(layer->bias)
        SAFE_DELETE(layer->z)
        SAFE_DELETE(layer->a)
        SAFE_DELETE(layer->delta)
        SAFE_DELETE(layer->batch_gradient_bias)
        /* gradient_bias aliases delta (see gradient()), so it is not freed separately */
        free(layer);
    }
}
/* Logistic sigmoid: sigma(v) = 1 / (1 + e^(-v)). */
double sigmoid(double v) {
    return 1.0 / (1 + exp(-v));
}

/* Apply the sigmoid element-wise to the n values in a, writing into result. */
void sigmoid_a(double* a, double* result, int n) {
    for (int i = 0; i < n; i++) {
        result[i] = sigmoid(a[i]);
    }
}
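
/*
 * Forward pass of one layer; the function below computes
 *   z = W * input + b      (matrix_multi_21 followed by matrix_add_1)
 *   a = sigmoid(z)
 * where the matrix/vector helpers are provided by linearalgebra.h.
 */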
void forward_layer(Layer* layer, double* input) {
    matrix_multi_21(layer->weights, layer->nodes, layer->input_nodes, input, layer->z);
    matrix_add_1(layer->z, layer->bias, layer->z, layer->nodes);
    sigmoid_a(layer->z, layer->a, layer->nodes);
}
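
/*
 * Error term of the output layer, consistent with a squared-error cost and sigmoid
 * activation: delta_i = -(y_i - a_i) * a_i * (1 - a_i), where a_i * (1 - a_i) is the
 * sigmoid derivative evaluated at z_i.
 */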
void delta_last(double* y, double* a, double* result, int n) {
    for (int i = 0; i < n; i++) {
        result[i] = -(y[i] - a[i]) * a[i] * (1 - a[i]);
    }
}
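
/*
 * Error term of a hidden layer: back-propagate the next layer's deltas through its
 * weights and scale by the local sigmoid derivative,
 *   delta_i = (sum_j weight[j][i] * delta[j]) * a_i * (1 - a_i).
 */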
void delta_mid(double** weight, int rows, int cols, double* delta, double* a, double* result) {
    for (int i = 0; i < cols; i++) {
        double s = 0;
        for (int j = 0; j < rows; j++) {
            s += weight[j][i] * delta[j];
        }
        result[i] = s * a[i] * (1 - a[i]);
    }
}
void backward_layer(Layer* layer, Layer* next, double* y) {
    if (next == NULL) { // it is the last layer
        delta_last(y, layer->a, layer->delta, layer->nodes);
    } else {
        delta_mid(next->weights, next->nodes, next->input_nodes, next->delta, layer->a, layer->delta);
    }
}
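
/*
 * Per-sample gradients: the weight gradient is the outer product of this layer's delta
 * with the previous layer's activations (or with the network input x for the first
 * layer), and the bias gradient is the delta itself. Both are accumulated into
 * batch_gradient / batch_gradient_bias for mini-batch averaging. cross() is assumed
 * to compute that outer product; it is declared in the included headers.
 */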
void gradient(Layer* layer, Layer* last, double* x) {
    if (last == NULL) {
        cross(layer->delta, layer->nodes, x, layer->input_nodes, layer->gradient);
    } else {
        cross(layer->delta, layer->nodes, last->a, layer->input_nodes, layer->gradient);
    }
    layer->gradient_bias = layer->delta;
    matrix_add_2(layer->gradient, layer->batch_gradient, layer->batch_gradient, layer->nodes, layer->input_nodes);
    matrix_add_1(layer->gradient_bias, layer->batch_gradient_bias, layer->batch_gradient_bias, layer->nodes);
}
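
/*
 * Mini-batch gradient-descent update with L2 weight decay:
 *   w <- w - lr * (batch_gradient / batch_size + lambda * w)
 *   b <- b - lr * (batch_gradient_bias / batch_size)
 * followed by resetting the accumulated batch gradients to zero.
 */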
void update_weights(Layer* layer, double lambda, double lr, int batch_size) {
    /* update weights and bias, and reset batch gradients */
    for (int i = 0; i < layer->nodes; i++) {
        for (int j = 0; j < layer->input_nodes; j++) {
            layer->weights[i][j] = layer->weights[i][j] - lr * (layer->batch_gradient[i][j] / batch_size + lambda * layer->weights[i][j]);
        }
    }
    for (int i = 0; i < layer->nodes; i++) {
        layer->bias[i] = layer->bias[i] - lr * (layer->batch_gradient_bias[i] / batch_size);
    }
    zero_matrix_2(layer->batch_gradient, layer->nodes, layer->input_nodes);
    zero_matrix_1(layer->batch_gradient_bias, layer->nodes);
}
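
/*
 * Example usage (a minimal sketch, not part of the original file): one training step for
 * a tiny 2-3-1 network on a single sample, wired together from the functions above. It
 * assumes the helpers declared in layer.h, linearalgebra.h and util.h behave as they are
 * used in this file; the ActivateType argument is passed as 0 because its enumerators
 * are not visible here. Guarded so it does not affect a normal library build.
 */
#ifdef LAYER_EXAMPLE
int main(void) {
    double x[2] = {0.5, -0.25};   /* network input */
    double y[1] = {1.0};          /* target output */
    double lr = 0.1, lambda = 0.001;

    Layer* hidden = create_layer(3, 2, (ActivateType)0); /* 0 is a placeholder ActivateType value */
    Layer* output = create_layer(1, 3, (ActivateType)0);

    /* forward pass */
    forward_layer(hidden, x);
    forward_layer(output, hidden->a);

    /* backward pass: output layer first, then propagate into the hidden layer */
    backward_layer(output, NULL, y);
    backward_layer(hidden, output, NULL);

    /* accumulate per-sample gradients, then apply one update with batch size 1 */
    gradient(output, hidden, x);
    gradient(hidden, NULL, x);
    update_weights(output, lambda, lr, 1);
    update_weights(hidden, lambda, lr, 1);

    /* forward again to see the effect of the update */
    forward_layer(hidden, x);
    forward_layer(output, hidden->a);
    printf("prediction after one update: %f\n", output->a[0]);

    destroy_layer(output);
    destroy_layer(hidden);
    return 0;
}
#endif /* LAYER_EXAMPLE */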