@@ -6,84 +6,80 @@ float *g &[1] = 0xNNNN
66COMMENT: g gradient update
77COMMENT: g fwd
88# n4_c[0] := (a[0] + b[0]);
9- n4_c[0]{=MAYBE UNINITIALIZED} = -2 = (a[0]{=-4 } + b[0]{=2 })
9+ n4_c[0]{=MAYBE UNINITIALIZED} = -2000e-3 = (a[0]{=-4000e-3 } + b[0]{=2000e-3 })
1010# n19_c[0] := ((n4_c[0] + n4_c[0]) + 1);
11- n19_c[0]{=MAYBE UNINITIALIZED} = -3 = ((n4_c[0]{=-2 } + n4_c[0]{=-2 }) + (float)(1))
11+ n19_c[0]{=MAYBE UNINITIALIZED} = -3000e-3 = ((n4_c[0]{=-2000e-3 } + n4_c[0]{=-2000e-3 }) + (float)(1))
1212# n42[0] := (b[0] - a[0]);
13- n42[0]{=MAYBE UNINITIALIZED} = 6 = (b[0]{=2 } - a[0]{=-4 })
13+ n42[0]{=MAYBE UNINITIALIZED} = 6000e-3 = (b[0]{=2000e-3 } - a[0]{=-4000e-3 })
1414# n31[0] := (b[0] + a[0]);
15- n31[0]{=MAYBE UNINITIALIZED} = -2 = (b[0]{=2 } + a[0]{=-4 })
15+ n31[0]{=MAYBE UNINITIALIZED} = -2000e-3 = (b[0]{=2000e-3 } + a[0]{=-4000e-3 })
1616# n14_d[0] := fma(a[0], b[0], (b[0] * (b[0] * b[0])));
17- n14_d[0]{=MAYBE UNINITIALIZED} = 0 = fmaf (a[0]{=-4 }, b[0]{=2 }, (b[0]{=2 } * (b[0]{=2 } * b[0]{=2 })))
17+ n14_d[0]{=MAYBE UNINITIALIZED} = 0e-3 = fma (a[0]{=-4000e-3 }, b[0]{=2000e-3 }, (b[0]{=2000e-3 } * (b[0]{=2000e-3 } * b[0]{=2000e-3 })))
1818# n40_d[0] := (fma(n14_d[0], 2, n14_d[0]) + relu(n31[0]));
19- n40_d[0]{=MAYBE UNINITIALIZED} = 0 = (fmaf(n14_d[0]{=0}, (float)(2), n14_d[0]{=0}) + fmaxf(0.0, n31[0]{=-2}))
20- # e[0] :=
21- (fma(-1, a[0], ((n19_c[0] + 1) + n19_c[0])) -
22- (fma(3, n40_d[0], n40_d[0]) + relu(n42[0])));
23- e[0]{=MAYBE UNINITIALIZED} = -7 = (fmaf((float)(-1), a[0]{=-4}, ((n19_c[0]{=-3} + (float)(1)) + n19_c[0]{=-3})) - (fmaf((float)(3), n40_d[0]{=0}, n40_d[0]{=0}) + fmaxf(0.0, n42[0]{=6})))
19+ n40_d[0]{=MAYBE UNINITIALIZED} = 0e-3 = (fma(n14_d[0]{=0e-3}, (float)(2), n14_d[0]{=0e-3}) + max(0.0f, n31[0]{=-2000e-3}))
20+ # e[0] :=$ (fma(-1, a[0], ((n19_c[0] + 1) + n19_c[0])) -$ (fma(3, n40_d[0], n40_d[0]) + relu(n42[0])));
21+ e[0]{=MAYBE UNINITIALIZED} = -7000e-3 = (fma((float)(-1), a[0]{=-4000e-3}, ((n19_c[0]{=-3000e-3} + (float)(1)) + n19_c[0]{=-3000e-3})) - (fma((float)(3), n40_d[0]{=0e-3}, n40_d[0]{=0e-3}) + max(0.0f, n42[0]{=6000e-3})))
2422# f[0] := (e[0] * e[0]);
25- f[0]{=MAYBE UNINITIALIZED} = 49 = (e[0]{=-7 } * e[0]{=-7 })
23+ f[0]{=MAYBE UNINITIALIZED} = 49000e-3 = (e[0]{=-7000e-3 } * e[0]{=-7000e-3 })
2624# g[0] := ((f[0] / 2) + (10 / f[0]));
27- g[0]{=MAYBE UNINITIALIZED} = 24.7041 = ((f[0]{=49 } / (float)(2)) + ((float)(10) / f[0]{=49 }))
25+ g[0]{=MAYBE UNINITIALIZED} = 24704e-3 = ((f[0]{=49000e-3 } / (float)(2)) + ((float)(10) / f[0]{=49000e-3 }))
2826COMMENT: end
2927COMMENT: g zero grads
3028# a_grad := 0
31- a_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
29+ a_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
3230# b_grad := 0
33- b_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
31+ b_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
3432# n4_c_grad := 0
35- n4_c_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
33+ n4_c_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
3634# n19_c_grad := 0
37- n19_c_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
35+ n19_c_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
3836# n14_d_grad := 0
39- n14_d_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
37+ n14_d_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
4038# n40_d_grad := 0
41- n40_d_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
39+ n40_d_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
4240# f_grad := 0
43- f_grad[0]{=MAYBE UNINITIALIZED} = 0 = (float)(0)
41+ f_grad[0]{=MAYBE UNINITIALIZED} = 0e-3 = (float)(0)
4442COMMENT: end
4543COMMENT: g bprop
4644# f.grad[0] := fma(1, (-10 / (f[0] * f[0])), f.grad[0]);
47- f_grad[0]{=MAYBE UNINITIALIZED} = -0.00416493 = fmaf ((float)(1), ((float)(-10) / (f[0]{=49 } * f[0]{=49 })), f_grad[0]{=0 })
45+ f_grad[0]{=MAYBE UNINITIALIZED} = -4e-3 = fma ((float)(1), ((float)(-10) / (f[0]{=49000e-3 } * f[0]{=49000e-3 })), f_grad[0]{=0e-3 })
4846# f.grad[0] := (f.grad[0] + 0.5);
49- f_grad[0]{=MAYBE UNINITIALIZED} = 0.495835 = (f_grad[0]{=-0.00416493 } + (float)(0.5))
47+ f_grad[0]{=MAYBE UNINITIALIZED} = 495e-3 = (f_grad[0]{=-4e-3 } + (float)(0.5))
5048# n40_d.grad[0] := fma(-1, ((2 * e[0]) * f.grad[0]), n40_d.grad[0]);
51- n40_d_grad[0]{=MAYBE UNINITIALIZED} = 6.94169 = fmaf ((float)(-1), (((float)(2) * e[0]{=-7 }) * f_grad[0]{=0.495835 }), n40_d_grad[0]{=0 })
49+ n40_d_grad[0]{=MAYBE UNINITIALIZED} = 6941e-3 = fma ((float)(-1), (((float)(2) * e[0]{=-7000e-3 }) * f_grad[0]{=495e-3 }), n40_d_grad[0]{=0e-3 })
5250# n40_d.grad[0] := fma(3, (-1 * ((2 * e[0]) * f.grad[0])), n40_d.grad[0]);
53- n40_d_grad[0]{=MAYBE UNINITIALIZED} = 27.7668 = fmaf ((float)(3), ((float)(-1) * (((float)(2) * e[0]{=-7 }) * f_grad[0]{=0.495835 })), n40_d_grad[0]{=6.94169 })
51+ n40_d_grad[0]{=MAYBE UNINITIALIZED} = 27766e-3 = fma ((float)(3), ((float)(-1) * (((float)(2) * e[0]{=-7000e-3 }) * f_grad[0]{=495e-3 })), n40_d_grad[0]{=6941e-3 })
5452# n14_d.grad[0] := (n14_d.grad[0] + n40_d.grad[0]);
55- n14_d_grad[0]{=MAYBE UNINITIALIZED} = 27.7668 = (n14_d_grad[0]{=0 } + n40_d_grad[0]{=27.7668 })
53+ n14_d_grad[0]{=MAYBE UNINITIALIZED} = 27766e-3 = (n14_d_grad[0]{=0e-3 } + n40_d_grad[0]{=27766e-3 })
5654# n14_d.grad[0] := fma(n40_d.grad[0], 2, n14_d.grad[0]);
57- n14_d_grad[0]{=MAYBE UNINITIALIZED} = 83.3003 = fmaf (n40_d_grad[0]{=27.7668 }, (float)(2), n14_d_grad[0]{=27.7668 })
55+ n14_d_grad[0]{=MAYBE UNINITIALIZED} = 83300e-3 = fma (n40_d_grad[0]{=27766e-3 }, (float)(2), n14_d_grad[0]{=27766e-3 })
5856# a.grad[0] := fma(n14_d.grad[0], b[0], a.grad[0]);
59- a_grad[0]{=MAYBE UNINITIALIZED} = 166.601 = fmaf (n14_d_grad[0]{=83.3003 }, b[0]{=2 }, a_grad[0]{=0 })
57+ a_grad[0]{=MAYBE UNINITIALIZED} = 166600e-3 = fma (n14_d_grad[0]{=83300e-3 }, b[0]{=2000e-3 }, a_grad[0]{=0e-3 })
6058# b.grad[0] := fma(a[0], n14_d.grad[0], b.grad[0]);
61- b_grad[0]{=MAYBE UNINITIALIZED} = -333.201 = fmaf (a[0]{=-4 }, n14_d_grad[0]{=83.3003 }, b_grad[0]{=0 })
59+ b_grad[0]{=MAYBE UNINITIALIZED} = -333201e-3 = fma (a[0]{=-4000e-3 }, n14_d_grad[0]{=83300e-3 }, b_grad[0]{=0e-3 })
6260# b.grad[0] := fma((3 * (b[0] * b[0])), n14_d.grad[0], b.grad[0]);
63- b_grad[0]{=MAYBE UNINITIALIZED} = 666.402 = fmaf (((float)(3) * (b[0]{=2 } * b[0]{=2 })), n14_d_grad[0]{=83.3003 }, b_grad[0]{=-333.201 })
61+ b_grad[0]{=MAYBE UNINITIALIZED} = 666402e-3 = fma (((float)(3) * (b[0]{=2000e-3 } * b[0]{=2000e-3 })), n14_d_grad[0]{=83300e-3 }, b_grad[0]{=-333201e-3 })
6462# b.grad[0] := (b.grad[0] + relu_gate(n31[0], n40_d.grad[0]));
65- b_grad[0]{=MAYBE UNINITIALIZED} = 666.402 = (b_grad[0]{=666.402 } + (n31[0]{=-2 } > 0.0 ? n40_d_grad[0]{=27.7668 } : 0.0 ))
63+ b_grad[0]{=MAYBE UNINITIALIZED} = 666402e-3 = (b_grad[0]{=666402e-3 } + (( n31[0]{=-2000e-3 } > 0.0f) ? n40_d_grad[0]{=27766e-3 } : 0.0f ))
6664# a.grad[0] := (a.grad[0] + relu_gate(n31[0], n40_d.grad[0]));
67- a_grad[0]{=MAYBE UNINITIALIZED} = 166.601 = (a_grad[0]{=166.601} + (n31[0]{=-2} > 0.0 ? n40_d_grad[0]{=27.7668} : 0.0))
68- # b.grad[0] :=
69- (b.grad[0] + relu_gate(n42[0], (-1 * ((2 * e[0]) * f.grad[0]))));
70- b_grad[0]{=MAYBE UNINITIALIZED} = 673.344 = (b_grad[0]{=666.402} + (n42[0]{=6} > 0.0 ? ((float)(-1) * (((float)(2) * e[0]{=-7}) * f_grad[0]{=0.495835})) : 0.0))
71- # a.grad[0] :=
72- (a.grad[0] - relu_gate(n42[0], (-1 * ((2 * e[0]) * f.grad[0]))));
73- a_grad[0]{=MAYBE UNINITIALIZED} = 159.659 = (a_grad[0]{=166.601} - (n42[0]{=6} > 0.0 ? ((float)(-1) * (((float)(2) * e[0]{=-7}) * f_grad[0]{=0.495835})) : 0.0))
65+ a_grad[0]{=MAYBE UNINITIALIZED} = 166600e-3 = (a_grad[0]{=166600e-3} + ((n31[0]{=-2000e-3} > 0.0f) ? n40_d_grad[0]{=27766e-3} : 0.0f))
66+ # b.grad[0] :=$ (b.grad[0] + relu_gate(n42[0], (-1 * ((2 * e[0]) * f.grad[0]))));
67+ b_grad[0]{=MAYBE UNINITIALIZED} = 673344e-3 = (b_grad[0]{=666402e-3} + ((n42[0]{=6000e-3} > 0.0f) ? ((float)(-1) * (((float)(2) * e[0]{=-7000e-3}) * f_grad[0]{=495e-3})) : 0.0f))
68+ # a.grad[0] :=$ (a.grad[0] - relu_gate(n42[0], (-1 * ((2 * e[0]) * f.grad[0]))));
69+ a_grad[0]{=MAYBE UNINITIALIZED} = 159658e-3 = (a_grad[0]{=166600e-3} - ((n42[0]{=6000e-3} > 0.0f) ? ((float)(-1) * (((float)(2) * e[0]{=-7000e-3}) * f_grad[0]{=495e-3})) : 0.0f))
7470# n19_c.grad[0] := fma((2 * e[0]), f.grad[0], n19_c.grad[0]);
75- n19_c_grad[0]{=MAYBE UNINITIALIZED} = -6.94169 = fmaf (((float)(2) * e[0]{=-7 }), f_grad[0]{=0.495835 }, n19_c_grad[0]{=0 })
71+ n19_c_grad[0]{=MAYBE UNINITIALIZED} = -6941e-3 = fma (((float)(2) * e[0]{=-7000e-3 }), f_grad[0]{=495e-3 }, n19_c_grad[0]{=0e-3 })
7672# n19_c.grad[0] := fma((2 * e[0]), f.grad[0], n19_c.grad[0]);
77- n19_c_grad[0]{=MAYBE UNINITIALIZED} = -13.8834 = fmaf (((float)(2) * e[0]{=-7 }), f_grad[0]{=0.495835 }, n19_c_grad[0]{=-6.94169 })
73+ n19_c_grad[0]{=MAYBE UNINITIALIZED} = -13883e-3 = fma (((float)(2) * e[0]{=-7000e-3 }), f_grad[0]{=495e-3 }, n19_c_grad[0]{=-6941e-3 })
7874# n4_c.grad[0] := (n4_c.grad[0] + n19_c.grad[0]);
79- n4_c_grad[0]{=MAYBE UNINITIALIZED} = -13.8834 = (n4_c_grad[0]{=0 } + n19_c_grad[0]{=-13.8834 })
75+ n4_c_grad[0]{=MAYBE UNINITIALIZED} = -13883e-3 = (n4_c_grad[0]{=0e-3 } + n19_c_grad[0]{=-13883e-3 })
8076# n4_c.grad[0] := (n4_c.grad[0] + n19_c.grad[0]);
81- n4_c_grad[0]{=MAYBE UNINITIALIZED} = -27.7668 = (n4_c_grad[0]{=-13.8834 } + n19_c_grad[0]{=-13.8834 })
77+ n4_c_grad[0]{=MAYBE UNINITIALIZED} = -27766e-3 = (n4_c_grad[0]{=-13883e-3 } + n19_c_grad[0]{=-13883e-3 })
8278# a.grad[0] := (a.grad[0] + n4_c.grad[0]);
83- a_grad[0]{=MAYBE UNINITIALIZED} = 131.892 = (a_grad[0]{=159.659 } + n4_c_grad[0]{=-27.7668 })
79+ a_grad[0]{=MAYBE UNINITIALIZED} = 131892e-3 = (a_grad[0]{=159658e-3 } + n4_c_grad[0]{=-27766e-3 })
8480# b.grad[0] := (b.grad[0] + n4_c.grad[0]);
85- b_grad[0]{=MAYBE UNINITIALIZED} = 645.577 = (b_grad[0]{=673.344 } + n4_c_grad[0]{=-27.7668 })
81+ b_grad[0]{=MAYBE UNINITIALIZED} = 645577e-3 = (b_grad[0]{=673344e-3 } + n4_c_grad[0]{=-27766e-3 })
8682# a.grad[0] := fma(-1, ((2 * e[0]) * f.grad[0]), a.grad[0]);
87- a_grad[0]{=MAYBE UNINITIALIZED} = 138.834 = fmaf ((float)(-1), (((float)(2) * e[0]{=-7 }) * f_grad[0]{=0.495835 }), a_grad[0]{=131.892 })
83+ a_grad[0]{=MAYBE UNINITIALIZED} = 138833e-3 = fma ((float)(-1), (((float)(2) * e[0]{=-7000e-3 }) * f_grad[0]{=495e-3 }), a_grad[0]{=131892e-3 })
8884COMMENT: end
8985COMMENT: end
0 commit comments