
Small fixes in the comments/doc (#379)

* Code formatting
* typos
* a touch of [`mathfmt`](https://github.com/mmcloughlin/mathfmt) does some magic for the documentation of the BarzilaiBorweinSolver
owulveryck committed Feb 16, 2020
1 parent 3543794 commit 462ed1ec3f1bfc65805990bc87293661933a29b5
Showing with 61 additions and 61 deletions.
  1. +17 −17 api_gen.go
  2. +2 −2 blas_test.go
  3. +2 −2 cmd/genapi/main.go
  4. +1 −1 compile.go
  5. +1 −1 differentiation.go
  6. +0 −1 interfaces.go
  7. +2 −2 nn.go
  8. +2 −2 node_test.go
  9. +18 −18 op_nn.go
  10. +1 −1 op_tensor.go
  11. +1 −1 operations.go
  12. +1 −1 operatorPointwise_binary.go
  13. +2 −2 shape.go
  14. +9 −8 solvers.go
  15. +2 −2 solvers_test.go

Some generated files are not rendered by default.

@@ -16,7 +16,7 @@ type testBLASImplementation struct {
}

// Sdsdot computes the dot product of the two vectors plus a constant
// alpha + \sum_i x[i]*y[i]
// alpha + ∑_i x[i]*y[i]
//
// Float32 implementations are autogenerated and not directly tested.
// Sdsdot ...
@@ -25,7 +25,7 @@ func (*testBLASImplementation) Sdsdot(n int, alpha float32, x []float32, incX in
}

// Dsdot computes the dot product of the two vectors
// \sum_i x[i]*y[i]
// ∑_i x[i]*y[i]
//
// Float32 implementations are autogenerated and not directly tested.
// Dsdot ...
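
As a point of reference, a naive sketch of the documented formulas in plain Go, ignoring the incX/incY strides and the extended-precision accumulation of the real BLAS routines (sdsdotNaive and dsdotNaive are hypothetical names, not part of the package):

func sdsdotNaive(alpha float32, x, y []float32) float32 {
	sum := alpha // alpha + ∑_i x[i]*y[i]
	for i := range x {
		sum += x[i] * y[i]
	}
	return sum
}

func dsdotNaive(x, y []float32) float64 {
	var sum float64 // ∑_i x[i]*y[i], accumulated in float64
	for i := range x {
		sum += float64(x[i]) * float64(y[i])
	}
	return sum
}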
@@ -44,8 +44,8 @@ const unaryTemplateRaw = ` // {{.FnName}} performs a pointwise {{lower .FnName}}
func {{.FnName}}(a *Node) (*Node, error) { return unaryOpNode(newElemUnaryOp({{.OpType}}, a), a) }
`

const binaryTemplateRaw = `// {{.FnName}} perfors a pointwise {{lower .FnName}} operation.
{{if .AsSame -}}// retSame indicates if the data type of the return value should be the same as the input data type. It defaults to Bool otherwise.
const binaryTemplateRaw = `// {{.FnName}} performs a pointwise {{lower .FnName}} operation.
{{if .AsSame -}}// retSame indicates if the data type of the return value should be the same as the input data type. It defaults to Bool otherwise.
{{end -}}
func {{.FnName}}(a, b *Node{{if .AsSame}}, retSame bool{{end}}) (*Node, error) { {{if not .AsSame -}}return binOpNode(newElemBinOp({{.OpType}}, a, b), a, b) {{else -}}
op := newElemBinOp({{.OpType}}, a, b)
@@ -595,7 +595,7 @@ func (cg *codegenerator) gen() (*program, map[*Node]register) {
enterLogScope()
defer leaveLogScope()
for i, node := range cg.sorted {
// for i := len(cg.sorted) - 1; i >= 0; i-- {
// for i := len(cg.sorted) - 1; i ≥ 0; i-- {
// node := cg.sorted[i]
replacement := cg.df.replacements[node]
compileLogf("Working on %x. Replacement: %x", node.ID(), replacement.ID())
@@ -33,7 +33,7 @@ func forwardDiffAnalysis(outputs, sortedNodes Nodes) (retVal NodeSet, err error)

symdiffLogf("Diff Set: %d", diffSet)
symdiffLogf("%d", sortedNodes)
// for i := len(sortedNodes) - 1; i >= 0; i-- {
// for i := len(sortedNodes) - 1; i ≥ 0; i-- {
// n := sortedNodes[i]
for _, n := range sortedNodes {
if diffSet.Contains(n) && !n.isInput() {
@@ -7,7 +7,6 @@ import (
"gorgonia.org/tensor"
)


// Tensor is an interface that describes an ndarray
type Tensor interface {
// info about the ndarrayN
nn.go
@@ -101,7 +101,7 @@ func Dropout(x *Node, prob float64) (retVal *Node, err error) {

// LeakyRelu returns a node whose underlying value is:
// f(x) = alpha * x if x < 0
// f(x) = x for x >= 0
// f(x) = x for x ≥ 0
// applied elementwise.
func LeakyRelu(x *Node, alpha float64) (*Node, error) {
var zero *Node
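
For reference, the documented piecewise formula as a scalar sketch (an illustrative helper, not the graph-based op that LeakyRelu builds):

func leakyReluScalar(x, alpha float64) float64 {
	if x < 0 {
		return alpha * x // f(x) = alpha * x for x < 0
	}
	return x // f(x) = x for x ≥ 0
}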
@@ -154,7 +154,7 @@ func LeakyRelu(x *Node, alpha float64) (*Node, error) {
}

// Rectify is a convenience function for creating rectified linear units activation functions.
// This function uses >=, which is the canonical version. If you want to use >, you can create
// This function uses ≥, which is the canonical version. If you want to use >, you can create
// your own by just following this.
func Rectify(x *Node) (retVal *Node, err error) {
var zero *Node
@@ -110,7 +110,7 @@ func TestNodeBasics(t *testing.T) {
returnNode(n)

// Returns itsef
n = newNode(In(g), WithType(makeTensorType(2, Float32)), WithShape(2,3))
n = newNode(In(g), WithType(makeTensorType(2, Float32)), WithShape(2, 3))
m := n.Node()
if n != m {
t.Error("Expected n.Node() to return itself, pointers and all")
@@ -119,7 +119,7 @@ func TestNodeBasics(t *testing.T) {
if len(ns) != 1 {
t.Errorf("Expected Nodes() to return a slice of length 1. Got %v", ns)
}
if ns[0]!= n {
if ns[0] != n {
t.Error("Expected first slice to be itself.")
}
m = nil
@@ -1305,12 +1305,12 @@ func (op *BatchNormOp) f64s(input, output *tensor.Dense) (err error) {
whichblas.Dgemm(blas.NoTrans, blas.NoTrans, nc, spatialDim, 1, -1, nbc, 1, ssm, spatialDim, 1, outputF64s, spatialDim)

if op.training {
// compute variance using var(X) = E(X-EX)^2)
// compute variance using var(X) = E(X-EX)²)
copy(tmp, outputF64s)
vecf64.Mul(tmp, tmp) // (X-EX) ^ 2

whichblas.Dgemv(blas.NoTrans, nc, spatialDim, 1.0/(float64(n*spatialDim)), tmp, spatialDim, ssm, 1, 0, nbc, 1)
whichblas.Dgemv(blas.Trans, n, channels, 1.0, nbc, channels, bsm, 1, 0, varianceTmp, 1) // E((X_EX)^2)
whichblas.Dgemv(blas.Trans, n, channels, 1.0, nbc, channels, bsm, 1, 0, varianceTmp, 1) // E((X_EX)²)

// compute and save moving average
op.ma.Float64s()[0] *= momentum
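
A scalar sketch of the variance identity used above, assuming a plain float64 slice rather than the BLAS-blocked per-channel layout (varianceSketch is a hypothetical helper):

func varianceSketch(xs []float64) float64 {
	var mean float64
	for _, x := range xs {
		mean += x
	}
	mean /= float64(len(xs)) // EX
	var v float64
	for _, x := range xs {
		d := x - mean // X - EX
		v += d * d
	}
	return v / float64(len(xs)) // var(X) = E((X-EX)²)
}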
@@ -1386,12 +1386,12 @@ func (op *BatchNormOp) f32s(input, output *tensor.Dense) (err error) {
whichblas.Sgemm(blas.NoTrans, blas.NoTrans, nc, spatialDim, 1, -1, nbc, 1, ssm, spatialDim, 1, outputF32s, spatialDim)

if op.training {
// compute variance using var(X) = E(X-EX)^2)
// compute variance using var(X) = E(X-EX)²)
copy(tmp, outputF32s)
vecf32.Mul(tmp, tmp) // (X-EX) ^ 2

whichblas.Sgemv(blas.NoTrans, nc, spatialDim, 1.0/(float32(n*spatialDim)), tmp, spatialDim, ssm, 1, 0, nbc, 1)
whichblas.Sgemv(blas.Trans, n, channels, 1.0, nbc, channels, bsm, 1, 0, varianceTmp, 1) // E((X_EX)^2)
whichblas.Sgemv(blas.Trans, n, channels, 1.0, nbc, channels, bsm, 1, 0, varianceTmp, 1) // E((X_EX)²)

// compute and save moving average
op.ma.Float32s()[0] *= momentum
@@ -1517,16 +1517,16 @@ func (op *batchnormDiffOp) f64s(input, inGrad, outGrad *tensor.Dense) (err error
// if Y = (X-mean(X))/(sqrt(var(X)+eps)), then
//
// dE(Y)/dX =
// (dE/dY - mean(dE/dY) - mean(dE/dY \cdot Y) \cdot Y)
// (dE/dY - mean(dE/dY) - mean(dE/dY ⋅ Y) ⋅ Y)
// ./ sqrt(var(X) + eps)
//
// where \cdot and ./ are hadamard product and elementwise division,
// where ⋅ and ./ are hadamard product and elementwise division,
// respectively, dE/dY is the top diff, and mean/var/sum are all computed
// along all dimensions except the channels dimension. In the above
// equation, the operations allow for expansion (i.e. broadcast) along all
// dimensions except the channels dimension where required.

// sum(dE/dY \cdot Y)
// sum(dE/dY ⋅ Y)
copy(ig, out)
vecf64.Mul(ig, og)
whichblas.Dgemv(blas.NoTrans, nc, spatialDim, 1, ig, spatialDim, ssm, 1, 0, nbc, 1)
@@ -1536,19 +1536,19 @@ func (op *batchnormDiffOp) f64s(input, inGrad, outGrad *tensor.Dense) (err error
whichblas.Dgemm(blas.NoTrans, blas.NoTrans, n, channels, 1, 1, bsm, 1, meanTmp, channels, 0, nbc, channels)
whichblas.Dgemm(blas.NoTrans, blas.NoTrans, nc, spatialDim, 1, 1, nbc, 1, ssm, spatialDim, 0, ig, spatialDim)

// sum(dE/dY \cdot Y) \cdot Y
// sum(dE/dY ⋅ Y) ⋅ Y
vecf64.Mul(ig, out)

// sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y
// sum(dE/dY)-sum(dE/dY ⋅ Y) ⋅ Y
whichblas.Dgemv(blas.NoTrans, nc, spatialDim, 1, og, spatialDim, ssm, 1, 0, nbc, 1)
whichblas.Dgemv(blas.Trans, n, channels, 1, nbc, channels, bsm, 1, 0, meanTmp, 1)

// reshape (broadcast) the above to make
// sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y
// sum(dE/dY)-sum(dE/dY ⋅ Y) ⋅ Y
whichblas.Dgemm(blas.NoTrans, blas.NoTrans, n, channels, 1, 1, bsm, 1, meanTmp, channels, 0, nbc, channels)
whichblas.Dgemm(blas.NoTrans, blas.NoTrans, nc, spatialDim, 1, 1, nbc, 1, ssm, spatialDim, 1, ig, spatialDim)

// dE/dY - mean(dE/dY)-mean(dE/dY \cdot Y) \cdot Y
// dE/dY - mean(dE/dY)-mean(dE/dY ⋅ Y) ⋅ Y
beta := (-1.0 / float64(nc))
vecf64.Scale(ig, beta)
vecf64.Add(ig, og)
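
A per-channel sketch of the backward formula quoted above, assuming plain slices for one channel; dy is the top diff dE/dY, y the normalized output Y, and invStd = 1/sqrt(var(X)+eps) is passed in precomputed (batchNormGradSketch is a hypothetical helper, not the BLAS-based implementation):

func batchNormGradSketch(dy, y []float64, invStd float64) []float64 {
	n := float64(len(dy))
	var meanDy, meanDyY float64
	for i := range dy {
		meanDy += dy[i]
		meanDyY += dy[i] * y[i] // dE/dY ⋅ Y
	}
	meanDy /= n
	meanDyY /= n
	dx := make([]float64, len(dy))
	for i := range dy {
		// (dE/dY - mean(dE/dY) - mean(dE/dY ⋅ Y) ⋅ Y) ./ sqrt(var(X)+eps)
		dx[i] = (dy[i] - meanDy - meanDyY*y[i]) * invStd
	}
	return dx
}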
@@ -1585,16 +1585,16 @@ func (op *batchnormDiffOp) f32s(input, inGrad, outGrad *tensor.Dense) (err error
// if Y = (X-mean(X))/(sqrt(var(X)+eps)), then
//
// dE(Y)/dX =
// (dE/dY - mean(dE/dY) - mean(dE/dY \cdot Y) \cdot Y)
// (dE/dY - mean(dE/dY) - mean(dE/dY ⋅ Y) ⋅ Y)
// ./ sqrt(var(X) + eps)
//
// where \cdot and ./ are hadamard product and elementwise division,
// where ⋅ and ./ are hadamard product and elementwise division,
// respectively, dE/dY is the top diff, and mean/var/sum are all computed
// along all dimensions except the channels dimension. In the above
// equation, the operations allow for expansion (i.e. broadcast) along all
// dimensions except the channels dimension where required.

// sum(dE/dY \cdot Y)
// sum(dE/dY ⋅ Y)
copy(ig, out)
vecf32.Mul(ig, og)
whichblas.Sgemv(blas.NoTrans, nc, spatialDim, 1, ig, spatialDim, ssm, 1, 0, nbc, 1)
@@ -1604,19 +1604,19 @@ func (op *batchnormDiffOp) f32s(input, inGrad, outGrad *tensor.Dense) (err error
whichblas.Sgemm(blas.NoTrans, blas.NoTrans, n, channels, 1, 1, bsm, 1, meanTmp, channels, 0, nbc, channels)
whichblas.Sgemm(blas.NoTrans, blas.NoTrans, nc, spatialDim, 1, 1, nbc, 1, ssm, spatialDim, 0, ig, spatialDim)

// sum(dE/dY \cdot Y) \cdot Y
// sum(dE/dY ⋅ Y) ⋅ Y
vecf32.Mul(ig, out)

// sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y
// sum(dE/dY)-sum(dE/dY ⋅ Y) ⋅ Y
whichblas.Sgemv(blas.NoTrans, nc, spatialDim, 1, og, spatialDim, ssm, 1, 0, nbc, 1)
whichblas.Sgemv(blas.Trans, n, channels, 1, nbc, channels, bsm, 1, 0, meanTmp, 1)

// reshape (broadcast) the above to make
// sum(dE/dY)-sum(dE/dY \cdot Y) \cdot Y
// sum(dE/dY)-sum(dE/dY ⋅ Y) ⋅ Y
whichblas.Sgemm(blas.NoTrans, blas.NoTrans, n, channels, 1, 1, bsm, 1, meanTmp, channels, 0, nbc, channels)
whichblas.Sgemm(blas.NoTrans, blas.NoTrans, nc, spatialDim, 1, 1, nbc, 1, ssm, spatialDim, 1, ig, spatialDim)

// dE/dY - mean(dE/dY)-mean(dE/dY \cdot Y) \cdot Y
// dE/dY - mean(dE/dY)-mean(dE/dY ⋅ Y) ⋅ Y
beta := (-1.0 / float32(n*spatialDim))
vecf32.Scale(ig, beta)
vecf32.Add(ig, og)
@@ -489,7 +489,7 @@ func (op repeatOp) WriteHash(h hash.Hash) {

func (op repeatOp) Hashcode() uint32 { return simpleHash(op) }

// sliceOp represents a slicing operation. If end <= start, it means ":"
// sliceOp represents a slicing operation. If end ≤ start, it means ":"
type sliceOp struct {
tensor.Slice

@@ -646,7 +646,7 @@ func Reshape(n *Node, to tensor.Shape) (retVal *Node, err error) {
// Tensordot performs a tensor contraction of a and b along specified axes.
func Tensordot(aAxes []int, bAxes []int, a, b *Node) (retVal *Node, err error) {

// Check if input tensors actually have dim >= 1
// Check if input tensors actually have dim ≥ 1
if (len(a.Shape()) < 1) || (len(b.Shape()) < 1) || (a.Dims() < 1) || (b.Dims() < 1) {
return nil, errors.New("Input Node's shape should have length at least 1")
}
@@ -844,7 +844,7 @@ func hadamardDivDiff(ctx ExecutionContext, x, y, z *Node) (err error) {
xdv.SetDeriv(ver.Value()) // ignore errors on purpose
}

//dzdy = -x/y^2
//dzdy = -x/y²
// TODO: investigate if this can be done (if no other node uses z):
// unsafe do : neg zdv.d
// unsafe do : mul zdv.d, zdv.Value
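
The quotient rule behind that comment, as a scalar sketch: for z = x/y, dz/dx = 1/y and dz/dy = -x/y², so the incoming gradient is scaled accordingly (hadamardDivGrads is a hypothetical helper, not Gorgonia's dual-value machinery):

func hadamardDivGrads(x, y, dz float64) (dx, dy float64) {
	dx = dz / y              // dz * dz/dx, with dz/dx = 1/y
	dy = dz * (-x / (y * y)) // dz * dz/dy, with dz/dy = -x/y²
	return dx, dy
}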
@@ -35,8 +35,8 @@ func transposeBatch2D(shape tensor.Shape) tensor.Shape {

// KeepDims is a function that ensures that input and output dimensions are the same though the shape may change.
//
// The expandLeft flag in the function indicates if any shape expansion should be done leftwards or rightwards.
// For example, if fn() returns a tensor with a shape (3) and the desired dimension is 2,
// The expandLeft flag in the function indicates if any shape expansion should be done leftwards or rightwards.
// For example, if fn() returns a tensor with a shape (3) and the desired dimension is 2,
// then if `expandLeft` is true the result will be `(1, 3)`. Otherwise the result will be `(3, 1)`.
//
// At the moment, results that turn into scalars cannot have their dimensions kept - the semantics isn't well established yet and is a work in progress.
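
An illustrative sketch of the expansion rule described above, using plain []int shapes (expandTo is a hypothetical helper, not the KeepDims implementation; it assumes dims ≥ len(shape)):

func expandTo(shape []int, dims int, expandLeft bool) []int {
	out := make([]int, dims)
	for i := range out {
		out[i] = 1 // pad missing axes with size-1 dimensions
	}
	if expandLeft {
		copy(out[dims-len(shape):], shape) // (3) with dims=2 → (1, 3)
	} else {
		copy(out, shape) // (3) with dims=2 → (3, 1)
	}
	return out
}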
@@ -269,7 +269,7 @@ func (s *RMSPropSolver) Step(model []ValueGrad) (err error) {
}

cv := cached.Value
// cw = cw*decay + (1-decay) * grad^2
// cw = cw*decay + (1-decay) * grad²
switch cw := cv.(type) {
case *tensor.Dense:
var gt, gt2, w, regularized tensor.Tensor
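
The cache update in the comment above, as a scalar sketch (hypothetical names; the solver applies the same formula elementwise to the cached tensor):

func rmspropCacheUpdate(cw, grad, decay float64) float64 {
	return cw*decay + (1-decay)*grad*grad // cw = cw*decay + (1-decay)*grad²
}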
@@ -545,7 +545,7 @@ func (s *AdamSolver) Step(model []ValueGrad) (err error) {
// prep done. Now let's apply the formula:
// the formula is
// (β_1 * m_t-1) + (1 - β_1)g_t .................. 1
// (β_2 * v_t-1) + (1 - β_2)*(g_t)^2 ............. 2
// (β_2 * v_t-1) + (1 - β_2)*(g_t)² ............. 2

// equation(1)
t1 := g.Clone().(*tensor.Dense)
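
Equations (1) and (2) from the comment above, as a scalar sketch (hypothetical names; the solver applies them elementwise to the cached moment tensors):

func adamMoments(mPrev, vPrev, g, beta1, beta2 float64) (m, v float64) {
	m = beta1*mPrev + (1-beta1)*g   // (β_1 * m_t-1) + (1 - β_1)g_t ........ (1)
	v = beta2*vPrev + (1-beta2)*g*g // (β_2 * v_t-1) + (1 - β_2)*(g_t)² .... (2)
	return m, v
}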
@@ -1353,17 +1353,18 @@ func (s *AdaGradSolver) Step(model []ValueGrad) (err error) {

// BarzilaiBorweinSolver / Barzilai-Borwein performs Gradient Descent in steepest descend direction
// Solves 0 = F(x), by
// x_{i+1} = x_i - eta * Grad(F)(x_i)
// xᵢ₊₁ = xᵢ - eta * Grad(F)(xᵢ)
// Where the learn rate eta is calculated by the Barzilai-Borwein method:
// eta(x_i) = <(x_i - x_{i-1}), (Grad(F)(x_i) - Grad(F)(x_{i-1}))> /
// ||(Grad(F)(x_i) - Grad(F)(x_{i-1}))||^2
// eta(xᵢ) = <(xᵢ - xᵢ₋₁), (Grad(F)(xᵢ) - Grad(F)(xᵢ₋₁))> /
// ∥(Grad(F)(xᵢ) - Grad(F)(xᵢ₋₁))∥²
// The input learn rate is used for the first iteration.
//
// TODO: Check out stochastic implementations, e.g. "Barzilai-Borwein Step Size for Stochastic Gradient Descent" https://arxiv.org/abs/1605.04131
type BarzilaiBorweinSolver struct {
eta float64 // initial learn rate
clip float64 // clip value
useClip bool
prevDV []*dualValue // dual value for x_{i-1} step
prevDV []*dualValue // dual value for xᵢ₋₁ step
}

// NewBarzilaiBorweinSolver creates a new Barzilai-Borwein solver withs some default values:
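
A flat-vector sketch of the Barzilai-Borwein step size described above (bbStepSize is a hypothetical helper, not the tensor.Contract-based code in Step); x/xPrev are the current and previous parameters, g/gPrev the corresponding gradients:

func bbStepSize(x, xPrev, g, gPrev []float64) float64 {
	var num, den float64
	for i := range x {
		dx := x[i] - xPrev[i] // xᵢ - xᵢ₋₁
		dg := g[i] - gPrev[i] // Grad(F)(xᵢ) - Grad(F)(xᵢ₋₁)
		num += dx * dg        // <dx, dg>
		den += dg * dg        // ∥dg∥²
	}
	return num / den // eta(xᵢ)
}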
@@ -1431,7 +1432,7 @@ func (s *BarzilaiBorweinSolver) Step(model []ValueGrad) (err error) {
return errors.Wrap(err, subFail)
}

// <(x_i - x_{i-1}), (Grad(F)(x_i) - Grad(F)(x_{i-1}))>
// <(xᵢ - xᵢ₋₁), (Grad(F)(xᵢ) - Grad(F)(xᵢ₋₁))>

// Scalar Product == Total tensor contraction
dims := valueDiff.Dims()
@@ -1448,7 +1449,7 @@ func (s *BarzilaiBorweinSolver) Step(model []ValueGrad) (err error) {

nominator += valGradDiffscalarProd.Data().(float64)

// ||(Grad(F)(x_i) - Grad(F)(x_{i-1}))||^2
// ∥(Grad(F)(xᵢ) - Grad(F)(xᵢ₋₁))∥²
gradDiffscalarProd, err := tensor.Contract(gradDiff, gradDiff, contractionAxes, contractionAxes)
if err != nil {
return errors.New("operationError, Contracting value / gradient difference")
@@ -392,8 +392,8 @@ func TestBarzilaiBorweinSolver(t *testing.T) {
// which is used as a performance test problem for optimization algorithms.
// https://en.wikipedia.org/wiki/Rosenbrock_function
//
// f(x,y) = (a-x)^2 + b(y-x^2)^2
// It has a global minimum at (x, y) = (a, a^2), where f(x,y) = 0.
// f(x,y) = (a-x)² + b(y-x²)²
// It has a global minimum at (x, y) = (a, a²), where f(x,y) = 0.
// Usually a = 1, b = 100, then the minimum is at x = y = 1
// TODO: There is also an n-dimensional version...see wiki
func model2dRosenbrock(a, b, xInit, yInit float64) (z, cost *Node, machine *tapeMachine, err error) {
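
For reference, the Rosenbrock function itself as plain Go (model2dRosenbrock builds the equivalent expression as a Gorgonia graph; rosenbrock here is just an illustrative helper):

func rosenbrock(a, b, x, y float64) float64 {
	return (a-x)*(a-x) + b*(y-x*x)*(y-x*x) // f(x,y) = (a-x)² + b(y-x²)²
}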
