From 6225e920eeae98759a718688265d34ee255370ee Mon Sep 17 00:00:00 2001 From: David Cuadrado Date: Fri, 9 Oct 2020 15:09:34 -0500 Subject: [PATCH 01/12] Adding Softmax operation It's still missing the methods to allow to back propagate it --- op_softmax.go | 281 +++++++++++++++++++++++++++++++++++++++++++++ op_softmax_test.go | 39 +++++++ 2 files changed, 320 insertions(+) create mode 100644 op_softmax.go create mode 100644 op_softmax_test.go diff --git a/op_softmax.go b/op_softmax.go new file mode 100644 index 00000000..7a52bfc7 --- /dev/null +++ b/op_softmax.go @@ -0,0 +1,281 @@ +package gorgonia + +import ( + "fmt" + "hash" + "math" + + "github.com/chewxy/hm" + "github.com/pkg/errors" + "gorgonia.org/tensor" +) + +type softmaxOp struct { +} + +func newSoftmaxOp(inputShape tensor.Shape) *softmaxOp { + softmaxop := &softmaxOp{} + + return softmaxop +} + +// Softmax - implements the softmax operation described here: http://proceedings.mlr.press/v48/martins16.pdf +// Current implementation only supports float64 +func Softmax(x *Node) (*Node, error) { + xShape := x.Shape() + op := newSoftmaxOp(xShape) + + return ApplyOp(op, x) +} + +func (op *softmaxOp) Arity() int { + return 1 +} + +func (op *softmaxOp) ReturnsPtr() bool { return false } + +func (op *softmaxOp) CallsExtern() bool { return false } + +func (op *softmaxOp) WriteHash(h hash.Hash) { + fmt.Fprintf(h, "Softmax{}()") +} + +func (op *softmaxOp) Hashcode() uint32 { return simpleHash(op) } + +func (op *softmaxOp) String() string { + return fmt.Sprintf("Softmax{}()") +} + +func (op *softmaxOp) InferShape(inputs ...DimSizer) (tensor.Shape, error) { + s := inputs[0].(tensor.Shape).Clone() + return s, nil +} + +func (op *softmaxOp) Type() hm.Type { + a := hm.TypeVariable('a') + t := newTensorType(1, a) + + return hm.NewFnType(t, t) +} + +func (op *softmaxOp) OverwritesInput() int { return -1 } + +func (op *softmaxOp) checkInput(inputs ...Value) (tensor.Tensor, error) { + if err := checkArity(op, len(inputs)); err != nil { + return nil, err + } + + var in tensor.Tensor + var ok bool + + if in, ok = inputs[0].(tensor.Tensor); !ok { + return nil, errors.Errorf("Expected input to be a tensor") + } + + if in.Shape().Dims() != 1 { + return nil, errors.Errorf("Expected input to have 1 dimensions") + } + + return in, nil +} + +func (op *softmaxOp) Do(inputs ...Value) (retVal Value, err error) { + inputTensor, err := op.checkInput(inputs...) 
+ if err != nil { + return nil, fmt.Errorf("Can't check Softmax input: %w", err) + } + + var output interface{} + + switch arr := inputTensor.Data().(type) { + case []float64: + output = float64softMax(arr) + case []float32: + output = float32softMax(arr) + default: + return nil, fmt.Errorf("Softmax needs either []float32 or []float64, got %T", arr) + } + + return tensor.New(tensor.Of(inputTensor.Dtype()), tensor.WithShape(inputTensor.Size()), tensor.WithEngine(inputTensor.Engine()), tensor.WithBacking(output)), nil +} + +// FIXME: go2 +func float64softMax(arr []float64) interface{} { + output := make([]float64, len(arr)) + sum := 0.0 + + for i, v := range arr { + exp := math.Exp(v) + sum += exp + + output[i] = exp + } + + for i := range output { + output[i] /= sum + } + + return output +} + +func float32softMax(arr []float32) interface{} { + output := make([]float32, len(arr)) + sum := float32(0.0) + + for i, v := range arr { + exp := float32(math.Exp(float64(v))) + sum += exp + + output[i] = exp + } + + for i := range output { + output[i] /= sum + } + + return output +} + +type softmaxDiffOp struct { +} + +func newSoftmaxOpDiff() *softmaxDiffOp { + return &softmaxDiffOp{} +} + +func (op *softmaxDiffOp) Arity() int { + return 2 +} + +func (op *softmaxDiffOp) ReturnsPtr() bool { return false } + +func (op *softmaxDiffOp) CallsExtern() bool { return false } + +func (op *softmaxDiffOp) WriteHash(h hash.Hash) { + fmt.Fprintf(h, "SoftmaxDiff{}()") +} + +func (op *softmaxDiffOp) Hashcode() uint32 { return simpleHash(op) } + +func (op *softmaxDiffOp) String() string { + return fmt.Sprintf("SoftmaxDiff{}()") +} + +func (op *softmaxDiffOp) InferShape(inputs ...DimSizer) (tensor.Shape, error) { + s := inputs[0].(tensor.Shape).Clone() + + return s, nil +} + +func (op *softmaxDiffOp) Type() hm.Type { + aType := hm.TypeVariable('a') + + ta := newTensorType(1, aType) + + return hm.NewFnType(ta, ta, ta) // f(float64, float64) float64 +} + +func (op *softmaxDiffOp) OverwritesInput() int { return -1 } + +func (op *softmaxDiffOp) checkInput(inputs ...Value) (tensor.Tensor, tensor.Tensor, error) { + if err := checkArity(op, len(inputs)); err != nil { + return nil, nil, err + } + + var ( + in tensor.Tensor + + gradient tensor.Tensor + ok bool + ) + + switch t := inputs[0].(type) { + case *dualValue: + if in, ok = t.Value.(tensor.Tensor); !ok { + return nil, nil, errors.Errorf("input should be a tensor, got %T", inputs[0]) + } + case tensor.Tensor: + in = t + default: + return nil, nil, errors.Errorf("input type is not supported, got %T", inputs[0]) + } + + switch t := inputs[1].(type) { + case *dualValue: + if gradient, ok = t.Value.(tensor.Tensor); !ok { + return nil, nil, errors.Errorf("gradient should be a tensor, got %T", inputs[1]) + } + case tensor.Tensor: + gradient = t + default: + return nil, nil, errors.Errorf("gradient type is not supported, got %T", inputs[1]) + } + + if in.Shape().Dims() != 1 || gradient.Shape().Dims() != 1 { + return nil, nil, errors.Errorf("Expected input to have 1 dimensions") + } + + return in, gradient, nil +} + +func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { + inputTensor, gradTensor, err := op.checkInput(inputs...) 
+ if err != nil { + return nil, fmt.Errorf("Can't check SoftmaxDiff input: %w", err) + } + + if inputTensor.Size() != gradTensor.Size() { + return nil, fmt.Errorf("softmaxDiffOp.Do inputs sizes should be equal") + } + + if !isFloat32Or64Array(inputTensor.Data()) { + return nil, fmt.Errorf("softmaxDiffOp.Do expected input to be []float64 or []float32, got %T", inputTensor.Data()) + } + + if !isFloat32Or64Array(gradTensor.Data()) { + return nil, fmt.Errorf("softmaxDiffOp.Do expected input to be []float64, got %T", gradTensor.Data()) + } + + input := inputTensor.Data().([]float64) + value := gradTensor.Data().([]float64) + + output := make([]float64, len(input)*len(value)) + + for i := 0; i < len(value); i++ { + for j := 0; j < len(input); j++ { + if i == j { + output[i*j+j] = value[i] * (1 - input[i]) + } else { + output[i*j+j] = -value[i] * input[i] + } + } + } + + val := tensor.New( + tensor.Of(inputTensor.Dtype()), + tensor.WithShape(len(input), len(value)), + tensor.WithEngine(inputTensor.Engine()), + tensor.WithBacking(output), // FIXME + ) + + return val, nil +} + +func isFloat32Or64Array(v interface{}) bool { + if _, ok := v.([]float64); ok { + return true + } + + if _, ok := v.([]float32); ok { + return true + } + + return false +} + +// ensure it complies with the Op interface +var ( + _ Op = &softmaxOp{} + + _ Op = &softmaxDiffOp{} +) diff --git a/op_softmax_test.go b/op_softmax_test.go new file mode 100644 index 00000000..f654662e --- /dev/null +++ b/op_softmax_test.go @@ -0,0 +1,39 @@ +package gorgonia + +import ( + "testing" + + "github.com/stretchr/testify/require" + "gorgonia.org/tensor" +) + +var testCasesSoftMaxDo = []struct { + input []float64 + expected []float64 +}{ + { + []float64{0.2094, -1.0, 0.6411, 0.0, -0.3909}, []float64{0.2382105379413429, 0.07107636737487558, 0.36681399568548617, 0.19320559786800362, 0.13069350113029174}, + }, + { + []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, []float64{7.801341612780742e-05, 0.00021206245143623275, 0.0005764455082375902, 0.0015669413501390804, 0.004259388198344144, 0.0115782175399118, 0.031472858344688034, 0.08555209892803112, 0.23255471590259755, 0.6321492583604866}, + }, + { + []float64{0.1, 0.1, 0.1}, []float64{0.3333333333333333, 0.3333333333333333, 0.3333333333333333}, + }, + { + []float64{-0.1, 0.3, -1.1, 2.7}, []float64{0.05180179352659075, 0.07727919496508177, 0.019056814854240642, 0.8518621966540868}, + }, +} + +func TestSoftmaxDo(t *testing.T) { + c := require.New(t) + + for i, testCase := range testCasesSoftMaxDo { + tt := tensor.New(tensor.Of(tensor.Float64), tensor.WithShape(len(testCase.input)), tensor.WithBacking(testCase.input)) + op := newSoftmaxOp(tt.Shape()) + + out, err := op.Do(tt) + c.NoError(err, "failed test case: %d", i) + c.Equal(testCase.expected, out.Data(), "failed test case: %d", i) + } +} From f1ab8c3f069841205f2f03a7a776e052a960ba4f Mon Sep 17 00:00:00 2001 From: David Cuadrado Date: Sat, 10 Oct 2020 17:38:06 -0500 Subject: [PATCH 02/12] Improve implementation of the SoftMap operation --- op_softmax.go | 161 ++++++++++++++++---------------------------------- 1 file changed, 52 insertions(+), 109 deletions(-) diff --git a/op_softmax.go b/op_softmax.go index 7a52bfc7..3ba0f3ba 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -3,7 +3,6 @@ package gorgonia import ( "fmt" "hash" - "math" "github.com/chewxy/hm" "github.com/pkg/errors" @@ -11,19 +10,23 @@ import ( ) type softmaxOp struct { + shape tensor.Shape + axes []int } -func newSoftmaxOp(inputShape tensor.Shape) *softmaxOp { - softmaxop := 
&softmaxOp{} +func newSoftmaxOp(inputShape tensor.Shape, axes ...int) *softmaxOp { + softmaxop := &softmaxOp{ + shape: inputShape, + axes: axes, + } return softmaxop } -// Softmax - implements the softmax operation described here: http://proceedings.mlr.press/v48/martins16.pdf -// Current implementation only supports float64 -func Softmax(x *Node) (*Node, error) { +// SoftMax2 - implements the softmax operation +func SoftMax2(x *Node, axis ...int) (*Node, error) { xShape := x.Shape() - op := newSoftmaxOp(xShape) + op := newSoftmaxOp(xShape, axis...) return ApplyOp(op, x) } @@ -85,55 +88,36 @@ func (op *softmaxOp) Do(inputs ...Value) (retVal Value, err error) { return nil, fmt.Errorf("Can't check Softmax input: %w", err) } - var output interface{} - - switch arr := inputTensor.Data().(type) { - case []float64: - output = float64softMax(arr) - case []float32: - output = float32softMax(arr) - default: - return nil, fmt.Errorf("Softmax needs either []float32 or []float64, got %T", arr) + aShape := inputTensor.Shape() + axis := aShape.Dims() - 1 // default: last dim + if aShape.IsColVec() || (aShape.IsVector() && !aShape.IsRowVec()) { + axis = 0 } - return tensor.New(tensor.Of(inputTensor.Dtype()), tensor.WithShape(inputTensor.Size()), tensor.WithEngine(inputTensor.Engine()), tensor.WithBacking(output)), nil -} - -// FIXME: go2 -func float64softMax(arr []float64) interface{} { - output := make([]float64, len(arr)) - sum := 0.0 - - for i, v := range arr { - exp := math.Exp(v) - sum += exp + if len(op.axes) > 0 { + if op.axes[0] >= axis+1 || op.axes[0] < 0 { + return nil, errors.Errorf("Cannot perform SoftMax on axis %d. Input has shape %v", op.axes[0], aShape) + } - output[i] = exp + axis = op.axes[0] } - for i := range output { - output[i] /= sum + exp, err := tensor.Exp(inputTensor) + if err != nil { + return nil, fmt.Errorf("error calculating exp for SoftMax: %w", err) } - return output -} - -func float32softMax(arr []float32) interface{} { - output := make([]float32, len(arr)) - sum := float32(0.0) - - for i, v := range arr { - exp := float32(math.Exp(float64(v))) - sum += exp - - output[i] = exp + sum, err := tensor.Sum(exp, axis) + if err != nil { + return nil, fmt.Errorf("error calculating sum for SoftMax: %w", err) } - for i := range output { - output[i] /= sum + div, err := tensor.Div(exp, sum) + if err != nil { + return nil, fmt.Errorf("error calculating div for SoftMax: %w", err) } - return output + return div, nil } type softmaxDiffOp struct { @@ -144,7 +128,7 @@ func newSoftmaxOpDiff() *softmaxDiffOp { } func (op *softmaxDiffOp) Arity() int { - return 2 + return 1 } func (op *softmaxDiffOp) ReturnsPtr() bool { return false } @@ -172,105 +156,64 @@ func (op *softmaxDiffOp) Type() hm.Type { ta := newTensorType(1, aType) - return hm.NewFnType(ta, ta, ta) // f(float64, float64) float64 + return hm.NewFnType(ta, ta) // f(float64) float64 } func (op *softmaxDiffOp) OverwritesInput() int { return -1 } -func (op *softmaxDiffOp) checkInput(inputs ...Value) (tensor.Tensor, tensor.Tensor, error) { +func (op *softmaxDiffOp) checkInput(inputs ...Value) (tensor.Tensor, error) { if err := checkArity(op, len(inputs)); err != nil { - return nil, nil, err + return nil, err } var ( in tensor.Tensor - gradient tensor.Tensor - ok bool + ok bool ) switch t := inputs[0].(type) { case *dualValue: if in, ok = t.Value.(tensor.Tensor); !ok { - return nil, nil, errors.Errorf("input should be a tensor, got %T", inputs[0]) + return nil, errors.Errorf("input should be a tensor, got %T", inputs[0]) } case 
tensor.Tensor: in = t default: - return nil, nil, errors.Errorf("input type is not supported, got %T", inputs[0]) - } - - switch t := inputs[1].(type) { - case *dualValue: - if gradient, ok = t.Value.(tensor.Tensor); !ok { - return nil, nil, errors.Errorf("gradient should be a tensor, got %T", inputs[1]) - } - case tensor.Tensor: - gradient = t - default: - return nil, nil, errors.Errorf("gradient type is not supported, got %T", inputs[1]) + return nil, errors.Errorf("input type is not supported, got %T", inputs[0]) } - if in.Shape().Dims() != 1 || gradient.Shape().Dims() != 1 { - return nil, nil, errors.Errorf("Expected input to have 1 dimensions") - } - - return in, gradient, nil + return in, nil } func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { - inputTensor, gradTensor, err := op.checkInput(inputs...) + inputTensor, err := op.checkInput(inputs...) if err != nil { return nil, fmt.Errorf("Can't check SoftmaxDiff input: %w", err) } - if inputTensor.Size() != gradTensor.Size() { - return nil, fmt.Errorf("softmaxDiffOp.Do inputs sizes should be equal") - } - - if !isFloat32Or64Array(inputTensor.Data()) { - return nil, fmt.Errorf("softmaxDiffOp.Do expected input to be []float64 or []float32, got %T", inputTensor.Data()) - } - - if !isFloat32Or64Array(gradTensor.Data()) { - return nil, fmt.Errorf("softmaxDiffOp.Do expected input to be []float64, got %T", gradTensor.Data()) - } - - input := inputTensor.Data().([]float64) - value := gradTensor.Data().([]float64) - - output := make([]float64, len(input)*len(value)) - - for i := 0; i < len(value); i++ { - for j := 0; j < len(input); j++ { - if i == j { - output[i*j+j] = value[i] * (1 - input[i]) - } else { - output[i*j+j] = -value[i] * input[i] - } - } + diag, err := tensor.Diag(inputTensor) + if err != nil { + return nil, fmt.Errorf("softmaxDiffOp.Do error calculating diag: %w", err) } - val := tensor.New( - tensor.Of(inputTensor.Dtype()), - tensor.WithShape(len(input), len(value)), - tensor.WithEngine(inputTensor.Engine()), - tensor.WithBacking(output), // FIXME - ) + sm := inputTensor.Clone().(tensor.Tensor) + sm.Reshape(inputTensor.Shape().TotalSize(), 1) - return val, nil -} + smT := sm.Clone().(tensor.Tensor) + smT.Transpose() -func isFloat32Or64Array(v interface{}) bool { - if _, ok := v.([]float64); ok { - return true + smDot, err := tensor.Dot(sm, smT) + if err != nil { + return nil, fmt.Errorf("softmaxDiffOp.Do error calculating dot product: %w", err) } - if _, ok := v.([]float32); ok { - return true + result, err := tensor.Sub(diag, smDot) + if err != nil { + return nil, fmt.Errorf("softmaxDiffOp.Do error calculating sub: %w", err) } - return false + return result, nil } // ensure it complies with the Op interface From 8bd8db48182a1a936488d9e59dbad2a96df66511 Mon Sep 17 00:00:00 2001 From: David Cuadrado Date: Sun, 11 Oct 2020 15:18:38 -0500 Subject: [PATCH 03/12] Replace SoftMax with the operation --- op_softmax.go | 62 ++++++++++++++++++++++++++++++++++++++++------ operations.go | 2 +- operations_test.go | 3 ++- 3 files changed, 58 insertions(+), 9 deletions(-) diff --git a/op_softmax.go b/op_softmax.go index 3ba0f3ba..42629644 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -23,8 +23,8 @@ func newSoftmaxOp(inputShape tensor.Shape, axes ...int) *softmaxOp { return softmaxop } -// SoftMax2 - implements the softmax operation -func SoftMax2(x *Node, axis ...int) (*Node, error) { +// SoftMax - implements the softmax operation +func SoftMax(x *Node, axis ...int) (*Node, error) { xShape := x.Shape() op := 
newSoftmaxOp(xShape, axis...) @@ -120,6 +120,55 @@ func (op *softmaxOp) Do(inputs ...Value) (retVal Value, err error) { return div, nil } +// DoDiff calculates the diff and sets its value to the output node. Implementation for ADOp interface. +func (op *softmaxOp) DoDiff(ctx ExecutionContext, inputs Nodes, output *Node) error { + if len(inputs) != 1 { + return fmt.Errorf("SoftmaxOp.DoDiff needs 1 arguments") + } + + odv := output.boundTo.(*dualValue) + odvd := odv.Value.(tensor.Tensor) + diffOp := newSoftmaxOpDiff() + + result, err := diffOp.Do() + if err != nil { + return err + } + + sum, err := odvd.(*tensor.Dense).Add(result.(*tensor.Dense), tensor.UseUnsafe()) + if err != nil { + return err + } + + odv.d = sum + + return nil +} + +// SymDiff applies the diff op. Implementation for SDOp interface. +func (op *softmaxOp) SymDiff(inputs Nodes, output, grad *Node) (Nodes, error) { + err := checkArity(op, len(inputs)) + if err != nil { + return nil, err + } + + diffOp := newSoftmaxOpDiff() + nodes := make(Nodes, 1) + + nodes[0], err = ApplyOp(diffOp, output) + + return nodes, err +} + +// DiffWRT is an implementation for the SDOp interface +func (op *softmaxOp) DiffWRT(inputs int) []bool { + if inputs != 1 { + panic(fmt.Sprintf("softmax operator only supports one input, got %d instead", inputs)) + } + + return []bool{true} +} + type softmaxDiffOp struct { } @@ -192,10 +241,7 @@ func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { return nil, fmt.Errorf("Can't check SoftmaxDiff input: %w", err) } - diag, err := tensor.Diag(inputTensor) - if err != nil { - return nil, fmt.Errorf("softmaxDiffOp.Do error calculating diag: %w", err) - } + diag := tensor.New(tensor.AsDenseDiag(inputTensor)) sm := inputTensor.Clone().(tensor.Tensor) sm.Reshape(inputTensor.Shape().TotalSize(), 1) @@ -218,7 +264,9 @@ func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { // ensure it complies with the Op interface var ( - _ Op = &softmaxOp{} + _ Op = &softmaxOp{} + _ ADOp = &softmaxOp{} + _ SDOp = &softmaxOp{} _ Op = &softmaxDiffOp{} ) diff --git a/operations.go b/operations.go index 9ffd3619..a15057fe 100644 --- a/operations.go +++ b/operations.go @@ -157,7 +157,7 @@ func unaryOpNode(op Op, a *Node) (retVal *Node, err error) { // e^(a[i]) / sum((e^(a[i]))) // For a more numerically stable SoftMax, use StableSoftMax. 
// TODO: MULTI RANK SOFTMAX -func SoftMax(a *Node, axes ...int) (retVal *Node, err error) { +func SoftMaxOld(a *Node, axes ...int) (retVal *Node, err error) { aShape := a.Shape() axis := aShape.Dims() - 1 // default: last dim if a.IsColVec() || (a.IsVector() && !a.IsRowVec()) { diff --git a/operations_test.go b/operations_test.go index ad1ca075..dcb0cba7 100644 --- a/operations_test.go +++ b/operations_test.go @@ -399,7 +399,8 @@ func TestMisha(t *testing.T) { func TestSoftMax(t *testing.T) { defer runtime.GC() - assert := assert.New(t) + + assert := require.New(t) g := NewGraph() xT := tensor.New(tensor.WithBacking([]float64{0.1, 0.2, -0.3, 0.4, 0.5})) x := NewVector(g, Float64, WithShape(5), WithValue(xT)) From 22ae75acd506440fe8964581121a5a4c3f595c44 Mon Sep 17 00:00:00 2001 From: David Cuadrado Date: Sun, 11 Oct 2020 21:59:14 -0500 Subject: [PATCH 04/12] Improve tensor operations in soft max And handle the errors properly --- op_softmax.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/op_softmax.go b/op_softmax.go index 42629644..a294e854 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -244,12 +244,20 @@ func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { diag := tensor.New(tensor.AsDenseDiag(inputTensor)) sm := inputTensor.Clone().(tensor.Tensor) - sm.Reshape(inputTensor.Shape().TotalSize(), 1) + + err = sm.Reshape(inputTensor.Shape().TotalSize(), 1) + if err != nil { + return nil, fmt.Errorf("softmaxDiffOp.Do error reshaping the value: %w", err) + } smT := sm.Clone().(tensor.Tensor) - smT.Transpose() - smDot, err := tensor.Dot(sm, smT) + err = smT.T() + if err != nil { + return nil, fmt.Errorf("softmaxDiffOp.Do error transposing the value: %w", err) + } + + smDot, err := tensor.MatMul(sm, smT) if err != nil { return nil, fmt.Errorf("softmaxDiffOp.Do error calculating dot product: %w", err) } From 030cfb1ecef81b6e5939b7d1b75cb8b814b7e5df Mon Sep 17 00:00:00 2001 From: David Cuadrado Date: Mon, 12 Oct 2020 11:48:02 -0500 Subject: [PATCH 05/12] Fix broken test And add debugging statements --- complex_test.go | 7 ++++++- known_issues_test.go | 11 ++++++----- op_math.go | 10 +++++++++- op_softmax.go | 15 +++++++++------ 4 files changed, 30 insertions(+), 13 deletions(-) diff --git a/complex_test.go b/complex_test.go index 9c1e2ac7..bdcad4c8 100644 --- a/complex_test.go +++ b/complex_test.go @@ -1,6 +1,9 @@ package gorgonia -import "testing" +import ( + "runtime/debug" + "testing" +) func TestWeirdNetwork(t *testing.T) { const ( @@ -138,6 +141,8 @@ func TestWeirdNetwork(t *testing.T) { for i := 0; i < 2; i++ { if err = m.RunAll(); err != nil { t.Errorf("%d %v", i, err) + t.Log(string(debug.Stack())) + break } diff --git a/known_issues_test.go b/known_issues_test.go index 4b060117..9c2f3321 100644 --- a/known_issues_test.go +++ b/known_issues_test.go @@ -1,10 +1,10 @@ package gorgonia import ( - "log" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "gorgonia.org/tensor" ) @@ -303,11 +303,12 @@ func TestIssue363(t *testing.T) { } func TestIssue368(t *testing.T) { + c := require.New(t) + g := NewGraph() x := NewTensor(g, Float32, 2, WithShape(2, 5), WithInit(GlorotU(1.0))) + sm, err := SoftMax(x, 1) - if err != nil { - log.Fatal(err) - } - _ = sm + c.NoError(err) + c.NotNil(sm) } diff --git a/op_math.go b/op_math.go index f418030d..c59426e6 100644 --- a/op_math.go +++ b/op_math.go @@ -732,6 +732,7 @@ func (op linAlgBinOp) do(inputs []Value, opts ...tensor.FuncOpt) (retVal Value, if err = a.T(); err != nil { 
return nil, errors.Wrap(err, tFail) } + // untranspose defer a.T() } @@ -740,6 +741,7 @@ func (op linAlgBinOp) do(inputs []Value, opts ...tensor.FuncOpt) (retVal Value, if err = b.T(); err != nil { return nil, errors.Wrap(err, tFail) } + // untranspose defer b.T() } @@ -751,9 +753,11 @@ func (op linAlgBinOp) do(inputs []Value, opts ...tensor.FuncOpt) (retVal Value, retVal, err = tensor.MatVecMul(a, b, opts...) case vecDotOperator: var ret interface{} + if ret, err = tensor.Inner(a, b); err != nil { return nil, errors.Wrapf(err, "Failed to carry out linalgBinOp operation %v", op) } + retVal, _ = anyToScalar(ret) case outerProdOperator: retVal, err = tensor.Outer(a, b, opts...) @@ -761,8 +765,12 @@ func (op linAlgBinOp) do(inputs []Value, opts ...tensor.FuncOpt) (retVal Value, // checks were done when the op was created retVal, err = batchedMatMul(a, b, nil, op.transA, op.transB, false) } - return + if err != nil { + return nil, fmt.Errorf("linAlgBinOp %v %s %v error: %w", a.Shape(), op.āBinaryOperator, b.Shape(), err) + } + + return retVal, nil } func (op linAlgBinOp) preallocBatchMatMul(incr bool, prealloc Value, inputs ...Value) (retVal Value, err error) { diff --git a/op_softmax.go b/op_softmax.go index a294e854..dc68c816 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -50,15 +50,16 @@ func (op *softmaxOp) String() string { } func (op *softmaxOp) InferShape(inputs ...DimSizer) (tensor.Shape, error) { - s := inputs[0].(tensor.Shape).Clone() - return s, nil + s := inputs[0].(tensor.Shape) + + return tensor.Shape{s.TotalSize()}, nil } func (op *softmaxOp) Type() hm.Type { a := hm.TypeVariable('a') t := newTensorType(1, a) - return hm.NewFnType(t, t) + return hm.NewFnType(t, t) // f(float64) float64 } func (op *softmaxOp) OverwritesInput() int { return -1 } @@ -68,8 +69,10 @@ func (op *softmaxOp) checkInput(inputs ...Value) (tensor.Tensor, error) { return nil, err } - var in tensor.Tensor - var ok bool + var ( + in tensor.Tensor + ok bool + ) if in, ok = inputs[0].(tensor.Tensor); !ok { return nil, errors.Errorf("Expected input to be a tensor") @@ -94,6 +97,7 @@ func (op *softmaxOp) Do(inputs ...Value) (retVal Value, err error) { axis = 0 } + // FIXME: v0.10 if len(op.axes) > 0 { if op.axes[0] >= axis+1 || op.axes[0] < 0 { return nil, errors.Errorf("Cannot perform SoftMax on axis %d. 
Input has shape %v", op.axes[0], aShape) @@ -242,7 +246,6 @@ func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { } diag := tensor.New(tensor.AsDenseDiag(inputTensor)) - sm := inputTensor.Clone().(tensor.Tensor) err = sm.Reshape(inputTensor.Shape().TotalSize(), 1) From 64352142d9b33b2a8eced2aa9f2ccb9e26a6f701 Mon Sep 17 00:00:00 2001 From: David Cuadrado Date: Mon, 12 Oct 2020 18:35:04 -0500 Subject: [PATCH 06/12] WIP --- example_err_test.go | 2 ++ example_operations_test.go | 17 ++++++---- op_softmax.go | 66 +++++++++++++++++++------------------- 3 files changed, 45 insertions(+), 40 deletions(-) diff --git a/example_err_test.go b/example_err_test.go index 297ce11a..b32aca9e 100644 --- a/example_err_test.go +++ b/example_err_test.go @@ -48,6 +48,7 @@ func Example_errorHandling() { )), )), )) + fmt.Printf("nn2: %v\n", nn2) defer func() { @@ -55,6 +56,7 @@ func Example_errorHandling() { fmt.Printf("An error occurs (caught by recover()): %v\n", r) } }() + nn2PlusWrong := Must(Add(nn2, wrong2)) _ = nn2PlusWrong diff --git a/example_operations_test.go b/example_operations_test.go index 9180cf67..34d7f121 100644 --- a/example_operations_test.go +++ b/example_operations_test.go @@ -2,7 +2,6 @@ package gorgonia import ( "fmt" - "log" "strings" "gorgonia.org/tensor" @@ -29,12 +28,16 @@ func ExampleSoftMax() { sm := Must(SoftMax(c)) m := NewTapeMachine(g) if err := m.RunAll(); err != nil { - log.Fatal(err) + panic(err) } + fmt.Printf("a:\n%v\nsoftmax(a) - along last axis (default behaviour):\n%1.2f", a.Value(), sm1.Value()) fmt.Printf("b:\n%v\nsoftmax(b) - along axis 0:\n%1.2f", b.Value(), sm0.Value()) + tmp := fmt.Sprintf("c %v:\n%v\nsoftmax(c) - along last axis (default behaviour) %v:\n%1.2f", c.Value().Shape(), c.Value(), sm.Value().Shape(), sm.Value()) + fmt.Println(strings.Replace(tmp, "\n\n\n", "\n\n", -1)) + // the requirement to use tmp and strings.Replace is because when Go runs example tests, it strips excess newlines. 
// Output: @@ -76,12 +79,12 @@ func ExampleConcat() { z, err := Concat(2, x, y) if err != nil { - log.Fatal(err) + panic(err) } m := NewTapeMachine(g) if err := m.RunAll(); err != nil { - log.Fatal(err) + panic(err) } tmp := fmt.Sprintf("z %v\n%v", z.Value().Shape(), z.Value()) fmt.Println(strings.Replace(tmp, "\n\n", "\n", -1)) // this is because @@ -155,18 +158,18 @@ func ExampleUnconcat() { z, err := Concat(2, x, y) if err != nil { - log.Fatal(err) + panic(err) } unconcats, err := Unconcat(z, 2, 2) if err != nil { - log.Fatal(err) + panic(err) } a, b := unconcats[0], unconcats[1] m := NewTapeMachine(g) if err := m.RunAll(); err != nil { - log.Fatal(err) + panic(err) } tmp := fmt.Sprintf("a %v\n%v\nb %v\n%v", a.Value().Shape(), a.Value(), b.Value().Shape(), b.Value()) fmt.Println(strings.Replace(tmp, "\n\n", "\n", -1)) diff --git a/op_softmax.go b/op_softmax.go index dc68c816..3502fb3f 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -3,6 +3,7 @@ package gorgonia import ( "fmt" "hash" + "os" "github.com/chewxy/hm" "github.com/pkg/errors" @@ -78,10 +79,6 @@ func (op *softmaxOp) checkInput(inputs ...Value) (tensor.Tensor, error) { return nil, errors.Errorf("Expected input to be a tensor") } - if in.Shape().Dims() != 1 { - return nil, errors.Errorf("Expected input to have 1 dimensions") - } - return in, nil } @@ -116,12 +113,40 @@ func (op *softmaxOp) Do(inputs ...Value) (retVal Value, err error) { return nil, fmt.Errorf("error calculating sum for SoftMax: %w", err) } - div, err := tensor.Div(exp, sum) + ss := sum.Shape() + dimsDiff := exp.Shape().Dims() - ss.Dims() + if dimsDiff == 0 { + div, err := tensor.Div(exp, sum) + if err != nil { + return nil, fmt.Errorf("error calculating div for SoftMax: %w", err) + } + + return div, nil + } + + fmt.Fprintf(os.Stderr, "initial sum: %v axis=%d expShape=%v expDims=%d\nDIFF: %d\n", sum, axis, exp.Shape(), exp.Dims(), dimsDiff) + + newShape := tensor.Shape(tensor.BorrowInts(ss.Dims() + dimsDiff)) + copy(newShape, ss) + copy(newShape[axis+1:], newShape[axis:]) + newShape[axis] = 1 + + fmt.Fprintf(os.Stderr, "new shape: %v\n", newShape) + + if err = sum.Reshape(newShape...); err != nil { + return nil, fmt.Errorf("error reshaping sum for SoftMax: %w", err) + } + + fmt.Fprintf(os.Stderr, "sum reshaped: \n%v\nshape: %v\n", sum, sum.Shape()) + + sum, err = tensor.Repeat(sum, axis, exp.Shape()[1:]...) if err != nil { - return nil, fmt.Errorf("error calculating div for SoftMax: %w", err) + return nil, fmt.Errorf("error repeating sum for SoftMax: %w", err) } - return div, nil + fmt.Fprintf(os.Stderr, "sum repeated: \n%v\nshape: %v\nexp=\n%v\n", sum, sum.Shape(), exp) + + return tensor.Div(exp, sum) } // DoDiff calculates the diff and sets its value to the output node. Implementation for ADOp interface. 
@@ -245,32 +270,7 @@ func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { return nil, fmt.Errorf("Can't check SoftmaxDiff input: %w", err) } - diag := tensor.New(tensor.AsDenseDiag(inputTensor)) - sm := inputTensor.Clone().(tensor.Tensor) - - err = sm.Reshape(inputTensor.Shape().TotalSize(), 1) - if err != nil { - return nil, fmt.Errorf("softmaxDiffOp.Do error reshaping the value: %w", err) - } - - smT := sm.Clone().(tensor.Tensor) - - err = smT.T() - if err != nil { - return nil, fmt.Errorf("softmaxDiffOp.Do error transposing the value: %w", err) - } - - smDot, err := tensor.MatMul(sm, smT) - if err != nil { - return nil, fmt.Errorf("softmaxDiffOp.Do error calculating dot product: %w", err) - } - - result, err := tensor.Sub(diag, smDot) - if err != nil { - return nil, fmt.Errorf("softmaxDiffOp.Do error calculating sub: %w", err) - } - - return result, nil + return inputTensor, nil } // ensure it complies with the Op interface From 1eac0e6258462cf2e021b9da6c9b313105d6dc14 Mon Sep 17 00:00:00 2001 From: chewxy Date: Tue, 13 Oct 2020 11:14:03 +1100 Subject: [PATCH 07/12] Fixed softmax broadcasting issue Co-authored-by: David Cuadrado <73729+dcu@users.noreply.github.com> --- op_softmax.go | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/op_softmax.go b/op_softmax.go index 3502fb3f..dc0bb008 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -3,7 +3,6 @@ package gorgonia import ( "fmt" "hash" - "os" "github.com/chewxy/hm" "github.com/pkg/errors" @@ -114,7 +113,8 @@ func (op *softmaxOp) Do(inputs ...Value) (retVal Value, err error) { } ss := sum.Shape() - dimsDiff := exp.Shape().Dims() - ss.Dims() + es := exp.Shape() + dimsDiff := es.Dims() - ss.Dims() if dimsDiff == 0 { div, err := tensor.Div(exp, sum) if err != nil { @@ -124,28 +124,21 @@ func (op *softmaxOp) Do(inputs ...Value) (retVal Value, err error) { return div, nil } - fmt.Fprintf(os.Stderr, "initial sum: %v axis=%d expShape=%v expDims=%d\nDIFF: %d\n", sum, axis, exp.Shape(), exp.Dims(), dimsDiff) + // MULTIDIMENSIONAL SOFTMAX newShape := tensor.Shape(tensor.BorrowInts(ss.Dims() + dimsDiff)) copy(newShape, ss) copy(newShape[axis+1:], newShape[axis:]) newShape[axis] = 1 - fmt.Fprintf(os.Stderr, "new shape: %v\n", newShape) - if err = sum.Reshape(newShape...); err != nil { return nil, fmt.Errorf("error reshaping sum for SoftMax: %w", err) } - fmt.Fprintf(os.Stderr, "sum reshaped: \n%v\nshape: %v\n", sum, sum.Shape()) - - sum, err = tensor.Repeat(sum, axis, exp.Shape()[1:]...) - if err != nil { + if sum, err = tensor.Repeat(sum, axis, es[axis]); err != nil { return nil, fmt.Errorf("error repeating sum for SoftMax: %w", err) } - fmt.Fprintf(os.Stderr, "sum repeated: \n%v\nshape: %v\nexp=\n%v\n", sum, sum.Shape(), exp) - return tensor.Div(exp, sum) } From 1f860421de7669d40fa556472877aef29a3ddc73 Mon Sep 17 00:00:00 2001 From: David Cuadrado Date: Mon, 12 Oct 2020 19:20:03 -0500 Subject: [PATCH 08/12] Fix Example_errorHandling test for SoftMax --- example_err_test.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/example_err_test.go b/example_err_test.go index b32aca9e..098e22f8 100644 --- a/example_err_test.go +++ b/example_err_test.go @@ -61,9 +61,8 @@ func Example_errorHandling() { _ = nn2PlusWrong // Output: - // nn: ÷ false(%a, %f) :: Matrix float32 - // An error occurs: Type inference error. Op: + false. 
Children: [Matrix float32, Matrix float64], OpType:Matrix a → Matrix a → Matrix a: Unable to unify while inferring type of + false: Unification Fail: float64 ~ float32 cannot be unified - // nn2: ÷ false(%a, %f) :: Matrix float32 - // An error occurs (caught by recover()): Type inference error. Op: + false. Children: [Matrix float32, Matrix float64], OpType:Matrix a → Matrix a → Matrix a: Unable to unify while inferring type of + false: Unification Fail: float64 ~ float32 cannot be unified - + // nn: Softmax{}()(%9) :: Vector float32 + // An error occurs: Type inference error. Op: + false. Children: [Vector float32, Matrix float64], OpType:Vector a → Matrix a → Matrix a: Unable to unify while inferring type of + false: Unification Fail: float64 ~ float32 cannot be unified + // nn2: Softmax{}()(%9) :: Vector float32 + // An error occurs (caught by recover()): Type inference error. Op: + false. Children: [Vector float32, Matrix float64], OpType:Vector a → Matrix a → Matrix a: Unable to unify while inferring type of + false: Unification Fail: float64 ~ float32 cannot be unified } From 9e3af62bbe48c119f79563da0489b2557b6fb136 Mon Sep 17 00:00:00 2001 From: chewxy Date: Tue, 13 Oct 2020 12:00:20 +1100 Subject: [PATCH 09/12] Added Difffor softmax Updated softmax test to also check for values Co-authored-by: David Cuadrado <73729+dcu@users.noreply.github.com> --- op_softmax.go | 17 ++++++++++++++--- operations_test.go | 13 ++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/op_softmax.go b/op_softmax.go index dc0bb008..2ef384b4 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -126,6 +126,7 @@ func (op *softmaxOp) Do(inputs ...Value) (retVal Value, err error) { // MULTIDIMENSIONAL SOFTMAX + // REPEAT SUM (traditionally called broadcasting) newShape := tensor.Shape(tensor.BorrowInts(ss.Dims() + dimsDiff)) copy(newShape, ss) copy(newShape[axis+1:], newShape[axis:]) @@ -152,7 +153,7 @@ func (op *softmaxOp) DoDiff(ctx ExecutionContext, inputs Nodes, output *Node) er odvd := odv.Value.(tensor.Tensor) diffOp := newSoftmaxOpDiff() - result, err := diffOp.Do() + result, err := diffOp.Do(odv) if err != nil { return err } @@ -262,8 +263,18 @@ func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { if err != nil { return nil, fmt.Errorf("Can't check SoftmaxDiff input: %w", err) } - - return inputTensor, nil + y := inputTensor.(*tensor.Dense) + s := y.Shape() + fst := tensor.ProdInts([]int(s)) + y.Reshape(fst, 1) + yᵀ := y.ShallowClone() + yᵀ.T() + yyᵀ, err := tensor.MatMul(y, yᵀ) + if err != nil { + return nil, err + } + diag := tensor.New(tensor.AsDenseDiag(y.Data())) + return diag.Sub(yyᵀ.(*tensor.Dense)) // jacobian } // ensure it complies with the Op interface diff --git a/operations_test.go b/operations_test.go index dcb0cba7..04a44449 100644 --- a/operations_test.go +++ b/operations_test.go @@ -399,8 +399,7 @@ func TestMisha(t *testing.T) { func TestSoftMax(t *testing.T) { defer runtime.GC() - - assert := require.New(t) + assert := assert.New(t) g := NewGraph() xT := tensor.New(tensor.WithBacking([]float64{0.1, 0.2, -0.3, 0.4, 0.5})) x := NewVector(g, Float64, WithShape(5), WithValue(xT)) @@ -412,7 +411,7 @@ func TestSoftMax(t *testing.T) { t.Error(err) } - m := NewTapeMachine(g) + m := NewTapeMachine(g, TraceExec()) defer m.Close() if err := m.RunAll(); err != nil { t.Error(err) @@ -454,6 +453,14 @@ func TestSoftMax(t *testing.T) { assert.Equal(smg, sm2g) assert.Equal(xG, x2G) + + correctGrad := []float64{ + -0, -0, -8.379839604304342, -0, -0, + } + + if 
!floatsEqual64(correctGrad, smg.Data().([]float64)) { + t.Errorf("Expected results to be %v. Got %v.", correctGrad, smg.Data()) + } } var sliceTests = []struct { From edad66702275e7509414adc9ce35f82e632abe91 Mon Sep 17 00:00:00 2001 From: chewxy Date: Tue, 13 Oct 2020 22:48:04 +1100 Subject: [PATCH 10/12] Updated softmax op and softmax diffop --- op_softmax.go | 168 ++++++++++++++++++++++++++++++++------------- operations_test.go | 38 +++++----- 2 files changed, 137 insertions(+), 69 deletions(-) diff --git a/op_softmax.go b/op_softmax.go index 2ef384b4..84d7ad04 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -6,18 +6,24 @@ import ( "github.com/chewxy/hm" "github.com/pkg/errors" + "gonum.org/v1/gonum/blas" + "gonum.org/v1/gonum/blas/gonum" "gorgonia.org/tensor" ) type softmaxOp struct { shape tensor.Shape - axes []int + axis int } func newSoftmaxOp(inputShape tensor.Shape, axes ...int) *softmaxOp { + axis := -1 + if len(axes) > 0 { + axis = axes[0] + } softmaxop := &softmaxOp{ shape: inputShape, - axes: axes, + axis: axis, } return softmaxop @@ -31,22 +37,20 @@ func SoftMax(x *Node, axis ...int) (*Node, error) { return ApplyOp(op, x) } -func (op *softmaxOp) Arity() int { - return 1 -} +func (op *softmaxOp) Arity() int { return 1 } func (op *softmaxOp) ReturnsPtr() bool { return false } func (op *softmaxOp) CallsExtern() bool { return false } func (op *softmaxOp) WriteHash(h hash.Hash) { - fmt.Fprintf(h, "Softmax{}()") + fmt.Fprintf(h, "Softmax{%v}()", op.axis) } func (op *softmaxOp) Hashcode() uint32 { return simpleHash(op) } func (op *softmaxOp) String() string { - return fmt.Sprintf("Softmax{}()") + return fmt.Sprintf("Softmax{%d}()", op.axis) } func (op *softmaxOp) InferShape(inputs ...DimSizer) (tensor.Shape, error) { @@ -57,9 +61,7 @@ func (op *softmaxOp) InferShape(inputs ...DimSizer) (tensor.Shape, error) { func (op *softmaxOp) Type() hm.Type { a := hm.TypeVariable('a') - t := newTensorType(1, a) - - return hm.NewFnType(t, t) // f(float64) float64 + return hm.NewFnType(a, a) // f(float64) float64 } func (op *softmaxOp) OverwritesInput() int { return -1 } @@ -92,14 +94,8 @@ func (op *softmaxOp) Do(inputs ...Value) (retVal Value, err error) { if aShape.IsColVec() || (aShape.IsVector() && !aShape.IsRowVec()) { axis = 0 } - - // FIXME: v0.10 - if len(op.axes) > 0 { - if op.axes[0] >= axis+1 || op.axes[0] < 0 { - return nil, errors.Errorf("Cannot perform SoftMax on axis %d. 
Input has shape %v", op.axes[0], aShape) - } - - axis = op.axes[0] + if op.axis != -1 { + axis = op.axis } exp, err := tensor.Exp(inputTensor) @@ -150,15 +146,16 @@ func (op *softmaxOp) DoDiff(ctx ExecutionContext, inputs Nodes, output *Node) er } odv := output.boundTo.(*dualValue) - odvd := odv.Value.(tensor.Tensor) + idv := inputs[0].boundTo.(*dualValue) + idvd := idv.d.(*tensor.Dense) diffOp := newSoftmaxOpDiff() - result, err := diffOp.Do(odv) + result, err := diffOp.Do(odv.Value, odv.d) if err != nil { return err } - sum, err := odvd.(*tensor.Dense).Add(result.(*tensor.Dense), tensor.UseUnsafe()) + sum, err := idvd.Add(result.(*tensor.Dense), tensor.UseUnsafe()) if err != nil { return err } @@ -178,7 +175,7 @@ func (op *softmaxOp) SymDiff(inputs Nodes, output, grad *Node) (Nodes, error) { diffOp := newSoftmaxOpDiff() nodes := make(Nodes, 1) - nodes[0], err = ApplyOp(diffOp, output) + nodes[0], err = ApplyOp(diffOp, output, grad) return nodes, err } @@ -193,15 +190,14 @@ func (op *softmaxOp) DiffWRT(inputs int) []bool { } type softmaxDiffOp struct { + axis int } func newSoftmaxOpDiff() *softmaxDiffOp { return &softmaxDiffOp{} } -func (op *softmaxDiffOp) Arity() int { - return 1 -} +func (op *softmaxDiffOp) Arity() int { return 2 } func (op *softmaxDiffOp) ReturnsPtr() bool { return false } @@ -224,57 +220,133 @@ func (op *softmaxDiffOp) InferShape(inputs ...DimSizer) (tensor.Shape, error) { } func (op *softmaxDiffOp) Type() hm.Type { - aType := hm.TypeVariable('a') - - ta := newTensorType(1, aType) + a := hm.TypeVariable('a') - return hm.NewFnType(ta, ta) // f(float64) float64 + return hm.NewFnType(a, a, a) // f(float64) float64 } func (op *softmaxDiffOp) OverwritesInput() int { return -1 } -func (op *softmaxDiffOp) checkInput(inputs ...Value) (tensor.Tensor, error) { +func (op *softmaxDiffOp) checkInput(inputs ...Value) (tensor.Tensor, tensor.Tensor, error) { if err := checkArity(op, len(inputs)); err != nil { - return nil, err + return nil, nil, err } var ( - in tensor.Tensor - - ok bool + in tensor.Tensor + grad tensor.Tensor + ok bool ) switch t := inputs[0].(type) { case *dualValue: if in, ok = t.Value.(tensor.Tensor); !ok { - return nil, errors.Errorf("input should be a tensor, got %T", inputs[0]) + return nil, nil, errors.Errorf("input should be a tensor, got %T", inputs[0]) } case tensor.Tensor: in = t default: - return nil, errors.Errorf("input type is not supported, got %T", inputs[0]) + return nil, nil, errors.Errorf("input type is not supported, got %T", inputs[0]) } - return in, nil + switch t := inputs[1].(type) { + case *dualValue: + if grad, ok = t.Value.(tensor.Tensor); !ok { + return nil, nil, errors.Errorf("input should be a tensor, got %T", inputs[1]) + } + case tensor.Tensor: + grad = t + default: + return nil, nil, errors.Errorf("input type is not supported, got %T", inputs[1]) + } + + return in, grad, nil } func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { - inputTensor, err := op.checkInput(inputs...) + y, grad, err := op.checkInput(inputs...) 
if err != nil { return nil, fmt.Errorf("Can't check SoftmaxDiff input: %w", err) } - y := inputTensor.(*tensor.Dense) + s := y.Shape() - fst := tensor.ProdInts([]int(s)) - y.Reshape(fst, 1) - yᵀ := y.ShallowClone() - yᵀ.T() - yyᵀ, err := tensor.MatMul(y, yᵀ) - if err != nil { - return nil, err + axis := op.axis + if axis == -1 { + axis = s.Dims() - 1 } - diag := tensor.New(tensor.AsDenseDiag(y.Data())) - return diag.Sub(yyᵀ.(*tensor.Dense)) // jacobian + /* + What follows is an a bit of a splayed out algorithm + Let's imagine Y, and dY are both (a,b,c)-shaped tensors. + We reshape it to a matrix. Let's examine the cases: + case axis = 0: + we reshape it to (1, a*b*c) + case axis = 1: + we reshape it to (a, b*c) + case axis = 2: + we reshape it to (a*b*c, 1) + + We'll call the result matrix M, with shape (N, D) + + Now, we'll do some work: + 1. Make scalars of shape (N,). + 2. Make mulars of shape (D,). To facilitate multiplication, we set the initial valus + to the identity of multiplication: 1. + 3. Populate scalars. This is abit tricky: + scalars[i] = Y[i] · dY[i] + TODO: insert mathematical explanation of what accumulating gradients magic is happening here. + 4. Reshape the scalars to (N, 1) + 5. Reshape the mulars to (1, D) + 6. Perform matrix multiplication... WITH A TWIST. We need to multiply all the results by -1. Then add a bias of 1. + + Step 6 can be done in the usual manner. However, the BLAS librarie contain `(D|S)gemm`, which allows you to set alpha and beta. + */ + + prodBefore := tensor.ProdInts([]int(s[:axis])) // N + prodAfter := tensor.ProdInts([]int(s[axis:])) // D + if prodBefore == 0 { // indicating an error + prodBefore = 1 + } + if prodAfter == 0 { + prodAfter = 1 + } + + scalars := tensor.New(tensor.WithShape(prodBefore), tensor.Of(y.Dtype())) + mulars := tensor.New(tensor.WithShape(prodAfter), tensor.Of(y.Dtype())) + mulars.Memset(one(y.Dtype()).Data()) // set all mulars to 1. + + impl := gonum.Implementation{} + var val interface{} + switch yy := y.Data().(type) { + case []float64: + gradData := grad.Data().([]float64) + mulData := mulars.Data().([]float64) + var scaleData []float64 + switch sd := scalars.Data().(type) { + case float64: + scaleData = make([]float64, 1) + scaleData[0] = sd + case []float64: + scaleData = sd + + } + for i := 0; i < prodBefore; i++ { + scaleData[i] = impl.Ddot(prodAfter, yy[i*prodAfter:], 1, gradData[i*prodAfter:], 1) + } + C := make([]float64, s.TotalSize()) // output + + // important note: here, alpha is -1 and beta is 1. 
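+ // Dgemm computes C = alpha*A*B + beta*C. Here A is scaleData viewed as an (N, 1)
+ // column (N = prodBefore), B is mulData viewed as a (1, D) row of ones (D = prodAfter),
+ // and C starts out zeroed, so the beta*C term contributes nothing. With alpha = -1,
+ // every entry in row i of C therefore becomes -scaleData[i], i.e. the negated
+ // dot product Y[i] · dY[i], ready for the elementwise multiplication by y below.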
+ impl.Dgemm(blas.NoTrans, blas.NoTrans, prodBefore, prodAfter, 1, -1, scaleData, 1, mulData, prodAfter, 1, C, prodAfter) + val = C + case []float32: + //TODO: use Sdot and Sgemm instead of Ddot and Dgemm + case []complex64: + panic("Complex64 not done yet") + case []complex128: + panic("Complex128 not done yet") + } + + retVal := tensor.New(tensor.WithShape(s.Clone()...), tensor.WithBacking(val)) + return tensor.Mul(retVal, y, tensor.UseUnsafe()) } // ensure it complies with the Op interface diff --git a/operations_test.go b/operations_test.go index 04a44449..4786589b 100644 --- a/operations_test.go +++ b/operations_test.go @@ -2,6 +2,7 @@ package gorgonia import ( "io/ioutil" + "log" "runtime" "testing" @@ -399,7 +400,6 @@ func TestMisha(t *testing.T) { func TestSoftMax(t *testing.T) { defer runtime.GC() - assert := assert.New(t) g := NewGraph() xT := tensor.New(tensor.WithBacking([]float64{0.1, 0.2, -0.3, 0.4, 0.5})) x := NewVector(g, Float64, WithShape(5), WithValue(xT)) @@ -417,49 +417,45 @@ func TestSoftMax(t *testing.T) { t.Error(err) } - var smg, xG Value + var xG Value var err error - if smg, err = sm.Grad(); err != nil { - t.Error(err) - } - if xG, err = x.Grad(); err != nil { t.Error(err) } // machine 2, graph 2 - - g2 := NewGraph() + h := NewGraph() xT2 := tensor.New(tensor.WithBacking([]float64{0.1, 0.2, -0.3, 0.4, 0.5})) - x2 := NewVector(g, Float64, WithShape(5), WithValue(xT2)) + x2 := NewVector(h, Float64, WithShape(5), WithValue(xT2)) sm2 := Must(SoftMax(x2)) logsm2 := Must(Neg(Must(Log(sm2)))) Must(Slice(logsm2, S(2))) - m2 := NewLispMachine(g2) + m2 := NewLispMachine(h) defer m2.Close() if err = m2.RunAll(); err != nil { + log.Printf("ERR %v", err) t.Error(err) } - var sm2g, x2G Value - if sm2g, err = sm2.Grad(); err != nil { - t.Error(err) - } - + var x2G Value if x2G, err = x2.Grad(); err != nil { t.Error(err) } - assert.Equal(smg, sm2g) - assert.Equal(xG, x2G) + if !floatsEqual64(xG.Data().([]float64), x2G.Data().([]float64)) { + t.Errorf("Expected both gradients of X to be the same.") + } - correctGrad := []float64{ - -0, -0, -8.379839604304342, -0, -0, + correctXGrad := []float64{ + 0.178025447751409, 0.1967485475322529, 0.11933402633223977, 0.24030921861990098, 0.2655827597641975, } - if !floatsEqual64(correctGrad, smg.Data().([]float64)) { - t.Errorf("Expected results to be %v. Got %v.", correctGrad, smg.Data()) + if !floatsEqual64(correctXGrad, x2G.Data().([]float64)) { + t.Errorf("Expected results to be %v. Got %v.", correctXGrad, x2G.Data()) + } + if !floatsEqual64(correctXGrad, xG.Data().([]float64)) { + t.Errorf("Expected results to be %v. Got %v.", correctXGrad, xG.Data()) } } From 78231d3f294087e4a275fdb98e174b078ad5fa6a Mon Sep 17 00:00:00 2001 From: chewxy Date: Tue, 13 Oct 2020 23:00:56 +1100 Subject: [PATCH 11/12] Fixed softmax for failing tests. Added float32 support for softmaxdiffop Co-authored-by: David Cuadrado <73729+dcu@users.noreply.github.com> --- example_err_test.go | 9 +++++---- op_softmax.go | 22 ++++++++++++++++++++-- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/example_err_test.go b/example_err_test.go index 098e22f8..bb70e77c 100644 --- a/example_err_test.go +++ b/example_err_test.go @@ -61,8 +61,9 @@ func Example_errorHandling() { _ = nn2PlusWrong // Output: - // nn: Softmax{}()(%9) :: Vector float32 - // An error occurs: Type inference error. Op: + false. 
Children: [Vector float32, Matrix float64], OpType:Vector a → Matrix a → Matrix a: Unable to unify while inferring type of + false: Unification Fail: float64 ~ float32 cannot be unified - // nn2: Softmax{}()(%9) :: Vector float32 - // An error occurs (caught by recover()): Type inference error. Op: + false. Children: [Vector float32, Matrix float64], OpType:Vector a → Matrix a → Matrix a: Unable to unify while inferring type of + false: Unification Fail: float64 ~ float32 cannot be unified + // nn: Softmax{-1}()(%9) :: Matrix float32 + // An error occurs: Type inference error. Op: + false. Children: [Matrix float32, Matrix float64], OpType:Matrix a → Matrix a → Matrix a: Unable to unify while inferring type of + false: Unification Fail: float64 ~ float32 cannot be unified + // nn2: Softmax{-1}()(%9) :: Matrix float32 + // An error occurs (caught by recover()): Type inference error. Op: + false. Children: [Matrix float32, Matrix float64], OpType:Matrix a → Matrix a → Matrix a: Unable to unify while inferring type of + false: Unification Fail: float64 ~ float32 cannot be unified + } diff --git a/op_softmax.go b/op_softmax.go index 84d7ad04..e93280dc 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -56,7 +56,7 @@ func (op *softmaxOp) String() string { func (op *softmaxOp) InferShape(inputs ...DimSizer) (tensor.Shape, error) { s := inputs[0].(tensor.Shape) - return tensor.Shape{s.TotalSize()}, nil + return s, nil } func (op *softmaxOp) Type() hm.Type { @@ -338,7 +338,25 @@ func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { impl.Dgemm(blas.NoTrans, blas.NoTrans, prodBefore, prodAfter, 1, -1, scaleData, 1, mulData, prodAfter, 1, C, prodAfter) val = C case []float32: - //TODO: use Sdot and Sgemm instead of Ddot and Dgemm + gradData := grad.Data().([]float32) + mulData := mulars.Data().([]float32) + var scaleData []float32 + switch sd := scalars.Data().(type) { + case float32: + scaleData = make([]float32, 1) + scaleData[0] = sd + case []float32: + scaleData = sd + + } + for i := 0; i < prodBefore; i++ { + scaleData[i] = impl.Sdot(prodAfter, yy[i*prodAfter:], 1, gradData[i*prodAfter:], 1) + } + C := make([]float32, s.TotalSize()) // output + + // important note: here, alpha is -1 and beta is 1. 
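+ // Same layout as the float64 path above: with alpha = -1, beta = 1 and a zeroed C,
+ // Sgemm fills every entry of row i of C with -scaleData[i].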
+ impl.Sgemm(blas.NoTrans, blas.NoTrans, prodBefore, prodAfter, 1, -1, scaleData, 1, mulData, prodAfter, 1, C, prodAfter) + val = C case []complex64: panic("Complex64 not done yet") case []complex128: From 75b0715cc583ed77c75a880e5382e120d34a7b24 Mon Sep 17 00:00:00 2001 From: chewxy Date: Tue, 13 Oct 2020 23:04:12 +1100 Subject: [PATCH 12/12] Fixed a bunch of things that @dcu picked up --- op_softmax.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/op_softmax.go b/op_softmax.go index e93280dc..80911772 100644 --- a/op_softmax.go +++ b/op_softmax.go @@ -148,7 +148,7 @@ func (op *softmaxOp) DoDiff(ctx ExecutionContext, inputs Nodes, output *Node) er odv := output.boundTo.(*dualValue) idv := inputs[0].boundTo.(*dualValue) idvd := idv.d.(*tensor.Dense) - diffOp := newSoftmaxOpDiff() + diffOp := newSoftmaxOpDiff(op.axis) result, err := diffOp.Do(odv.Value, odv.d) if err != nil { @@ -172,7 +172,7 @@ func (op *softmaxOp) SymDiff(inputs Nodes, output, grad *Node) (Nodes, error) { return nil, err } - diffOp := newSoftmaxOpDiff() + diffOp := newSoftmaxOpDiff(op.axis) nodes := make(Nodes, 1) nodes[0], err = ApplyOp(diffOp, output, grad) @@ -193,8 +193,8 @@ type softmaxDiffOp struct { axis int } -func newSoftmaxOpDiff() *softmaxDiffOp { - return &softmaxDiffOp{} +func newSoftmaxOpDiff(axis int) *softmaxDiffOp { + return &softmaxDiffOp{axis: axis} } func (op *softmaxDiffOp) Arity() int { return 2 } @@ -289,7 +289,7 @@ func (op *softmaxDiffOp) Do(inputs ...Value) (Value, error) { Now, we'll do some work: 1. Make scalars of shape (N,). - 2. Make mulars of shape (D,). To facilitate multiplication, we set the initial valus + 2. Make mulars of shape (D,). To facilitate multiplication, we set the initial values to the identity of multiplication: 1. 3. Populate scalars. This is abit tricky: scalars[i] = Y[i] · dY[i]