Commit
feat: add Adam optimizer to factorization machines (#731)
zhenghaoz authored Jul 17, 2023
1 parent 9c1a222 commit 2c47ad1
Showing 7 changed files with 145 additions and 20 deletions.
14 changes: 14 additions & 0 deletions base/floats/floats.go
@@ -14,6 +14,8 @@
 
 package floats
 
+import "math"
+
 func dot(a, b []float32) (ret float32) {
 	for i := range a {
 		ret += a[i] * b[i]
@@ -149,6 +151,18 @@ func AddTo(a, b, dst []float32) {
 	}
 }
 
+func AddConst(dst []float32, c float32) {
+	for i := range dst {
+		dst[i] += c
+	}
+}
+
+func Sqrt(a []float32) {
+	for i := range a {
+		a[i] = float32(math.Sqrt(float64(a[i])))
+	}
+}
+
 // Dot two vectors.
 func Dot(a, b []float32) (ret float32) {
 	if len(a) != len(b) {
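AddConst and Sqrt are the two vector primitives the Adam path in model/click/model.go needs for its denominator, sqrt(v̂) + ε, applied across a whole factor vector. A minimal standalone sketch of how they compose; the vHat values here are hypothetical, not from the commit:

package main

import (
	"fmt"

	"github.com/zhenghaoz/gorse/base/floats"
)

func main() {
	// Hypothetical second-moment estimates for one factor vector.
	vHat := []float32{1, 4, 9, 16}
	const eps float32 = 1e-8
	floats.Sqrt(vHat)          // vHat <- sqrt(vHat), element-wise
	floats.AddConst(vHat, eps) // vHat <- sqrt(vHat) + eps
	fmt.Println(vHat)          // ~ [1 2 3 4]
}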
15 changes: 14 additions & 1 deletion base/floats/floats_test.go
@@ -14,8 +14,9 @@
 package floats
 
 import (
-	"github.com/stretchr/testify/assert"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
 func TestMatZero(t *testing.T) {
@@ -122,6 +123,18 @@ func TestAddTo(t *testing.T) {
 	assert.Panics(t, func() { AddTo(nil, nil, dst) })
 }
 
+func TestAddConst(t *testing.T) {
+	a := []float32{1, 2, 3, 4}
+	AddConst(a, 2)
+	assert.Equal(t, []float32{3, 4, 5, 6}, a)
+}
+
+func TestSqrt(t *testing.T) {
+	a := []float32{1, 4, 9, 16}
+	Sqrt(a)
+	assert.Equal(t, []float32{1, 2, 3, 4}, a)
+}
+
 func TestDot(t *testing.T) {
 	a := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	b := []float32{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20}
9 changes: 9 additions & 0 deletions base/util.go
@@ -46,6 +46,15 @@ func NewMatrix32(row, col int) [][]float32 {
 	return ret
 }
 
+// NewTensor32 creates a 3D tensor of 32-bit floats.
+func NewTensor32(a, b, c int) [][][]float32 {
+	ret := make([][][]float32, a)
+	for i := range ret {
+		ret[i] = NewMatrix32(b, c)
+	}
+	return ret
+}
+
 // NewMatrixInt creates a 2D matrix of integers.
 func NewMatrixInt(row, col int) [][]int {
 	ret := make([][]int, row)
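NewTensor32 exists so that FM.Fit (further below) can allocate Adam moment estimates for the factor matrix V as [workers][features][factors], one independent copy per worker. A short sketch under assumed sizes, not part of the commit:

package main

import (
	"fmt"

	"github.com/zhenghaoz/gorse/base"
)

func main() {
	// Hypothetical sizes: 4 workers, 100 feature indices, 8 latent factors.
	mV := base.NewTensor32(4, 100, 8) // first-moment estimates
	vV := base.NewTensor32(4, 100, 8) // second-moment estimates
	// Worker 2's first-moment vector for feature index 7 has nFactors entries.
	fmt.Println(len(mV), len(mV[2][7]), len(vV)) // 4 8 4
}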
10 changes: 9 additions & 1 deletion base/util_test.go
@@ -15,8 +15,9 @@
 package base
 
 import (
-	"github.com/stretchr/testify/assert"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
 func TestNewMatrix32(t *testing.T) {
@@ -47,3 +48,10 @@ func TestNewMatrixInt(t *testing.T) {
 		assert.Equal(t, 3, len(v))
 	}
 }
+
+func TestNewTensor32(t *testing.T) {
+	a := NewTensor32(3, 4, 5)
+	assert.Equal(t, 3, len(a))
+	assert.Equal(t, 4, len(a[0]))
+	assert.Equal(t, 5, len(a[0][0]))
+}
77 changes: 74 additions & 3 deletions model/click/model.go
@@ -171,6 +171,7 @@ type FM struct {
 	reg        float32
 	initMean   float32
 	initStdDev float32
+	optimizer  string
 }
 
 func (fm *FM) GetParamsGrid(withSize bool) model.ParamsGrid {
@@ -199,6 +200,7 @@ func (fm *FM) SetParams(params model.Params) {
 	fm.reg = fm.Params.GetFloat32(model.Reg, 0.0)
 	fm.initMean = fm.Params.GetFloat32(model.InitMean, 0)
 	fm.initStdDev = fm.Params.GetFloat32(model.InitStdDev, 0.01)
+	fm.optimizer = fm.Params.GetString(model.Optimizer, model.Adam)
 }
 
 func (fm *FM) Predict(userId, itemId string, userFeatures, itemFeatures []Feature) float32 {
@@ -285,6 +287,15 @@ func (fm *FM) Fit(trainSet, testSet *Dataset, config *FitConfig) Score {
 	maxJobs := config.MaxJobs()
 	temp := base.NewMatrix32(maxJobs, fm.nFactors)
 	vGrad := base.NewMatrix32(maxJobs, fm.nFactors)
+	vGrad2 := base.NewMatrix32(maxJobs, fm.nFactors)
+	mV := base.NewTensor32(maxJobs, int(trainSet.Index.Len()), fm.nFactors)
+	mW := base.NewMatrix32(maxJobs, int(trainSet.Index.Len()))
+	mB := make([]float32, maxJobs)
+	vV := base.NewTensor32(maxJobs, int(trainSet.Index.Len()), fm.nFactors)
+	vW := base.NewMatrix32(maxJobs, int(trainSet.Index.Len()))
+	vB := make([]float32, maxJobs)
+	mVHat := base.NewMatrix32(maxJobs, fm.nFactors)
+	vVHat := base.NewMatrix32(maxJobs, fm.nFactors)
 
 	snapshots := SnapshotManger{}
 	evalStart := time.Now()
@@ -330,17 +341,77 @@
 			for it, j := range features {
 				floats.MulConstAddTo(fm.V[j], values[it], temp[workerId])
 			}
+
+			correct1 := 1 / (1 - math32.Pow(beta1, float32(epoch)))
+			correct2 := 1 / (1 - math32.Pow(beta2, float32(epoch)))
+
 			// Update w_0
-			fm.B -= fm.lr * grad
+			switch fm.optimizer {
+			case model.SGD:
+				fm.B -= fm.lr * grad
+			case model.Adam:
+				// m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t
+				mB[workerId] = beta1*mB[workerId] + (1-beta1)*grad
+				// v_t = \beta_2 v_{t-1} + (1 - \beta_2) g^2_t
+				vB[workerId] = beta2*vB[workerId] + (1-beta2)*grad*grad
+				// \hat{m}_t = m_t / (1 - \beta^t_1)
+				mBHat := mB[workerId] * correct1
+				// \hat{v}_t = v_t / (1 - \beta^t_2)
+				vBHat := vB[workerId] * correct2
+				// w_0 = w_0 - \eta \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)
+				fm.B -= fm.lr * mBHat / (math32.Sqrt(vBHat) + eps)
+			default:
+				log.Logger().Fatal("unknown optimizer", zap.String("optimizer", fm.optimizer))
+			}
+
 			for it, i := range features {
 				// Update w_i
-				fm.W[i] -= fm.lr * grad * values[it]
+				switch fm.optimizer {
+				case model.SGD:
+					fm.W[i] -= fm.lr * grad
+				case model.Adam:
+					// m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t
+					mW[workerId][i] = beta1*mW[workerId][i] + (1-beta1)*grad
+					// v_t = \beta_2 v_{t-1} + (1 - \beta_2) g^2_t
+					vW[workerId][i] = beta2*vW[workerId][i] + (1-beta2)*grad*grad
+					// \hat{m}_t = m_t / (1 - \beta^t_1)
+					mWHat := mW[workerId][i] * correct1
+					// \hat{v}_t = v_t / (1 - \beta^t_2)
+					vWHat := vW[workerId][i] * correct2
+					// w_i = w_i - \eta \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)
+					fm.W[i] -= fm.lr * mWHat / (math32.Sqrt(vWHat) + eps)
+				default:
+					log.Logger().Fatal("unknown optimizer", zap.String("optimizer", fm.optimizer))
+				}
+
 				// Update v_{i,f}
 				floats.MulConstTo(temp[workerId], values[it], vGrad[workerId])
 				floats.MulConstAddTo(fm.V[i], -values[it]*values[it], vGrad[workerId])
 				floats.MulConst(vGrad[workerId], grad)
 				floats.MulConstAddTo(fm.V[i], fm.reg, vGrad[workerId])
-				floats.MulConstAddTo(vGrad[workerId], -fm.lr, fm.V[i])
+				switch fm.optimizer {
+				case model.SGD:
+					floats.MulConstAddTo(vGrad[workerId], -fm.lr, fm.V[i])
+				case model.Adam:
+					// m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t
+					floats.MulConst(mV[workerId][i], beta1)
+					floats.MulConstAddTo(vGrad[workerId], 1-beta1, mV[workerId][i])
+					// v_t = \beta_2 v_{t-1} + (1 - \beta_2) g^2_t
+					floats.MulConst(vV[workerId][i], beta2)
+					floats.MulTo(vGrad[workerId], vGrad[workerId], vGrad2[workerId])
+					floats.MulConstAddTo(vGrad2[workerId], 1-beta2, vV[workerId][i])
+					// \hat{m}_t = m_t / (1 - \beta^t_1)
+					floats.MulConstTo(mV[workerId][i], correct1, mVHat[workerId])
+					// \hat{v}_t = v_t / (1 - \beta^t_2)
+					floats.MulConstTo(vV[workerId][i], correct2, vVHat[workerId])
+					// v_{i,f} = v_{i,f} - \eta \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)
+					floats.Sqrt(vVHat[workerId])
+					floats.AddConst(vVHat[workerId], eps)
+					floats.Div(mVHat[workerId], vVHat[workerId])
+					floats.MulConstAddTo(mVHat[workerId], -fm.lr, fm.V[i])
+				default:
+					log.Logger().Fatal("unknown optimizer", zap.String("optimizer", fm.optimizer))
+				}
			}
 		}
 		return nil
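The three switch blocks above apply one Adam rule at three granularities: a scalar update for the bias w_0, per-feature scalar updates for w_i, and a vectorized update over each factor vector v_i (which is why the new floats.Sqrt, floats.AddConst, and floats.Div helpers operate on whole slices). Two details visible in the diff: moment estimates are kept per worker (the maxJobs dimension), so goroutines never share optimizer state, and the bias-correction exponent t is the epoch counter rather than a per-sample step count. For reference, a self-contained scalar sketch of the rule on a toy objective; the names, the beta1/beta2/eps values (standard Adam defaults, assumed here), and the objective are all hypothetical:

package main

import (
	"fmt"
	"math"
)

// adamState carries Adam's running statistics for a single scalar parameter.
type adamState struct {
	m, v float64 // first- and second-moment estimates
	t    int     // update counter (the commit uses the epoch number instead)
}

// step performs one Adam update and returns the new parameter value.
func (s *adamState) step(param, grad, lr float64) float64 {
	const (
		beta1 = 0.9   // assumed default
		beta2 = 0.999 // assumed default
		eps   = 1e-8  // assumed default
	)
	s.t++
	s.m = beta1*s.m + (1-beta1)*grad      // m_t = beta1*m_{t-1} + (1-beta1)*g_t
	s.v = beta2*s.v + (1-beta2)*grad*grad // v_t = beta2*v_{t-1} + (1-beta2)*g_t^2
	// Bias-corrected estimates: mHat = m/(1-beta1^t), vHat = v/(1-beta2^t).
	mHat := s.m / (1 - math.Pow(beta1, float64(s.t)))
	vHat := s.v / (1 - math.Pow(beta2, float64(s.t)))
	// param = param - lr * mHat / (sqrt(vHat) + eps)
	return param - lr*mHat/(math.Sqrt(vHat)+eps)
}

func main() {
	// Minimize f(w) = w^2 from w = 1; the gradient is 2w.
	var s adamState
	w := 1.0
	for i := 0; i < 100; i++ {
		w = s.step(w, 2*w, 0.1)
	}
	fmt.Printf("w after 100 steps: %.4f\n", w)
}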
36 changes: 21 additions & 15 deletions model/click/model_test.go
@@ -15,14 +15,15 @@ package click
 
 import (
 	"bytes"
+	"testing"
+
 	"github.com/stretchr/testify/assert"
 	"github.com/zhenghaoz/gorse/base/task"
 	"github.com/zhenghaoz/gorse/model"
-	"testing"
 )
 
 const (
-	regressionDelta     = 0.001
+	regressionDelta     = 0.01
 	classificationDelta = 0.01
 )

@@ -40,19 +41,24 @@ func TestFM_Classification_Frappe(t *testing.T) {
 	// libfm.exe -train train.libfm -test test.libfm -task c \
 	//   -method sgd -init_stdev 0.01 -dim 1,1,8 -iter 20 \
 	//   -learn_rate 0.01 -regular 0,0,0.0001
-	train, test, err := LoadDataFromBuiltIn("frappe")
-	assert.NoError(t, err)
-	m := NewFM(FMClassification, model.Params{
-		model.InitStdDev: 0.01,
-		model.NFactors:   8,
-		model.NEpochs:    20,
-		model.Lr:         0.01,
-		model.Reg:        0.0001,
-	})
-	fitConfig := newFitConfigWithTestTracker(20)
-	score := m.Fit(train, test, fitConfig)
-	assert.InDelta(t, 0.91684, score.Accuracy, classificationDelta)
-	assert.Equal(t, m.Complexity(), fitConfig.Task.Done)
+	for _, optimizer := range []string{model.Adam, model.SGD} {
+		t.Run(optimizer, func(t *testing.T) {
+			train, test, err := LoadDataFromBuiltIn("frappe")
+			assert.NoError(t, err)
+			m := NewFM(FMClassification, model.Params{
+				model.InitStdDev: 0.01,
+				model.NFactors:   8,
+				model.NEpochs:    20,
+				model.Lr:         0.01,
+				model.Reg:        0.0001,
+				model.Optimizer:  optimizer,
+			})
+			fitConfig := newFitConfigWithTestTracker(20)
+			score := m.Fit(train, test, fitConfig)
+			assert.InDelta(t, 0.91684, score.Accuracy, classificationDelta)
+			assert.Equal(t, m.Complexity(), fitConfig.Task.Done)
+		})
+	}
 }
 
 //func TestFM_Classification_MovieLens(t *testing.T) {
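Because t.Run names each subtest after the optimizer constant ("adam", "sgd"), the two variants can also be exercised in isolation with the standard Go tooling, e.g. go test -run 'TestFM_Classification_Frappe/adam' ./model/click.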
4 changes: 4 additions & 0 deletions model/params.go
@@ -40,6 +40,10 @@ const (
 	Similarity ParamName = "Similarity"
 	UseFeature ParamName = "UseFeature"
 	BatchSize  ParamName = "BatchSize"
+	Optimizer  ParamName = "Optimizer"
+
+	SGD  = "sgd"
+	Adam = "adam"
 )
 
 // Params stores hyper-parameters for a model. It is a map between strings
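With the new constants, callers select the optimizer through Params; Adam is the default when the key is absent (see SetParams above). A minimal sketch from outside the click package, reusing the hyper-parameter style of the Frappe test:

package main

import (
	"github.com/zhenghaoz/gorse/model"
	"github.com/zhenghaoz/gorse/model/click"
)

func main() {
	// Plain SGD, the behavior before this commit.
	sgd := click.NewFM(click.FMClassification, model.Params{
		model.Lr:        0.01,
		model.Optimizer: model.SGD,
	})
	// Adam, the new default; the key is written out here for clarity.
	adam := click.NewFM(click.FMClassification, model.Params{
		model.Lr:        0.01,
		model.Optimizer: model.Adam,
	})
	_, _ = sgd, adam
}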
