Commit

Documentation comments, closes #2
cgravill committed Jun 13, 2019
1 parent b424684 commit 5420176
Showing 1 changed file with 64 additions and 32 deletions.
96 changes: 64 additions & 32 deletions src/Hype/Optimize.fs
@@ -33,14 +33,22 @@ open DiffSharp.Util

/// Learning rate schemes
type LearningRate =
| Constant of D // Constant
| Decay of D * D // 1 / t decay, a = a0 / (1 + kt). Initial value, decay rate
| ExpDecay of D * D // Exponential decay, a = a0 * Exp(-kt). Initial value, decay rate
| Schedule of DV // Scheduled learning rate vector, its length overrides Params.Epochs
| Backtrack of D * D * D // Backtracking line search. Initial value, c, rho
| StrongWolfe of D * D * D // Strong Wolfe line search. lmax, c1, c2
| AdaGrad of D // Adagrad. Initial value
| RMSProp of D * D // RMSProp. Initial value, decay rate
/// Constant
| Constant of D
/// 1 / t decay, a = a0 / (1 + kt). Initial value, decay rate
| Decay of D * D
/// Exponential decay, a = a0 * Exp(-kt). Initial value, decay rate
| ExpDecay of D * D
/// Scheduled learning rate vector, its length overrides Params.Epochs
| Schedule of DV
/// Backtracking line search. Initial value, c, rho
| Backtrack of D * D * D
/// Strong Wolfe line search. lmax, c1, c2
| StrongWolfe of D * D * D
/// Adagrad. Initial value
| AdaGrad of D
/// RMSProp. Initial value, decay rate
| RMSProp of D * D
static member DefaultConstant = Constant (D 0.001f)
static member DefaultDecay = Decay (D 1.f, D 0.1f)
static member DefaultExpDecay = ExpDecay (D 1.f, D 0.1f)
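For reference, the two decay schemes above evaluate the step size at epoch t as a = a0 / (1 + k*t) and a = a0 * exp(-k*t). A minimal sketch of just these formulas, using plain float32 instead of DiffSharp's D type (the function names here are illustrative, not part of Hype):

// 1 / t decay: a = a0 / (1 + k * t)
let decaySchedule (a0: float32) (k: float32) (t: float32) = a0 / (1.0f + k * t)

// Exponential decay: a = a0 * exp (-k * t)
let expDecaySchedule (a0: float32) (k: float32) (t: float32) = a0 * exp (-k * t)

// With the default coefficients above (a0 = 1.0f, k = 0.1f):
// decaySchedule 1.0f 0.1f 10.0f = 0.5f
// expDecaySchedule 1.0f 0.1f 10.0f is roughly 0.368f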
@@ -152,8 +160,10 @@ type LearningRate =
/// Training batch configuration
type Batch =
| Full
| Minibatch of int // Minibatch of given size
| Stochastic // Minibatch with size 1, SGD
/// Minibatch of given size
| Minibatch of int
/// Minibatch with size 1, SGD
| Stochastic
override b.ToString() =
match b with
| Full -> "Full"
@@ -167,13 +177,20 @@ type Batch =

/// Gradient-based optimization methods
type Method =
| GD // Gradient descent
| CG // Conjugate gradient
| CD // Conjugate descent
| NonlinearCG // Nonlinear conjugate gradient
| DaiYuanCG // Dai & Yuan conjugate gradient
| NewtonCG // Newton conjugate gradient
| Newton // Exact Newton
/// Gradient descent
| GD
/// Conjugate gradient
| CG
/// Conjugate descent
| CD
/// Nonlinear conjugate gradient
| NonlinearCG
/// Dai & Yuan conjugate gradient
| DaiYuanCG
/// Newton conjugate gradient
| NewtonCG
/// Exact Newton
| Newton
override o.ToString() =
match o with
| GD -> "Gradient descent"
@@ -191,30 +208,34 @@ type Method =
let g' = gradclip g'
let p' = -g'
v', g', p'
| CG -> // Hestenes and Stiefel 1952
/// Hestenes and Stiefel 1952
| CG ->
fun w f g p gradclip ->
let v', g' = grad' f w
let g' = gradclip g'
let y = g' - g
let b = (g' * y) / (p * y)
let p' = -g' + b * p
v', g', p'
| CD -> // Fletcher 1987
/// Fletcher 1987
| CD ->
fun w f g p gradclip ->
let v', g' = grad' f w
let g' = gradclip g'
let b = (DV.normSq g') / (-p * g)
let p' = -g' + b * p
v', g', p'
| DaiYuanCG -> // Dai and Yuan 1999
/// Dai and Yuan 1999
| DaiYuanCG ->
fun w f g p gradclip ->
let v', g' = grad' f w
let g' = gradclip g'
let y = g' - g
let b = (DV.normSq g') / (p * y)
let p' = -g' + b * p
v', g', p'
| NonlinearCG -> // Fletcher and Reeves 1964
/// Fletcher and Reeves 1964
| NonlinearCG ->
fun w f g p gradclip ->
let v', g' = grad' f w
let g' = gradclip g'
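All of the conjugate-gradient variants in this hunk share the direction update p' = -g' + b * p and differ only in the scalar b, with y = g' - g denoting the gradient change:

  Hestenes-Stiefel (CG):          b = (g' * y) / (p * y)
  Conjugate descent (CD):         b = ||g'||^2 / (-p * g)
  Dai-Yuan (DaiYuanCG):           b = ||g'||^2 / (p * y)
  Fletcher-Reeves (NonlinearCG):  b = ||g'||^2 / ||g||^2  (the standard Fletcher-Reeves ratio; its computation continues past the visible end of this hunk)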
@@ -238,8 +259,10 @@

/// Momentum configuration
type Momentum =
| Momentum of D // Default momentum
| Nesterov of D // Nesterov momentum
/// Default momentum
| Momentum of D
/// Nesterov momentum
| Nesterov of D
| NoMomentum
static member DefaultMomentum = Momentum (D 0.9f)
static member DefaultNesterov = Nesterov (D 0.9f)
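As a rough sketch of what the momentum coefficient controls, the classical update keeps a velocity alongside the weights (plain float32 and illustrative names, not necessarily Hype's exact implementation):

// Classical momentum: keep a velocity u and step along it
//   u' = mu * u - lr * g
//   w' = w + u'
let momentumStep (mu: float32) (lr: float32) (w: float32[]) (u: float32[]) (g: float32[]) =
    let u' = Array.map2 (fun ui gi -> mu * ui - lr * gi) u g
    let w' = Array.map2 (+) w u'
    w', u'

Nesterov momentum differs in evaluating the gradient at the look-ahead point w + mu * u before applying the same step.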
@@ -256,11 +279,16 @@ type Momentum =

/// Loss function configuration
type Loss =
| L1Loss // L1 norm, least absolute deviations
| L2Loss // L2 norm
| Quadratic // L2 norm squared, least squares
| CrossEntropyOnLinear // Cross entropy after linear layer
| CrossEntropyOnSoftmax // Cross entropy after softmax layer
/// L1 norm, least absolute deviations
| L1Loss
/// L2 norm
| L2Loss
/// L2 norm squared, least squares
| Quadratic
/// Cross entropy after linear layer
| CrossEntropyOnLinear
/// Cross entropy after softmax layer
| CrossEntropyOnSoftmax
override l.ToString() =
match l with
| L1Loss -> "L1 norm, least absolute deviations"
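The first three cases correspond to simple elementwise error norms; a minimal float32 sketch of two of them (illustrative names, not Hype's matrix-based implementation):

// L1 loss: sum of absolute errors
let l1Loss (y: float32[]) (t: float32[]) = Array.map2 (fun a b -> abs (a - b)) y t |> Array.sum

// Quadratic loss: sum of squared errors (L2 norm squared); L2Loss is its square root
let quadraticLoss (y: float32[]) (t: float32[]) = Array.map2 (fun a b -> (a - b) * (a - b)) y t |> Array.sum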
@@ -278,8 +306,10 @@ type Loss =

/// Regularization configuration
type Regularization =
| L1Reg of D // L1 regularization
| L2Reg of D // L2 regularization
/// L1 regularization
| L1Reg of D
/// L2 regularization
| L2Reg of D
| NoReg
static member DefaultL1Reg = L1Reg (D 0.0001f)
static member DefaultL2Reg = L2Reg (D 0.0001f)
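Both penalties add a weighted norm of the parameter vector to the training loss; a minimal float32 sketch (illustrative names, not Hype's API):

// L1 regularization: lambda * sum |w_i|
let l1Penalty (lambda: float32) (w: float32[]) = lambda * Array.sumBy abs w

// L2 regularization: lambda * sum w_i^2
let l2Penalty (lambda: float32) (w: float32[]) = lambda * Array.sumBy (fun x -> x * x) w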
@@ -296,7 +326,8 @@ type Regularization =

/// Gradient clipping configuration
type GradientClipping =
| NormClip of D // Norm clipping
/// Norm clipping
| NormClip of D
| NoClip
static member DefaultNormClip = NormClip (D 1.f)
override g.ToString() =
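Norm clipping rescales the gradient whenever its L2 norm exceeds the threshold; a minimal float32 sketch (illustrative name, not Hype's DV-based implementation):

// If ||g|| > c, scale g down to have norm c; otherwise leave it unchanged
let normClip (c: float32) (g: float32[]) =
    let n = sqrt (Array.sumBy (fun x -> x * x) g)
    if n > c then Array.map (fun x -> x * (c / n)) g else g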
@@ -310,7 +341,8 @@ type GradientClipping =

/// Early stopping configuration
type EarlyStopping =
| Early of int * int // Stagnation patience, overfitting patience
/// Stagnation patience, overfitting patience
| Early of int * int
| NoEarly
static member DefaultEarly = Early (750, 10)
override e.ToString() =

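Taken together, these types are the knobs a Hype training run is configured with. A usage sketch, assuming Hype exposes a Params record with fields named after these types (only Params.Epochs is confirmed by the doc comments above; the other field names are assumptions for illustration):

open Hype
open DiffSharp.AD.Float32

// Field names other than Epochs are assumed here; check the Params definition in the library.
let p = { Params.Default with
            Epochs = 100
            LearningRate = LearningRate.RMSProp (D 0.01f, D 0.9f)
            Batch = Minibatch 64
            Momentum = Momentum.DefaultNesterov
            Regularization = Regularization.DefaultL2Reg
            EarlyStopping = Early (750, 10) }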