model.h
#pragma once

#include <gtest/gtest.h>
#include <vector>

#include "maybe.h"

// Base class that represents an ML model.
class Model {
 public:
  virtual ~Model() = default;

  // Returns the number of parameters this model has.
  virtual int nParameters() = 0;

  // Trains the model, i.e. sets the parameters to the values that minimize
  // the loss function. This is performed using stochastic gradient descent.
  // Set log to true to log the learning.
  // Returns the last loss function value.
  Maybe<double> Train(double learningRate, int maxSteps, bool log);

  // minImprovementToEarlyStop is a number between 0 and 1 that sets the
  // minimum relative loss decrease required for training to continue.
  // For example, with a value of 0.1, training stops early if the loss
  // doesn't decrease by at least 10% between consecutive steps.
  Maybe<double> Train(double learningRate, double minImprovementToEarlyStop,
                      int maxSteps, bool log);
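
  // Example usage (illustrative; ExampleLinearModel is the hypothetical
  // subclass sketched at the end of this file, not part of the original API):
  //   ExampleLinearModel model(xs, ys);
  //   Maybe<double> finalLoss = model.Train(/*learningRate=*/0.01,
  //                                         /*minImprovementToEarlyStop=*/0.001,
  //                                         /*maxSteps=*/1000, /*log=*/false);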

  // Returns the total loss, i.e. the sum of the squared residues.
  double Loss();

 private:
  // After calling this method, the model's parameters will be written into
  // the target vector.
  virtual Maybe<Void> GetParameters(std::vector<double>* target) = 0;

  // After calling this method, the model's parameters will be set to the
  // values from the input vector.
  virtual Maybe<Void> SetParameters(std::vector<double>* parameters) = 0;

  // Returns the value of the i-th residue given the model's current
  // parameters.
  // Ex: loss = (model(x0)-y0)^2 + (model(x1)-y1)^2
  //     Residue(0) -> model(x0)-y0
  //     Residue(1) -> model(x1)-y1
  virtual double Residue(int i) = 0;

  // Returns the number of residues in the loss.
  // Ex: loss = (model(x0)-y0)^2 + (model(x1)-y1)^2 -> 2 residues
  virtual int nResidues() = 0;

  // Returns the gradient of the i-th loss summand with respect to the model's
  // parameters, i.e. if the model has 2 parameters, the returned vector
  // contains the derivative of the i-th squared residue with respect to the
  // first parameter, followed by its derivative with respect to the second
  // parameter. It's fine if multiplicative constants are dropped: returning
  // [1,5] when the formal mathematical gradient is [2,10] is absolutely fine,
  // because we scale the gradient anyway during training.
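  // Ex (sketch, dropping the factor of 2): for a model f(x) = a*x + b with
  // residue r_i = f(x_i) - y_i, LossGradient(i) could return {r_i * x_i, r_i}
  // for the parameters {a, b}.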
  virtual std::vector<double> LossGradient(int i) = 0;

  // Performs one step of gradient descent considering only the i-th residue.
  // Note: this does not guarantee that the overall loss will decrease.
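  // Conceptually, each parameter p_j is updated as
  //   p_j <- p_j - stepSize * LossGradient(i)[j]
  // (a sketch of the intended update; the actual definition is not part of
  // this header).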
  void StochasticGradientDescentStep(int i, double stepSize);

  FRIEND_TEST(ModelTest, LossTest);
};
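
// Illustrative sketch (not part of the original header): a minimal concrete
// model fitting y ~= a*x + b to a set of points, showing one way the pure
// virtual interface above could be implemented. The data members, constructor,
// and the expression Maybe<Void>() (assumed here to default-construct a
// non-error value, since maybe.h is not shown) are all assumptions.
class ExampleLinearModel : public Model {
 public:
  ExampleLinearModel(const std::vector<double>& xs,
                     const std::vector<double>& ys)
      : xs_(xs), ys_(ys) {}

  int nParameters() override { return 2; }  // a and b

 private:
  Maybe<Void> GetParameters(std::vector<double>* target) override {
    *target = {a_, b_};
    return Maybe<Void>();
  }

  Maybe<Void> SetParameters(std::vector<double>* parameters) override {
    a_ = (*parameters)[0];
    b_ = (*parameters)[1];
    return Maybe<Void>();
  }

  // i-th residue: model(x_i) - y_i = a*x_i + b - y_i.
  double Residue(int i) override { return a_ * xs_[i] + b_ - ys_[i]; }

  int nResidues() override { return static_cast<int>(xs_.size()); }

  // Gradient of the i-th squared residue w.r.t. {a, b}, with the constant
  // factor of 2 dropped, as the comment on LossGradient allows.
  std::vector<double> LossGradient(int i) override {
    const double r = Residue(i);
    return {r * xs_[i], r};
  }

  double a_ = 0.0;
  double b_ = 0.0;
  std::vector<double> xs_;
  std::vector<double> ys_;
};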