-
Notifications
You must be signed in to change notification settings - Fork 0
/
linear_regression.cpp
301 lines (252 loc) · 11.1 KB
/
linear_regression.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
// C++ standard library:
#include <iostream>
#include <string>
#include <utility>
#include <vector>
// third-party libraries:
#include <armadillo>
#include <lesstimate.h>
double sumSquaredError(
arma::colvec b, // the parameter vector
arma::colvec y, // the dependent variable
arma::mat X // the design matrix
)
{
// compute the sum of squared errors:
arma::mat sse = arma::trans(y - X * b) * (y - X * b);
// other packages, such as glmnet, scale the sse with
// 1/(2*N), where N is the sample size. We will do that here as well
sse *= 1.0 / (2.0 * y.n_elem);
// note: We must return a double, but the sse is a matrix
// To get a double, just return the single value that is in
// this matrix:
return (sse(0, 0));
}
// Computes the gradient of the scaled sum of squared errors with
// respect to the parameter vector b.
//
// @param b the parameter vector (p x 1)
// @param y the dependent variable (N x 1)
// @param X the design matrix (N x p)
// @return row vector of gradients in the same order as the parameters
//         in b. Matches the 1/(2*N) scaling used in sumSquaredError:
//         d/db [ (y - X*b)'(y - X*b) / (2N) ] = X'(X*b - y) / N.
arma::rowvec sumSquaredErrorGradients(
    const arma::colvec &b, // const ref: avoids copying the parameters
    const arma::colvec &y, // const ref: avoids copying the outcome
    const arma::mat &X     // const ref: avoids copying the design matrix
)
{
  // Single-pass form of -2*X'*y + 2*X'*X*b scaled by 0.5/N.
  // We want to return the gradients as a row vector; therefore,
  // the resulting column vector is transposed:
  arma::rowvec gradients = arma::trans(X.t() * (X * b - y));
  gradients /= static_cast<double>(y.n_rows);
  return gradients;
}
// IMPORTANT: The library is called lesstimate, but because it was
// initially a sub-folder of lessSEM, parts of its documentation still
// refer to the lessSEM namespace; the code below uses the less namespace.
class linearRegressionModel : public less::model
{
public:
// the less::model class has two methods: "fit" and "gradients".
// Both of these methods must follow a fairly strict framework.
// First: They must receive exactly two arguments:
// 1) an arma::rowvec with current parameter values
// 2) an Rcpp::StringVector with current parameter labels
// (NOTE: the lessSEM package currently does not make use of these
// labels. This is just for future use. If you don't want to use
// the labels, just pass any less::stringVector you want).
// if you are using R, a less::stringVector is just an
// Rcpp::StringVector. Otherwise it is a custom vector. that can
// be created with less::stringVector myVector(numberofParameters).
// Second:
// 1) fit must return a double (e.g., the -2-log-likelihood)
// 2) gradients must return an arma::rowvec with the gradients. It is
// important that the gradients are returned in the same order as the
// parameters (i.e., don't shuffle your gradients, lessSEM will
// assume that the first value in gradients corresponds to the
// derivative with respect to the first parameter passed to
// the function).
double fit(arma::rowvec b, less::stringVector labels) override
{
// NOTE: In sumSquaredError we assumed that b was a column-vector. We
// have to transpose b to make things work
return (sumSquaredError(b.t(), y, X));
}
arma::rowvec gradients(arma::rowvec b, less::stringVector labels) override
{
// NOTE: In sumSquaredErrorGradients we assumed that b was a column-vector. We
// have to transpose b to make things work
return (sumSquaredErrorGradients(b.t(), y, X));
}
// IMPORTANT: Note that we used some arguments above which we did not pass to
// the functions: y, and X. Without these arguments, we cannot use our
// sumSquaredError and sumSquaredErrorGradients function! To make these
// accessible to our functions, we have to define them:
const arma::colvec y;
const arma::mat X;
// finally, we create a constructor for our class
linearRegressionModel(arma::colvec y_, arma::mat X_) : y(y_), X(X_){};
};
int main()
{
arma::mat X = {{1.00, -0.70, -0.86},
{1.00, -1.20, -2.10},
{1.00, -0.15, 1.13},
{1.00, -0.50, -1.50},
{1.00, 0.83, 0.44},
{1.00, -1.52, -0.72},
{1.00, 1.40, -1.30},
{1.00, -0.60, -0.59},
{1.00, -1.10, 2.00},
{1.00, -0.96, -0.20}};
arma::colvec y = {{0.56},
{-0.32},
{0.01},
{-0.09},
{0.18},
{-0.11},
{0.62},
{0.72},
{0.52},
{0.12}};
linearRegressionModel linReg(y, X);
arma::rowvec startingValues(3);
startingValues.fill(0.0);
std::vector<std::string> labels{"b0", "b1", "b2"};
less::stringVector parameterLabels(labels);
// penalty: We don't penalize the intercept b0, but we penalize
// b1 and b2 with lasso:
std::vector<std::string> penalty{"none", "lasso", "lasso"};
// tuning parameter lambda:
arma::rowvec lambda = {{0.0, 0.2, 0.2}};
// theta is not used by the lasso penalty:
arma::rowvec theta = {{0.0, 0.0, 0.0}};
less::fitResults fitResultGlmnet = less::fitGlmnet(
linReg,
startingValues,
parameterLabels,
penalty,
lambda,
theta //,
// initialHessian, // optional, but can be very useful
);
std::cout << "\n\n### glmnet ###\n";
std::cout << "fit: " << fitResultGlmnet.fit << "\n";
std::cout << "parameters: " << fitResultGlmnet.parameterValues << "\n";
less::fitResults fitResultIsta = less::fitIsta(
linReg,
startingValues,
parameterLabels,
penalty,
lambda,
theta);
std::cout << "\n### ista ###\n";
std::cout << "fit: " << fitResultIsta.fit << "\n";
std::cout << "parameters: " << fitResultIsta.parameterValues << "\n";
// adapt optimizer
// First, create a new instance of class controlGLMNET:
less::controlGLMNET controlOptimizerGlmnet = less::controlGlmnetDefault();
// Next, adapt the settings:
controlOptimizerGlmnet.maxIterOut = 1000;
// pass the argument to the fitGlmnet function:
fitResultGlmnet = less::fitGlmnet(
linReg,
startingValues,
parameterLabels,
penalty,
lambda,
theta,
// sets Hessian to identity; a better Hessian will help!
arma::mat(1, 1, arma::fill::ones),
controlOptimizerGlmnet//,
// verbose // set to >0 to get additional information on the optimization
);
std::cout << "\n\n### glmnet - adapted optimizer ###\n";
std::cout << "fit: " << fitResultGlmnet.fit << "\n";
std::cout << "parameters: " << fitResultGlmnet.parameterValues << "\n";
// First, create a new instance of class controlIsta:
less::controlIsta controlOptimizerIsta = less::controlIstaDefault();
// Next, adapt the settings:
controlOptimizerIsta.maxIterOut = 1000;
// pass the argument to the fitIsta function:
fitResultIsta = less::fitIsta(
linReg,
startingValues,
parameterLabels,
penalty,
lambda,
theta,
controlOptimizerIsta//,
// verbose // set to >0 to get additional information on the optimization
);
std::cout << "\n### ista - adapted optimizer ###\n";
std::cout << "fit: " << fitResultIsta.fit << "\n";
std::cout << "parameters: " << fitResultIsta.parameterValues << "\n";
// specialized interfaces
// Specify the penalties we want to use:
less::penaltyLASSOGlmnet lasso;
less::penaltyRidgeGlmnet ridge;
// Note that we used the glmnet variants of lasso and ridge. The reason
// for this is that the glmnet implementation allows for parameter-specific
// lambda and alpha values while the current ista implementation does not.
// These penalties take tuning parameters of class tuningParametersEnetGlmnet
less::tuningParametersEnetGlmnet tp;
// We have to specify alpha and lambda. Here, different values can
// be specified for each parameter:
tp.lambda = arma::rowvec(startingValues.n_elem);
tp.lambda.fill(0.2);
tp.alpha = arma::rowvec(startingValues.n_elem);
tp.alpha.fill(0.3);
// Finally, there is also the weights. The weights vector indicates, which
// of the parameters is regularized (weight = 1) and which is unregularized
// (weight =0). It also allows for adaptive lasso weights (e.g., weight =.0123).
// weights must be an arma::rowvec of the same length as our parameter vector.
arma::rowvec weights(startingValues.n_elem);
weights.fill(1.0); // we want to regularize all parameters
weights.at(0) = 0.0; // except for the first one, which is our intercept.
tp.weights = weights;
// to optimize this model, we have to pass it to
// the glmnet function:
less::fitResults lmFitGlmnet = less::glmnet(
linReg, // the first argument is our model
startingValues, // arma::rowvec with starting values
parameterLabels, // less::stringVector with labels
lasso, // non-smooth penalty
ridge, // smooth penalty
tp//, // tuning parameters
//controlOptimizer // optional fine-tuning (see above)
);
std::cout << "\n\n### glmnet - elastic net ###\n";
std::cout << "fit: " << lmFitGlmnet.fit << "\n";
std::cout << "parameters: " << lmFitGlmnet.parameterValues << "\n";
// The elastic net is a combination of a ridge penalty and
// a lasso penalty.
// NOTE: HERE COMES THE BIGGEST DIFFERENCE BETWEEN GLMNET AND ISTA:
// 1) ISTA ALSO REQUIRES THE DEFINITION OF A PROXIMAL OPERATOR. THESE
// ARE CALLED proximalOperatorZZZ IN lessSEM (e.g., proximalOperatorLasso
// for lasso).
// 2) THE SMOOTH PENALTY (RIDGE) AND THE LASSO PENALTY MUST HAVE SEPARATE
// TUNING PARMAMETERS.
less::proximalOperatorLasso proxOp; // HERE, WE DEFINE THE PROXIMAL OPERATOR
less::penaltyLASSO lassoIsta;
less::penaltyRidge ridgeIsta;
// BOTH, LASSO AND RIDGE take tuning parameters of class tuningParametersEnet
less::tuningParametersEnet tpLasso;
less::tuningParametersEnet tpRidge;
// We have to specify alpha and lambda. Here, the same value is used
// for each parameter:
tpLasso.alpha = .3;
tpLasso.lambda = .2;
tpRidge.alpha = .3;
tpRidge.lambda = .2;
// A weights vector indicates, which
// of the parameters is regularized (weight = 1) and which is unregularized
// (weight =0). It also allows for adaptive lasso weights (e.g., weight =.0123).
// weights must be an arma::rowvec of the same length as our parameter vector.
tpLasso.weights = weights;
tpRidge.weights = weights;
// to optimize this model, we have to pass it to the ista function:
less::fitResults lmFitIsta = less::ista(
linReg, // the first argument is our model
startingValues, // arma::rowvec with starting values
parameterLabels, // less::stringVector with labels
proxOp, // proximal opertator
lassoIsta, // our lasso penalty
ridgeIsta, // our ridge penalty
tpLasso, // our tuning parameter FOR THE LASSO PENALTY
tpRidge//, // our tuning parameter FOR THE RIDGE PENALTY
//controlOptimizer // optional fine-tuning (see above)
);
std::cout << "\n\n### ista - elastic net ###\n";
std::cout << "fit: " << lmFitIsta.fit << "\n";
std::cout << "parameters: " << lmFitIsta.parameterValues << "\n";
}