-
Notifications
You must be signed in to change notification settings - Fork 1
/
eigen_test.cpp
130 lines (103 loc) · 2.61 KB
/
eigen_test.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#include <iostream>
#include <cmath>
#include <Eigen/Dense>
#include <ctime>
#include "AVX/avx_c.h"
void proxL1(Eigen::VectorXf& x, const float lambda)
{
for(int i = 0; i < x.rows(); i++)
{
float absx = fabs(x[i]);
if(absx >= lambda)
{
x[i] = (x[i] >= 0 ? 1 : -1)*fmax(absx - lambda, 0);
} else {
x[i] = 0.0f;
}
}
}
void proxL2square(Eigen::VectorXf& x, const float lambda)
{
for(int i = 0; i < x.rows(); i++)
{
x[i] *= 1.0f/(1.0f + lambda);
}
}
/* Soft thresholding AVX */
void proxL1_eigen_avx(Eigen::VectorXf& t, const float lambda)
{
const __m256 zero = _mm256_setzero_ps();
const __m256 l = _mm256_set1_ps(lambda);
__m256 x, sign;
float* raw_t = t.data();
for(int i=0; i< t.rows(); i+=8)
{
x = _mm256_load_ps(raw_t+i);
sign = avx_sign_ps(x);
x = avx_abs_ps(x);
x = _mm256_sub_ps(x, l); // abs(x) - lambda
x = _mm256_max_ps(zero, x);
x = _mm256_mul_ps(x, sign);
_mm256_storeu_ps(raw_t+i, x);
}
}
void proxL2square_eigen_avx(Eigen::VectorXf& t, const float lambda)
{
const __m256 multiplier = _mm256_set1_ps(1.0f/(1.0f+lambda));
float* raw_t = t.data();
for(int i=0; i<t.rows(); i+=8)
{
_mm256_store_ps(raw_t+i, _mm256_mul_ps(_mm256_load_ps(raw_t+i), multiplier));
}
}
Eigen::VectorXf solve_lasso(const Eigen::MatrixXf& A, const Eigen::VectorXf& b, const float lambda)
{
int steps = 0;
const float step_size = 0.01f; // for testing purpose
Eigen::VectorXf x = Eigen::VectorXf::Zero(b.rows());
while(steps < 300)
{
Eigen::VectorXf grad = A.transpose()*(A*x - b);
proxL1(grad, lambda);
x = x - step_size * grad;
steps++;
}
return x;
}
Eigen::VectorXf solve_lasso_avx(const Eigen::MatrixXf& A, const Eigen::VectorXf& b, const float lambda)
{
int steps = 0;
const float step_size = 0.01f; // for testing purpose
Eigen::VectorXf x = Eigen::VectorXf::Zero(b.rows());
while(steps < 300)
{
Eigen::VectorXf grad = A.transpose()*(A*x - b);
proxL1_eigen_avx(grad, lambda);
x = x - step_size * grad;
steps++;
}
return x;
}
double elapsed(clock_t begin, clock_t end)
{
return (double)(end - begin) / CLOCKS_PER_SEC;
}
int main()
{
std::srand(88);
const int n = 800000;
Eigen::VectorXf b = Eigen::VectorXf::Random(n);
Eigen::VectorXf x(b), x_avx(b);
// Eigen::MatrixXf A = 1e-1f*Eigen::MatrixXf::Random(n,n);
clock_t begin, end ;
begin = clock();
proxL1(x, 0.1f);
end = clock();
std::cout << "NO AVX: " << elapsed(begin, end) << std::endl;
begin = clock();
proxL1_eigen_avx(x_avx, 0.1f);
end = clock();
std::cout << "AVX: " << elapsed(begin, end) << std::endl;
std::cout << "L2 Norm of diff " << (x-x_avx).norm() << std::endl;
return 0;
}