-
Notifications
You must be signed in to change notification settings - Fork 1
/
test.c
128 lines (97 loc) · 2.56 KB
/
test.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <c_funcs.h>
#include <SSE/sse_c.h>
#include <AVX/avx_c.h>
#include <sse.h>
// Values passed as the `align` argument of benchmark_prox(), which forwards
// them to _mm_malloc() when allocating the buffer given to the SIMD routine.
// benchmark_prox() also compares `align` against ALIGN_SSE to pick the
// "SSE"/"AVX" label it prints.
#define ALIGN_SSE 16
#define ALIGN_AVX 32
// Pointer to a routine that takes an array of doubles plus an int and returns
// a double; benchmark_vec() calls it with the array and its element count.
typedef double (*func_vec)(double*,const int);
// Pointer to a routine that takes an array of doubles, an int and a double and
// returns nothing; benchmark_prox() calls it with the array, its element count
// and the constant 0.2.
typedef void (*func_prox)(double*,int,double);
// Convert the interval between two clock() readings into seconds.
//
// begin: reading taken before the measured work.
// end:   reading taken after the measured work.
// Returns (end - begin) expressed in seconds as a double.
double elapsed(clock_t begin, clock_t end)
{
    const clock_t ticks = end - begin;

    return (double)ticks / CLOCKS_PER_SEC;
}
// Allocate an array of N doubles and fill it with pseudo-random values drawn
// from rand(), scaled to [0, 1] and then shifted down by 0.5.
//
// N: number of elements to generate.
// Returns a malloc()'d array that the caller must release with free(), or
// NULL when N is not positive or the allocation fails.
double *rand_array(int N)
{
    if (N <= 0)
    {
        return NULL;
    }

    double *t = malloc((size_t)N * sizeof *t);
    if (t == NULL)
    {
        // Never write through a failed allocation; let the caller decide.
        return NULL;
    }

    for (int i = 0; i < N; i++)
    {
        t[i] = rand() / (double)RAND_MAX;
        t[i] -= 0.5;
    }
    return t;
}
// Print the first N elements of t to stdout on a single line, each formatted
// with "%f" and followed by a space, then end the line with a newline.
void disp(double* t, int N)
{
    int idx = 0;

    while (idx < N)
    {
        printf("%f ", t[idx]);
        idx++;
    }
    printf("\n");
}
// Time a C routine and its assembly counterpart on one random array of N
// doubles and print the elapsed time and returned value of each.
//
// f_c:   reference C implementation.
// f_asm: implementation to compare against (reported as "ASM").
// name:  label printed in the benchmark header.
// N:     number of elements in the generated input.
//
// Exits with EXIT_FAILURE if the input array cannot be obtained.
// NOTE(review): both routines receive the same non-const buffer; if f_c
// modifies it in place, f_asm runs on the modified data -- confirm intended.
void benchmark_vec(func_vec f_c, func_vec f_asm, char name[64], int N)
{
    clock_t begin, end;
    double ans_c, ans_asm;
    double *t = rand_array(N);

    if (t == NULL)
    {
        exit(EXIT_FAILURE);
    }

    // C
    begin = clock();
    ans_c = f_c(t, N);
    end = clock();
    printf("\n----\t%s benchmark \t----\n", name);
    printf("----\tC: %fs \t (output: %f)\n", elapsed(begin,end),ans_c);

    // ASM
    begin = clock();
    ans_asm = f_asm(t, N);
    end = clock();
    printf("----\tASM: %fs \t (output: %f)\n\n", elapsed(begin, end), ans_asm);

    // The two results are only printed, not compared; the equality check
    // below was already disabled.
    // assert(fabs(ans_c - ans_asm) < 1e-6);

    // Release the input so repeated benchmarks do not accumulate N doubles each.
    free(t);
}
// Time an in-place C routine and its SIMD counterpart on identical copies of
// one random array of N doubles, assert that both produce equal arrays
// (via array_equality), and print the elapsed time of each.
//
// f_c:   reference C implementation; runs on a malloc()'d copy.
// f_asm: SIMD implementation; runs on a copy allocated with
//        _mm_malloc(..., align).
// name:  label printed in the benchmark header.
// N:     number of elements in the generated input.
// align: alignment handed to _mm_malloc(); also selects the printed label
//        ("SSE" when equal to ALIGN_SSE, "AVX" otherwise).
//
// Exits with EXIT_FAILURE if any of the three buffers cannot be obtained.
void benchmark_prox(func_prox f_c, func_prox f_asm, char name[64], int N,
                    int align)
{
    clock_t begin, end;
    const double param = 0.2;   // third argument passed to both routines
    const size_t bytes = (size_t)N * sizeof(double);
    double *t = rand_array(N);
    double *ans_c = malloc(bytes);
    double *ans_asm = (double*) _mm_malloc(bytes, align);

    if (!t || !ans_c || !ans_asm) exit(EXIT_FAILURE);

    memcpy(ans_c, t, bytes);
    memcpy(ans_asm, t, bytes);
    // The source array is only needed to seed the two working copies.
    free(t);

    // C
    begin = clock();
    f_c(ans_c, N, param);
    end = clock();
    printf("\n----\t%s benchmark \t----\n", name);
    printf("----\tC: %fs \n", elapsed(begin,end));

    // ASM
    begin = clock();
    f_asm(ans_asm, N, param);
    end = clock();
    assert(array_equality(ans_c, ans_asm, N));
    printf("----\t%s: %fs \n", align==ALIGN_SSE?"SSE":"AVX",elapsed(begin, end));

    free(ans_c);
    // Memory from _mm_malloc() must be returned with _mm_free(), not free().
    _mm_free(ans_asm);
}
// Entry point: runs each C-versus-SIMD benchmark in turn on arrays of ten
// million doubles and returns 0.
int main()
{
    // srand() is not called, so rand() produces the same inputs on every run.
    const int n_elems = 10000000;

    // SSE kernels
    benchmark_vec(c_sum,       sse_sum,       "SSE SUM",       n_elems);
    benchmark_vec(c_norm2,     sse_norm2,     "SSE NORM2",     n_elems);
    benchmark_vec(c_normalize, sse_normalize, "SSE NORMALIZE", n_elems);
    benchmark_vec(c_norm1,     sse_norm1,     "SSE NORM1",     n_elems);
    benchmark_prox(c_proxl1, sse_intrin_proxl1, "SSE PROXL1", n_elems, ALIGN_SSE);

    // AVX kernels
    benchmark_vec(c_sum,   avx_sum,   "AVX SUM",   n_elems);
    benchmark_vec(c_norm2, avx_norm2, "AVX NORM2", n_elems);
    benchmark_prox(c_proxl1,       avx_proxl1,       "AVX PROXL1",  n_elems, ALIGN_AVX);
    benchmark_prox(c_proxl2square, avx_proxl2square, "AVX PROXL2",  n_elems, ALIGN_AVX);
    benchmark_prox(c_projBox,      avx_projBox,      "AVX PROJBOX", n_elems, ALIGN_AVX);

    return 0;
}