-
Notifications
You must be signed in to change notification settings - Fork 2
/
sseTest.c
39 lines (31 loc) · 1.41 KB
/
sseTest.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#include <stdio.h>
#include <emmintrin.h> /* header file for the SSE intrinsics we gonna use */
int main( int argc, char **argv )
{
/* set A = |1 3|, B = |3 0| C = |0 0|
|2 4| |0 2| |0 0| */
double A[4] = {1,2,3,4}, B[4] = {3,0,0,2}, C[4] = {0,0,0,0};
/* We are computing C = C + A x B, which means:
C[0] += A[0]*B[0] + A[2]*B[1]
C[1] += A[1]*B[0] + A[3]*B[1]
C[2] += A[0]*B[2] + A[2]*B[3]
C[3] += A[1]*B[2] + A[3]*B[3] */
/* load entire matrix C into SIMD variables */
__m128d c1 = _mm_loadu_pd( C+0 ); /* c1 = (C[0],C[1]) */
__m128d c2 = _mm_loadu_pd( C+2 ); /* c2 = (C[2],C[3]) */
for( int i = 0; i < 2; i++ )
{
__m128d a = _mm_loadu_pd( A+i*2 ); /* load next column of A */
__m128d b1 = _mm_load1_pd( B+0+i );
__m128d b2 = _mm_load1_pd( B+2+i ); /* load next row of B */
c1 = _mm_add_pd( c1, _mm_mul_pd( a, b1 ) ); /* multiply and add */
c2 = _mm_add_pd( c2, _mm_mul_pd( a, b2 ) );
}
/* store the result back to the C array */
_mm_storeu_pd( C+0, c1 ); /* (C[0],C[1]) = c1 */
_mm_storeu_pd( C+2, c2 ); /* (C[2],C[3]) = c2 */
/* output whatever we've got */
printf( "|%g %g| * |%g %g| = |%g %g|\n", A[0], A[2], B[0], B[2], C[0], C[2] );
printf( "|%g %g| |%g %g| |%g %g|\n", A[1], A[3], B[1], B[3], C[1], C[3] );
return 0;
}