Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Start work on omp.

  • Loading branch information...
commit 8a38be300c81fcfaad92fd5f8bf9d7c9f735bcbb 1 parent 99a2240
@inducer inducer authored
View
7 01-sequential-soln.c
@@ -4,9 +4,8 @@
int main(int argc, char **argv)
{
- if (argc != 2)
+ if (argc != 3)
{
- // STRIP
fprintf(stderr, "need two arguments!\n");
abort();
}
@@ -25,10 +24,12 @@ int main(int argc, char **argv)
y[i] = 2*i;
}
+ const int ntrips = atoi(argv[2]);
+ printf("doing %d trips...\n", ntrips);
+
struct timespec time1, time2;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
- const int ntrips = 2;
for (int trip = 0; trip < ntrips; ++trip)
{
for (int i = 0; i < n; ++i)
View
BIN  01-sequential-soln.o
Binary file not shown
View
5 02-flexible-op.c
@@ -0,0 +1,5 @@
+void add_doubles(void *tgt, void *op1, void *op2) // Stores *op1 + *op2 into *tgt, reading and writing all three pointers as double.
+{
+ *(double *) tgt = *(double *) op1 + *(double *) op2; // parameters are untyped void *, so each one is cast to double * before use
+}
+
View
BIN  02-flexible-op.o
Binary file not shown
View
25 02-flexible-soln.c
@@ -2,11 +2,23 @@
#include <stdio.h>
#include <stdlib.h>
+extern void add_doubles(void *tgt, void *op1, void *op2);
+
+typedef void (*operation_t)(void *, void *, void *);
+
+void do_three_operand_loop(operation_t op, void *x, void *y, void *z, // Applies op element by element: for each index i, calls op(&z[i], &x[i], &y[i]).
+ long item_size, long n) // item_size: byte distance between consecutive elements; n: number of elements
+{
+ for (int i = 0; i < n; ++i) // NOTE(review): i is int while n is long; a count above INT_MAX would overflow i
+ {
+ op(z+i*item_size, x+i*item_size, y+i*item_size); // target first, then the two operands. NOTE(review): arithmetic on void * is not ISO C (GNU C treats it as byte-sized) - confirm the intended compiler
+ }
+}
+
int main(int argc, char **argv)
{
- if (argc != 2)
+ if (argc != 3)
{
- // STRIP
fprintf(stderr, "need two arguments!\n");
abort();
}
@@ -25,16 +37,15 @@ int main(int argc, char **argv)
y[i] = 2*i;
}
+ const int ntrips = atoi(argv[2]);
+ printf("doing %d trips...\n", ntrips);
+
struct timespec time1, time2;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1);
- const int ntrips = 2;
for (int trip = 0; trip < ntrips; ++trip)
{
- for (int i = 0; i < n; ++i)
- {
- z[i] = x[i] + y[i];
- }
+ do_three_operand_loop(add_doubles, x, y, z, sizeof(double), n);
}
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2);
View
BIN  02-flexible-soln.o
Binary file not shown
View
50 02-flexible-start.c
@@ -0,0 +1,50 @@
+#include "timing.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+extern void add_doubles(void *tgt, void *op1, void *op2);
+
+typedef void (*operation_t)(void *, void *, void *);
+
+// FIXME
+
+int main(int argc, char **argv) // Exercise skeleton: allocates three double arrays, times ntrips passes of a (still missing) loop body, and prints throughput figures.
+{
+ if (argc != 3) // program name plus exactly two arguments: element count and trip count
+ {
+ fprintf(stderr, "need two arguments!\n");
+ abort();
+ }
+
+ const long n = atol(argv[1]); // element count; atol gives no error indication for malformed input
+ double *x = (double *) malloc(sizeof(double) * n);
+ if (!x) { perror("alloc x"); abort(); }
+ double *y = (double *) malloc(sizeof(double) * n);
+ if (!y) { perror("alloc y"); abort(); }
+ double *z = (double *) malloc(sizeof(double) * n); // z is allocated but never written in this skeleton
+ if (!z) { perror("alloc z"); abort(); }
+
+ for (int i = 0; i < n; ++i) // NOTE(review): i is int while n is long; a count above INT_MAX would overflow i
+ {
+ x[i] = i;
+ y[i] = 2*i; // product is computed in int before conversion to double
+ }
+
+ const int ntrips = atoi(argv[2]); // number of timed repetitions
+ printf("doing %d trips...\n", ntrips);
+
+ struct timespec time1, time2;
+ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1); // start timestamp, taken from the process CPU-time clock
+
+ for (int trip = 0; trip < ntrips; ++trip)
+ {
+ // FIXME
+ }
+
+ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2); // end timestamp
+ double elapsed = timespec_diff_in_seconds(time1,time2)/ntrips; // average per trip; helper is presumably declared in timing.h - confirm. NOTE(review): ntrips == 0 is not guarded
+ printf("%f GB/s\n",
+ 3*n*sizeof(double)/1e9/elapsed); // combined byte size of the three n-element arrays, scaled by 1e9, per unit of elapsed
+ printf("%f GFlops/s\n",
+ n/1e9/elapsed); // counts n operations per trip (presumably one per element - confirm once the loop body is filled in)
+}
View
69 03-openmp-soln.c
@@ -0,0 +1,69 @@
+#include "timing.h"
+#include <stdio.h>
+#include <stdlib.h>
+// FIXME STRIP
+#include <omp.h>
+#include <math.h>
+
+int main(int argc, char **argv) // OpenMP example: fills x and y, has each thread print its id, then times ntrips passes of a parallel loop that writes z.
+{
+ if (argc != 3) // program name plus exactly two arguments: element count and trip count
+ {
+ fprintf(stderr, "need two arguments!\n");
+ abort();
+ }
+
+ const long n = atol(argv[1]); // element count; atol gives no error indication for malformed input
+ double *x = (double *) malloc(sizeof(double) * n);
+ if (!x) { perror("alloc x"); abort(); }
+ double *y = (double *) malloc(sizeof(double) * n);
+ if (!y) { perror("alloc y"); abort(); }
+ double *z = (double *) malloc(sizeof(double) * n);
+ if (!z) { perror("alloc z"); abort(); }
+
+ for (int i = 0; i < n; ++i) // NOTE(review): i is int while n is long; a count above INT_MAX would overflow i
+ {
+ x[i] = i;
+ y[i] = 2*i; // product is computed in int before conversion to double
+ }
+
+ const int ntrips = atoi(argv[2]); // number of timed repetitions
+ printf("doing %d trips...\n", ntrips);
+
+ // FIXME STRIP
+#pragma omp parallel
+ { // executed by every thread of the parallel region: each prints its own number and the team size
+ printf("Hi, I'm thread # %d of %d\n",
+ omp_get_thread_num(),
+ omp_get_num_threads());
+ }
+
+ struct timespec time1, time2;
+ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time1); // NOTE(review): this is the process CPU-time clock, not wall-clock; with several threads it likely does not reflect elapsed real time - confirm intent
+
+ // FIXME STRIP
+ for (int trip = 0; trip < ntrips; ++trip)
+ {
+ #pragma omp parallel for
+ for (int i = 0; i < n; ++i) // iterations are distributed across threads by the pragma above; each i touches only x[i], y[i], z[i]
+ {
+ double a = x[i];
+ double b = y[i];
+
+ for (int j = 0; j < 200; ++j) // 200 rounds of coupled sin/cos updates per element; each round depends on the previous one
+ {
+ a = sin(a) + b;
+ b = cos(b) + a; // uses the a just updated on the previous line
+ }
+
+ z[i] = a+b;
+ }
+ }
+
+ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &time2); // end timestamp from the same clock
+ double elapsed = timespec_diff_in_seconds(time1,time2)/ntrips; // average per trip; helper is presumably declared in timing.h - confirm. NOTE(review): ntrips == 0 is not guarded
+ printf("%f s\n", elapsed);
+ printf("%f GB/s\n",
+ 3*n*sizeof(double)/1e9/elapsed); // combined byte size of the three n-element arrays, scaled by 1e9, per unit of elapsed
+}
+
View
19 notes.txt
@@ -12,6 +12,8 @@ colorscheme evening
- run into segfault
- debug it
- what do the numbers mean?
+ - what are the main things that are involved?
+ - what is limiting our rate?
- timing repeatability
- O3
- set-governor performance/ondemand
@@ -20,4 +22,19 @@ colorscheme evening
- Control flow overhead
- Cache
-02-flexible.c:
+/*
+02-flexible.c: (if time)
+- Argue for flexibility
+- Code it
+- Destroy it
+*/
+
+03-openmp.c:
+- Add thread number printer
+- -lgomp
+- Run, not parallel.
+- -fopenmp
+- Do vanilla vector add.
+- OMP_NUM_THREADS=1...4
+- Depressing! What's wrong?
+- Wha
Please sign in to comment.
Something went wrong with that request. Please try again.