-
Notifications
You must be signed in to change notification settings - Fork 18
/
lat_mem_rd.c
169 lines (149 loc) · 3.74 KB
/
lat_mem_rd.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
/*
* lat_mem_rd.c - measure memory load latency
*
* usage: lat_mem_rd [-P <parallelism>] [-W <warmup>] [-N <repetitions>] [-t] size-in-MB [stride ...]
*
* Copyright (c) 1994 Larry McVoy.
* Copyright (c) 2003, 2004 Carl Staelin.
*
* Distributed under the FSF GPL with additional restriction that results
* may be published only if:
* (1) the benchmark is unmodified, and
* (2) the version in the sccsid below is included in the report.
* Support for this development by Sun Microsystems is gratefully acknowledged.
*/
/*
 * Version string for this file. The license header above asks that the
 * version in this sccsid be included in any published report.
 */
char *id = "$Id: s.lat_mem_rd.c 1.13 98/06/30 16:13:49-07:00 lm@lm.bitmover.com $\n";
#include "bench.h"
/* Default stride: main() passes this to loads() when no stride argument is given. */
#define STRIDE (512/sizeof(char *))
/* First value of `range` in main()'s sweep loops. */
#define LOWER 512
/* Runs one measurement for a (range, stride) pair and prints the result. */
void loads(size_t len, size_t range, size_t stride,
int parallel, int warmup, int repetitions);
/* Returns the range value that follows k in the sweep. */
size_t step(size_t k);
/*
 * NOTE(review): no definition or call of initialize() is visible in this
 * file; this prototype may be stale -- confirm before removing.
 */
void initialize(iter_t iterations, void* cookie);
/*
 * Initialization callback that loads() hands to benchmp(). main() replaces
 * it with thrash_initialize when the -t option is given.
 */
benchmp_f fpInit = stride_initialize;
/*
 * Parse the command line and run the latency sweep.
 *
 * Options:
 *   -t                 use thrash_initialize instead of the default fpInit
 *   -P <parallelism>   must be > 0, forwarded to loads()
 *   -W <warmup>        forwarded to loads()
 *   -N <repetitions>   forwarded to loads()
 * Arguments:
 *   len                size in MB (multiplied by 1024*1024 below)
 *   stride ...         optional strides, parsed with bytes(); when none is
 *                      given a single sweep is run with STRIDE
 *
 * For every stride a header line "\"stride=<n>" is written to stderr and
 * loads() is called for each range from LOWER up to len, advancing with
 * step(). Returns 0 on success.
 *
 * lmbench_usage() is not visible in this file; it is presumably expected
 * not to return. The explicit returns after the argument checks below make
 * sure a bad argument is never used even if it does return.
 */
int
main(int ac, char **av)
{
	int	i;
	int	c;
	int	mb;
	int	parallel = 1;
	int	warmup = 0;
	int	repetitions = -1;
	size_t	len;
	size_t	range;
	size_t	stride;
	char   *usage = "[-P <parallelism>] [-W <warmup>] [-N <repetitions>] [-t] len [stride...]\n";

	while ((c = getopt(ac, av, "tP:W:N:")) != EOF) {
		switch (c) {
		case 't':
			fpInit = thrash_initialize;
			break;
		case 'P':
			parallel = atoi(optarg);
			if (parallel <= 0) lmbench_usage(ac, av, usage);
			break;
		case 'W':
			warmup = atoi(optarg);
			break;
		case 'N':
			repetitions = atoi(optarg);
			break;
		default:
			lmbench_usage(ac, av, usage);
			break;
		}
	}

	/* The size argument is mandatory. */
	if (optind == ac) {
		lmbench_usage(ac, av, usage);
		return (1);
	}

	/*
	 * Reject a negative size before it is converted to size_t (it would
	 * turn into an enormous length), and reject a size whose byte count
	 * does not fit in size_t.
	 */
	mb = atoi(av[optind]);
	if (mb < 0 || (size_t)mb > (size_t)-1 / (1024 * 1024)) {
		lmbench_usage(ac, av, usage);
		return (1);
	}
	len = (size_t)mb * 1024 * 1024;

	if (optind == ac - 1) {
		/* No stride given: one sweep with the default stride. */
		fprintf(stderr, "\"stride=%d\n", (int)STRIDE);
		for (range = LOWER; range <= len; range = step(range)) {
			loads(len, range, STRIDE, parallel,
			      warmup, repetitions);
		}
	} else {
		/* One sweep per stride argument, separated by a blank line. */
		for (i = optind + 1; i < ac; ++i) {
			stride = bytes(av[i]);
			/* loads() divides by the stride, so zero is invalid. */
			if (stride == 0) {
				lmbench_usage(ac, av, usage);
				return (1);
			}
			fprintf(stderr, "\"stride=%d\n", (int)stride);
			for (range = LOWER; range <= len; range = step(range)) {
				loads(len, range, stride, parallel,
				      warmup, repetitions);
			}
			fprintf(stderr, "\n");
		}
	}
	return (0);
}
/*
 * Pointer-chasing building blocks. ONE replaces p with the pointer stored
 * at *p, so each load depends on the result of the previous one. The
 * remaining macros repeat it: FIVE = 5, TEN = 10, FIFTY = 50 and
 * HUNDRED = 100 consecutive ONE statements.
 */
#define ONE p = (char **)*p;
#define FIVE ONE ONE ONE ONE ONE
#define TEN FIVE FIVE
#define FIFTY TEN TEN TEN TEN TEN
#define HUNDRED FIFTY FIFTY
/*
 * Timed body of the benchmark, passed to benchmp() by loads().
 *
 * iterations: number of times to repeat the pass over the chain.
 * cookie:     pointer to the struct mem_state for this run.
 *
 * Each iteration executes HUNDRED `count` times, i.e. count * 100
 * dependent loads, starting from state->p[0]. state->line must be
 * non-zero because it is used as a divisor.
 */
void
benchmark_loads(iter_t iterations, void *cookie)
{
struct mem_state* state = (struct mem_state*)cookie;
/* Start the chase from the pointer stored in the state. */
register char **p = (char**)state->p[0];
register size_t i;
/* Number of HUNDRED groups per iteration; always at least one. */
register size_t count = state->len / (state->line * 100) + 1;
while (iterations-- > 0) {
for (i = 0; i < count; ++i) {
HUNDRED;
}
}
/*
 * Pass the final pointer to use_pointer(); presumably this keeps the
 * compiler from discarding the chain of loads -- confirm in the library.
 */
use_pointer((void *)p);
/* Store the current position back so a later call continues from it. */
state->p[0] = (char*)p;
}
/*
 * Measure one (range, stride) point and print it to stderr as
 * "<range in MB> <result>\n".
 *
 * len:         stored as state.maxlen.
 * range:       stored as state.len; also the value printed (in MB).
 * stride:      stored as state.line.
 * parallel, warmup, repetitions: forwarded unchanged to benchmp().
 *
 * Nothing is measured or printed when stride is zero or range is smaller
 * than stride.
 */
void
loads(size_t len, size_t range, size_t stride,
	int parallel, int warmup, int repetitions)
{
	double result;
	size_t count;
	struct mem_state state;

	/*
	 * A zero stride would make the divisions by (line * 100) below, and
	 * the same division in benchmark_loads(), divide by zero.
	 */
	if (stride == 0 || range < stride) return;

	state.width = 1;
	state.len = range;
	state.maxlen = len;
	state.line = stride;
	state.pagesize = getpagesize();

	/* Loads per benchmark iteration; mirrors the loop in benchmark_loads(). */
	count = 100 * (state.len / (state.line * 100) + 1);

#if 0
	/* Disabled debugging path: run the phases by hand, without benchmp(). */
	(*fpInit)(0, &state);
	fprintf(stderr, "loads: after init\n");
	(*benchmark_loads)(2, &state);
	fprintf(stderr, "loads: after benchmark\n");
	mem_cleanup(0, &state);
	fprintf(stderr, "loads: after cleanup\n");
	settime(1);
	save_n(1);
#else
	/*
	 * Now walk them and time it.
	 */
	benchmp(fpInit, benchmark_loads, mem_cleanup,
		100000, parallel, warmup, repetitions, &state);
#endif

	/*
	 * We want to get to nanoseconds / load.
	 * NOTE(review): the factor of 1000 presumably converts gettime()
	 * from microseconds -- confirm against the timing library.
	 */
	save_minimum();
	result = (1000. * (double)gettime()) / (double)(count * get_n());
	fprintf(stderr, "%.5f %.3f\n", range / (1024. * 1024.), result);
}
/*
 * Return the range value that follows k in the sweep:
 *   - below 1024, the value doubles;
 *   - from 1024 up to (but not including) 4096, it grows by 1024;
 *   - from 4096 on, it grows by a quarter of the smallest power of two
 *     (starting at 4096) that is strictly greater than k.
 */
size_t
step(size_t k)
{
	size_t bound;

	if (k < 1024)
		return (2 * k);

	if (k < 4 * 1024)
		return (k + 1024);

	/* Find the first power of two above k, starting the search at 4096. */
	bound = 4 * 1024;
	while (bound <= k)
		bound <<= 1;

	return (k + bound / 4);
}