-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathc5-threaded-strstr.c
142 lines (118 loc) · 4.7 KB
/
c5-threaded-strstr.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Purpose: It seems we might be CPU-bound, or that multiple cores can use more memory bandwidth than a single core.
// Divide the entire file into n slices and search each one in parallel with threads.
// We need to take some care to make sure that each slice is zero-terminated. We insert the \0 between slices and use a
// mmap() trick to get the last \0 after the mapped file data.
// Result: Looks like we get a linear speedup until we become memory-bandwidth limited. Nice. :)
#define _GNU_SOURCE // for memrchr()
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>
// Input for one worker thread: the term to look for and the slice of data to search.
struct worker_args {
    const char* search;       // zero-terminated search term
    const char* slice_start;  // first byte of the slice
    const char* slice_end;    // one past the last byte of the slice; strstr() relies on a '\0' being stored here
};

// Thread entry point: prints every line of the slice that contains the search term.
//
// args_ptr points to a struct worker_args. The slice is scanned with strstr(), so it must be zero-terminated.
// For each match the surrounding line (without its line break) is printed to stdout, prefixed with the id of
// the calling thread. A line that contains the term several times is printed once per occurrence.
// Always returns NULL.
void* worker_thread(void* args_ptr) {
    struct worker_args* args = args_ptr;
    //printf("[worker %lX] slice: %p - %p, %zu bytes\n", pthread_self(), args->slice_start, args->slice_end, args->slice_end - args->slice_start);

    size_t search_length = strlen(args->search);
    // strstr() with an empty needle returns the haystack itself, so the position would never advance and the
    // loop below would spin forever. An empty term matches nothing useful, so report no matches.
    if (search_length == 0)
        return NULL;

    const char* pos = args->slice_start;
    const char* match = NULL;
    while ( (match = strstr(pos, args->search)) != NULL ) {
        // Continue behind this match on the next iteration
        pos = match + search_length;

        // Search for the previous and next line breaks to find the boundaries of the matching line
        const char* start = memrchr(args->slice_start, '\n', (size_t)(match - args->slice_start));
        start = (start == NULL) ? args->slice_start : start + 1;  // skip outputting the found line break
        const char* end = memchr(match, '\n', (size_t)(args->slice_end - match));
        if (end == NULL)
            end = args->slice_end;

        // The cast makes the argument match the %lX conversion
        printf("[worker %lX] %.*s\n", (unsigned long)pthread_self(), (int)(end - start), start);
    }

    return NULL;
}
int main(int argc, char** argv) {
if (argc != 4) {
fprintf(stderr, "usage: %s search-term file number-of-threads\n", argv[0]);
return 1;
}
const char* search = argv[1];
const char* file = argv[2];
int number_of_threads = atoi(argv[3]);
int fd = open(file, O_RDONLY);
if ( fd == -1 ) {
perror("open() failed");
exit(1);
}
struct stat stats;
if ( fstat(fd, &stats) == -1 ) {
perror("fstat() failed");
exit(1);
}
// Map the entire file + 1 byte at the end. That byte will automatically be zero-filled (accoring to
// https://stackoverflow.com/a/32255592). We need it since the strstr() of the last thread needs a zero-terminator,
// too.
size_t data_size = stats.st_size;
char* data_start = mmap(NULL, data_size + 1, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
char* data_end = data_start + data_size;
if (data_start == MAP_FAILED) {
perror("mmap() failed");
exit(1);
}
//printf("data: %p - %p, %zu bytes\n", data_start, data_end, data_size);
// Slice data into number_of_threads pieces (or fewer if there isn't enough data) so we can search each in parallel
struct {
pthread_t id;
struct worker_args args;
} threads[number_of_threads];
int thread_count = 0;
size_t slice_size = data_size / number_of_threads;
char* slice_start = NULL;
char* slice_end = NULL;
for (slice_start = data_start; slice_start < data_end; slice_start = slice_end + 1 /* the +1 skips the \0 at the end of the previous slice */) {
// Start at the ideal slice_end and then search for the next \n
slice_end = slice_start + slice_size;
if (slice_end < data_end) {
// Only search for the next \n if we're not already at the end of the last slice
slice_end = memchr(slice_end, '\n', data_end - slice_end);
if (slice_end == NULL) {
// No \n found in the remaining data, we're at the end of the last slice
slice_end = data_end;
} else {
// Replace the \n at the end of each slice with a \0 so strstr() stops there. The mapping is writable and
// private so the changes are not saved to disk. This causes a segfault for the last slice since we try to
// write beyond the file that backs the mapping. But we mapped one byte more than the file size and linux
// fills bytes beyond the file size with zeros. So we already have our zero-terminator.
slice_end[0] = '\0';
}
} else {
slice_end = data_end;
}
//printf("slice %d: %p - %p, %zu bytes\n", thread_count, slice_start, slice_end, slice_end - slice_start);
threads[thread_count].args = (struct worker_args){
.search = search,
.slice_start = slice_start,
.slice_end = slice_end
};
thread_count++;
}
// Change all pages in the memory mapping to read-only to avoid overhead (doesn't seem necessary though)
if ( mprotect(data_start, data_size + 1, PROT_READ) == -1 ) {
perror("fstat() failed");
exit(1);
}
// Spawn threads
for (int i = 0; i < thread_count; i++)
pthread_create(&threads[i].id, NULL, worker_thread, &threads[i].args);
// Wait for them
for (int i = 0; i < thread_count; i++)
pthread_join(threads[i].id, NULL);
munmap(data_start, data_size + 1);
close(fd);
return 0;
}