-
Notifications
You must be signed in to change notification settings - Fork 0
/
Netgrep.ts
207 lines (191 loc) · 6.34 KB
/
Netgrep.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import { search_bytes } from '@netgrep/search';
import { BatchNetgrepResult } from './data/BatchNetgrepResult.js';
import { NetgrepConfig } from './data/NetgrepConfig.js';
import { NetgrepInput } from './data/NetgrepInput.js';
import { NetgrepResult } from './data/NetgrepResult.js';
import { NetgrepSearchConfig } from './data/NetgrepSearchConfig.js';
/**
 * Baseline `netgrep` settings. Any option supplied by the
 * caller takes precedence over the value declared here.
 */
const defaultConfig: NetgrepConfig = { enableMemoryCache: true };
/**
 * The `netgrep` library allows searching remote files
 * for a specific pattern using the `ripgrep` library
 * over HTTP.
 */
export class Netgrep {
  private readonly config: NetgrepConfig;

  /**
   * Complete file contents keyed by url. An entry is only written once
   * the whole response body has been read, so a cached buffer is never
   * a truncated view of the remote file. A `Map` is used instead of a
   * plain object so that urls such as `"constructor"` cannot collide
   * with inherited `Object.prototype` members.
   */
  private readonly memoryCache = new Map<string, Uint8Array>();

  constructor(config?: Partial<NetgrepConfig>) {
    this.config = {
      ...defaultConfig,
      ...config,
    };
  }

  /**
   * Search a remote file for a specific pattern.
   * This method uses `ripgrep` under the hood in order to
   * start searching while downloading the file instead of
   * waiting for the whole file to be available offline.
   *
   * Each downloaded chunk is searched independently, so the streaming
   * path does not detect a match that straddles two chunks.
   *
   * @param url
   * The url to the remote file.
   * @param pattern
   * The pattern to search for. This can be anything `ripgrep` can understand.
   * @param metadata
   * An optional object that is returned back, untouched, in the result.
   * @param config
   * An optional configuration respecting the `NetgrepSearchConfig` type.
   * Its `signal` is forwarded to `fetch`.
   * @returns
   * A promise resolving to a `NetgrepResult<T>` as soon as a match has
   * been found in the remote file, or once the whole file has been read
   * without finding one. The promise rejects when the request fails, the
   * response has no body, or reading the body fails at any point.
   */
  public async search<T extends object>(
    url: string,
    pattern: string,
    metadata?: T,
    config?: NetgrepSearchConfig
  ): Promise<NetgrepResult<T>> {
    const useCache = this.config.enableMemoryCache;

    // Serve the search from the memory cache when the file
    // has already been downloaded completely.
    if (useCache) {
      const cached = this.memoryCache.get(url);
      if (cached !== undefined) {
        const result = search_bytes(cached, pattern);
        return { url, pattern, result, metadata };
      }
    }

    const res = await fetch(url, { signal: config?.signal });
    if (!res.body) {
      throw new Error("The response doesn't contain a body");
    }

    const reader = res.body.getReader();
    // Chunks are buffered locally and committed to the cache only when
    // the stream ends: an early match, an abort or a network error must
    // not leave a partial file behind for later searches.
    const chunks: Uint8Array[] = [];

    for (;;) {
      // A rejected `read()` propagates out of this async function and
      // rejects the returned promise.
      const chunk = await reader.read();

      if (chunk.done) {
        if (useCache) {
          this.memoryCache.set(url, this.concatChunks(chunks));
        }
        return { url, pattern, result: false, metadata };
      }

      if (useCache) {
        chunks.push(chunk.value);
      }

      // Execute the search in the current chunk of bytes
      // using the underneath WASM core module.
      if (search_bytes(chunk.value, pattern)) {
        // The rest of the body is not needed: release the stream.
        // A failing cancellation must not turn a match into an error.
        void reader.cancel().catch(() => undefined);
        return { url, pattern, result: true, metadata };
      }
    }
  }

  /**
   * Execute the `search` method in batch for multiple
   * files. This method returns a promise waiting for all
   * the executed searches to complete.
   *
   * @param inputs
   * An array of `NetgrepInput<T>` containing the urls to the
   * files. `T` is the generic type for the optional metadata to
   * pass for each url.
   * @param pattern
   * The pattern to search for. This can be anything `ripgrep` can understand.
   * @param config
   * An optional configuration respecting the `NetgrepSearchConfig` type.
   * @returns
   * A promise waiting for all the executed searches to complete. It does
   * not reject when a single search fails: the failure is reported through
   * the `error` field of the corresponding entry.
   */
  public searchBatch<T extends object>(
    inputs: Array<NetgrepInput<T>>,
    pattern: string,
    config?: NetgrepSearchConfig
  ): Promise<Array<BatchNetgrepResult<T>>> {
    return Promise.all(
      inputs.map((input) => this.searchSettled(input, pattern, config))
    );
  }

  /**
   * Execute the `search` method in batch for multiple
   * files. This method takes a callback as an input and
   * executes it exactly once per input, as soon as the search
   * for that input settles (match, no match, or error).
   *
   * @param inputs
   * An array of `NetgrepInput<T>` containing the urls to the
   * files. `T` is the generic type for the optional metadata to
   * pass for each url.
   * @param pattern
   * The pattern to search for. This can be anything `ripgrep` can understand.
   * @param cb
   * The callback that will be triggered for every input. It takes
   * a `BatchNetgrepResult<T>` as a parameter. Exceptions thrown by the
   * callback are not caught by this method.
   * @param config
   * An optional configuration respecting the `NetgrepSearchConfig` type.
   */
  public searchBatchWithCallback<T extends object>(
    inputs: Array<NetgrepInput<T>>,
    pattern: string,
    cb: (result: BatchNetgrepResult<T>) => void,
    config?: NetgrepSearchConfig
  ): void {
    inputs.forEach((input) => {
      // The outcome is settled first and the callback is attached
      // afterwards, so an exception thrown by `cb` cannot trigger
      // a second invocation with a fabricated error result.
      void this.searchSettled(input, pattern, config).then(cb);
    });
  }

  /**
   * Run `search` for a single input and convert a rejection into a
   * `BatchNetgrepResult<T>` carrying the serialized error, so that
   * the returned promise always fulfils.
   */
  private searchSettled<T extends object>(
    input: NetgrepInput<T>,
    pattern: string,
    config?: NetgrepSearchConfig
  ): Promise<BatchNetgrepResult<T>> {
    const { url, metadata } = input;
    return this.search(url, pattern, metadata, config).then(
      (res) => ({ ...res, error: null }),
      (err: unknown) => ({
        url,
        result: false,
        pattern,
        metadata,
        error: this.serializeError(err),
      })
    );
  }

  /**
   * Transform an `unknown` type returned from a catch
   * into a `string`.
   */
  private serializeError(err: unknown): string {
    if (err instanceof Error) {
      return err.message;
    }
    try {
      // `JSON.stringify` yields `undefined` (not a string) for
      // `undefined`, functions and symbols.
      return JSON.stringify(err) ?? String(err);
    } catch {
      // Circular structures and BigInt values make `JSON.stringify` throw.
      return String(err);
    }
  }

  /**
   * Join a list of byte chunks into one contiguous `Uint8Array`,
   * allocating the destination once.
   */
  private concatChunks(chunks: readonly Uint8Array[]): Uint8Array {
    const totalLength = chunks.reduce((sum, part) => sum + part.length, 0);
    const joined = new Uint8Array(totalLength);
    let offset = 0;
    for (const part of chunks) {
      joined.set(part, offset);
      offset += part.length;
    }
    return joined;
  }
}