-
Notifications
You must be signed in to change notification settings - Fork 84
/
scanDocuments.ts
65 lines (56 loc) · 1.68 KB
/
scanDocuments.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import fs from "fs";
import path from "path";
import util from "util";
import { DocInfoWithFilePath, SearchDocument } from "../../shared/interfaces";
import { parse } from "./parse";
import { debugVerbose } from "./debug";
// Promise-returning version of `fs.readFile`, so file contents can be obtained with `await`.
const readFileAsync = util.promisify(fs.readFile);
// Most recently issued document id. Module-level, so the sequence continues
// across every caller in this module instead of restarting.
let nextDocId = 0;

/**
 * Advances the module-level counter by one and returns the new value.
 * The first call returns 1.
 */
function getNextDocId(): number {
  nextDocId += 1;
  return nextDocId;
}
/**
 * Reads and parses every listed HTML file and converts the results into
 * search documents, grouped by kind.
 *
 * Files are read and parsed concurrently, but documents are built (and ids
 * assigned) in a single synchronous pass that follows the order of the input
 * list. The output therefore does not depend on which file read happens to
 * complete first.
 *
 * @param DocInfoWithFilePathList - Files to scan; each entry supplies the
 *   `filePath` to read, the page `url`, and its `type`.
 * @returns A three-element array:
 *   `[titleDocuments, headingDocuments, contentDocuments]`. Heading and
 *   content documents point at their page's title document through `p`.
 * @throws Rejects if any file cannot be read or if `parse` throws.
 */
export async function scanDocuments(
  DocInfoWithFilePathList: DocInfoWithFilePath[]
): Promise<SearchDocument[][]> {
  const titleDocuments: SearchDocument[] = [];
  const headingDocuments: SearchDocument[] = [];
  const contentDocuments: SearchDocument[] = [];
  const allDocuments = [titleDocuments, headingDocuments, contentDocuments];

  // Promise.all returns results in input order regardless of completion
  // order, so only the I/O and parsing run concurrently here.
  const parsedPages = await Promise.all(
    DocInfoWithFilePathList.map(async ({ filePath, url, type }) => {
      debugVerbose(
        `parsing %s file %o of %o`,
        type,
        path.relative(process.cwd(), filePath),
        url
      );
      const html = await readFileAsync(filePath, { encoding: "utf8" });
      const { pageTitle, sections } = parse(html, type, url);
      return { url, pageTitle, sections };
    })
  );

  // Ids are assigned here, after all reads have settled, so that both the ids
  // and the array ordering are stable from run to run.
  for (const { url, pageTitle, sections } of parsedPages) {
    const titleId = getNextDocId();
    titleDocuments.push({
      i: titleId,
      t: pageTitle,
      u: url,
    });

    for (const section of sections) {
      // A section whose title repeats the page title is already covered by
      // the title document, so no heading document is emitted for it.
      if (section.title !== pageTitle) {
        headingDocuments.push({
          i: getNextDocId(),
          t: section.title,
          u: url + section.hash,
          p: titleId,
        });
      }
      // Sections with empty content produce no content document.
      if (section.content) {
        contentDocuments.push({
          i: getNextDocId(),
          t: section.content,
          u: url + section.hash,
          p: titleId,
        });
      }
    }
  }

  return allDocuments;
}