From b7b22d1685f025808de24665d2ba463678e1af37 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Tue, 19 Jul 2022 14:24:46 +0200 Subject: [PATCH] read search records from any directory (#29153) --- script/search/index-elasticsearch.mjs | 48 +++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/script/search/index-elasticsearch.mjs b/script/search/index-elasticsearch.mjs index 1af2a307d3e6..e27ee43fb9d4 100755 --- a/script/search/index-elasticsearch.mjs +++ b/script/search/index-elasticsearch.mjs @@ -44,6 +44,8 @@ const shortNames = Object.fromEntries( const allVersionKeys = Object.keys(shortNames) +const DEFAULT_SOURCE_DIRECTORY = path.join('lib', 'search', 'indexes') + program .description('Creates Elasticsearch index from records') .option('-v, --verbose', 'Verbose outputs') @@ -55,6 +57,10 @@ program new Option('--not-language ', 'Specific language to omit').choices(languageKeys) ) .option('-u, --elasticsearch-url ', 'If different from $ELASTICSEARCH_URL') + .option( + '-s, --source-directory ', + `Directory where records files are (default ${DEFAULT_SOURCE_DIRECTORY})` + ) .parse(process.argv) main(program.opts()) @@ -91,6 +97,15 @@ async function main(opts) { if (verbose) { console.log(`Connecting to ${chalk.bold(safeUrlDisplay(node))}`) } + const sourceDirectory = opts.sourceDirectory || DEFAULT_SOURCE_DIRECTORY + try { + await fs.stat(sourceDirectory) + } catch (error) { + if (error.code === 'ENOENT') { + throw new Error(`The specified directory '${sourceDirectory}' does not exist.`) + } + throw error + } const client = new Client({ node, @@ -113,7 +128,7 @@ async function main(opts) { const indexName = `github-docs-${versionKey}-${language}` console.time(`Indexing ${indexName}`) - await indexVersion(client, indexName, versionKey, language, verbose) + await indexVersion(client, indexName, versionKey, language, sourceDirectory, verbose) console.timeEnd(`Indexing ${indexName}`) if (verbose) { console.log(`To view index: ${safeUrlDisplay(node + `/${indexName}`)}`) @@ -149,7 +164,14 @@ function utcTimestamp() { } // Consider moving this to lib -async function indexVersion(client, indexName, version, language, verbose = false) { +async function indexVersion( + client, + indexName, + version, + language, + sourceDirectory, + verbose = false +) { // Note, it's a bit "weird" that numbered releases versions are // called the number but that's how the lib/search/indexes // files are named at the moment. @@ -158,7 +180,7 @@ async function indexVersion(client, indexName, version, language, verbose = fals : shortNames[version].miscBaseName const recordsName = `github-docs-${indexVersion}-${language}` - const records = await loadRecords(recordsName) + const records = await loadRecords(recordsName, sourceDirectory) const thisAlias = `${indexName}__${utcTimestamp()}` @@ -264,10 +286,22 @@ async function indexVersion(client, indexName, version, language, verbose = fals } } -async function loadRecords(indexName) { - const filePath = path.join('lib', 'search', 'indexes', `${indexName}-records.json.br`) - // Do not set to 'utf8' on file reads - return fs.readFile(filePath).then(decompress).then(JSON.parse) +async function loadRecords(indexName, sourceDirectory) { + // First try looking for the `$indexName-records.json.br` file. + // If that doens't work, look for the `$indexName-records.json` one. + try { + const filePath = path.join(sourceDirectory, `${indexName}-records.json.br`) + // Do not set to 'utf8' on file reads + const payload = await fs.readFile(filePath).then(decompress) + return JSON.parse(payload) + } catch (error) { + if (error.code === 'ENOENT') { + const filePath = path.join(sourceDirectory, `${indexName}-records.json`) + const payload = await fs.readFile(filePath) + return JSON.parse(payload) + } + throw error + } } function getSnowballLanguage(language) {