Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 41 additions & 7 deletions script/search/index-elasticsearch.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ const shortNames = Object.fromEntries(

const allVersionKeys = Object.keys(shortNames)

const DEFAULT_SOURCE_DIRECTORY = path.join('lib', 'search', 'indexes')

program
.description('Creates Elasticsearch index from records')
.option('-v, --verbose', 'Verbose outputs')
Expand All @@ -55,6 +57,10 @@ program
new Option('--not-language <LANGUAGE...>', 'Specific language to omit').choices(languageKeys)
)
.option('-u, --elasticsearch-url <url>', 'If different from $ELASTICSEARCH_URL')
.option(
'-s, --source-directory <DIRECTORY>',
`Directory where records files are (default ${DEFAULT_SOURCE_DIRECTORY})`
)
.parse(process.argv)

main(program.opts())
Expand Down Expand Up @@ -91,6 +97,15 @@ async function main(opts) {
if (verbose) {
console.log(`Connecting to ${chalk.bold(safeUrlDisplay(node))}`)
}
const sourceDirectory = opts.sourceDirectory || DEFAULT_SOURCE_DIRECTORY
try {
await fs.stat(sourceDirectory)
} catch (error) {
if (error.code === 'ENOENT') {
throw new Error(`The specified directory '${sourceDirectory}' does not exist.`)
}
throw error
}

const client = new Client({
node,
Expand All @@ -113,7 +128,7 @@ async function main(opts) {
const indexName = `github-docs-${versionKey}-${language}`

console.time(`Indexing ${indexName}`)
await indexVersion(client, indexName, versionKey, language, verbose)
await indexVersion(client, indexName, versionKey, language, sourceDirectory, verbose)
console.timeEnd(`Indexing ${indexName}`)
if (verbose) {
console.log(`To view index: ${safeUrlDisplay(node + `/${indexName}`)}`)
Expand Down Expand Up @@ -149,7 +164,14 @@ function utcTimestamp() {
}

// Consider moving this to lib
async function indexVersion(client, indexName, version, language, verbose = false) {
async function indexVersion(
client,
indexName,
version,
language,
sourceDirectory,
verbose = false
) {
// Note, it's a bit "weird" that numbered releases versions are
// called the number but that's how the lib/search/indexes
// files are named at the moment.
Expand All @@ -158,7 +180,7 @@ async function indexVersion(client, indexName, version, language, verbose = fals
: shortNames[version].miscBaseName
const recordsName = `github-docs-${indexVersion}-${language}`

const records = await loadRecords(recordsName)
const records = await loadRecords(recordsName, sourceDirectory)

const thisAlias = `${indexName}__${utcTimestamp()}`

Expand Down Expand Up @@ -264,10 +286,22 @@ async function indexVersion(client, indexName, version, language, verbose = fals
}
}

async function loadRecords(indexName) {
const filePath = path.join('lib', 'search', 'indexes', `${indexName}-records.json.br`)
// Do not set to 'utf8' on file reads
return fs.readFile(filePath).then(decompress).then(JSON.parse)
async function loadRecords(indexName, sourceDirectory) {
// First try looking for the `$indexName-records.json.br` file.
// If that doens't work, look for the `$indexName-records.json` one.
try {
const filePath = path.join(sourceDirectory, `${indexName}-records.json.br`)
// Do not set to 'utf8' on file reads
const payload = await fs.readFile(filePath).then(decompress)
return JSON.parse(payload)
} catch (error) {
if (error.code === 'ENOENT') {
const filePath = path.join(sourceDirectory, `${indexName}-records.json`)
const payload = await fs.readFile(filePath)
return JSON.parse(payload)
}
throw error
}
}

function getSnowballLanguage(language) {
Expand Down