Skip to content

Commit

Permalink
[util/getAllDocs] extract getAllDocs to util
Browse files Browse the repository at this point in the history
  • Loading branch information
MrOrz committed Nov 18, 2021
1 parent d74b403 commit 3a0ca23
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 23 deletions.
32 changes: 9 additions & 23 deletions src/scripts/migrations/createBackendUsers.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

import 'dotenv/config';
import client from 'util/client';
import getAllDocs from 'util/getAllDocs';
import Bulk from 'util/bulk';
import rollbar from 'rollbarInstance';
import {
Expand Down Expand Up @@ -295,30 +296,15 @@ export default class CreateBackendUsers {
* @yields {Object} the document
*/
async *getAllDocs(indexName, hasAdditionalUserFields = false) {
let resp = await client.search({
index: indexName,
scroll: SCROLL_TIMEOUT,
size: this.batchSize,
body: {
query: hasAdditionalUserFields ? matchAllQuery : backendUserQuery,
},
});

while (true) {
const docs = resp.body.hits.hits;
if (docs.length === 0) break;
for (const doc of docs) {
yield doc;
}

if (!resp.body._scroll_id) {
break;
}

resp = await client.scroll({
for await (const doc of getAllDocs(
indexName,
hasAdditionalUserFields ? matchAllQuery : backendUserQuery,
{
scroll: SCROLL_TIMEOUT,
scrollId: resp.body._scroll_id,
});
size: this.batchSize,
}
)) {
yield doc;
}
}

Expand Down
45 changes: 45 additions & 0 deletions src/util/getAllDocs.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import client from 'util/client';

/**
 * An async generator that fetches every doc matching `query` in the specified
 * index through the Elasticsearch scroll API and yields one doc at a time.
 *
 * The scroll context opened by the initial search is cleared when iteration
 * ends, whether the generator runs to completion, the consumer stops early
 * (e.g. `break` inside `for await`), or a request throws.
 *
 * @param {String} index The name of the index to fetch
 * @param {Object} query The elasticsearch query. Fetches all docs in the specified index if not given.
 * @param {Object} settings Query param settings in ES.
 * @param {number} settings.size Number of docs per batch. Defaults to 1000.
 * @param {string} settings.scroll The scroll timeout setting. Defaults to 30s.
 * @yields {Object} the document (one entry of `hits.hits`)
 */
async function* getAllDocs(
  index,
  query = { match_all: {} },
  { scroll = '30s', size = 1000 } = {}
) {
  // Most recent scroll id returned by Elasticsearch; stays undefined when the
  // initial search fails, in which case there is nothing to clean up.
  let latestScrollId;

  try {
    let resp = await client.search({
      index,
      scroll,
      size,
      body: {
        query,
      },
    });

    while (true) {
      // Every response may carry a fresh scroll id; remember the latest one
      // so the cleanup below targets the live context.
      if (resp.body._scroll_id) {
        latestScrollId = resp.body._scroll_id;
      }

      const docs = resp.body.hits.hits;
      if (docs.length === 0) break;
      for (const doc of docs) {
        yield doc;
      }

      if (!resp.body._scroll_id) {
        break;
      }

      resp = await client.scroll({
        scroll,
        scrollId: resp.body._scroll_id,
      });
    }
  } finally {
    if (latestScrollId) {
      try {
        // Release the server-side scroll context right away instead of
        // leaving it open until the `scroll` timeout lapses.
        await client.clearScroll({ body: { scroll_id: latestScrollId } });
      } catch (err) {
        // Best-effort cleanup: the context expires on its own after the
        // scroll timeout, and a cleanup failure must not mask the outcome
        // (or the original error) of the iteration itself.
      }
    }
  }
}

export default getAllDocs;

0 comments on commit 3a0ca23

Please sign in to comment.