Skip to content

Commit

Permalink
feat: refacto (part 2) (#396)
Browse files Browse the repository at this point in the history
  • Loading branch information
bodinsamuel committed Aug 12, 2019
1 parent 6b954d5 commit 2df582b
Show file tree
Hide file tree
Showing 8 changed files with 514 additions and 769 deletions.
2 changes: 1 addition & 1 deletion package.json
Expand Up @@ -45,10 +45,10 @@
"hot-shots": "6.3.0",
"lodash": "4.17.15",
"ms": "2.1.2",
"nano": "8.1.0",
"nice-package": "3.1.0",
"numeral": "2.0.6",
"object-sizeof": "1.4.0",
"pouchdb-http": "6.0.2",
"promise-rat-race": "1.5.1",
"traverse": "0.6.6",
"truncate-utf8-bytes": "1.0.2"
Expand Down
134 changes: 134 additions & 0 deletions src/bootstrap.js
@@ -0,0 +1,134 @@
import * as algolia from './algolia/index.js';
import config from './config.js';
import datadog from './datadog.js';
import log from './log.js';
import ms from 'ms';
import * as npm from './npm/index.js';
import saveDocs from './saveDocs.js';

let loopStart;

/**
 * Bootstrap is the mode that goes from 0 to all the packages in NPM.
 * In other words, it reindexes everything from scratch.
 *
 * It is useful if:
 *  - you are starting this project for the first time
 *  - you messed up your Algolia index
 *  - you lagged too much behind
 *
 * Watch mode should/can be reliably left running for weeks/months as CouchDB is made for that.
 * BUT for the moment it's mandatory to relaunch it because it's the only way to update: typescript, downloads stats.
 *
 * @param {object} stateManager - exposes async `check()` and `save(partialState)`;
 *   also forwarded to `loop()` and `moveToProduction()`.
 * @param {object} algoliaClient - forwarded to `moveToProduction()`.
 * @param {object} mainIndex - receives the default settings when the bootstrap is already done.
 * @param {object} bootstrapIndex - receives the default settings on a fresh start and is
 *   the index handed to every `loop()` iteration.
 * @returns {Promise<void>}
 */
async function run(stateManager, algoliaClient, mainIndex, bootstrapIndex) {
  const state = await stateManager.check();

  // Nothing to do when a previous bootstrap already completed.
  if (state.seq > 0 && state.bootstrapDone === true) {
    await algolia.putDefaultSettings(mainIndex, config);
    log.info('⛷ Bootstrap: done');
    return;
  }

  await stateManager.save({
    stage: 'bootstrap',
  });

  const { seq, nbDocs: totalDocs } = await npm.getInfo();
  if (!state.bootstrapLastId) {
    // Start from 0
    log.info('⛷ Bootstrap: starting from the first doc');
    // first time this launches, we need to remember the last seq our bootstrap can trust
    await stateManager.save({ seq });
    await algolia.putDefaultSettings(bootstrapIndex, config);
  } else {
    log.info('⛷ Bootstrap: starting at doc %s', state.bootstrapLastId);
  }

  log.info('-----');
  log.info(`Total packages ${totalDocs}`);
  log.info('-----');

  // `loop()` returns null once there is nothing left to fetch. A do/while is
  // used so that a stored `bootstrapLastId` of `null` (treated as "start from
  // the first doc" above) still triggers the first iteration instead of
  // skipping the whole bootstrap and promoting an empty index.
  let lastProcessedId = state.bootstrapLastId;
  do {
    // Module-level timestamp read by `logProgress()` to compute the rate.
    loopStart = Date.now();

    lastProcessedId = await loop(lastProcessedId, stateManager, bootstrapIndex);
  } while (lastProcessedId !== null);

  log.info('-----');
  log.info('⛷ Bootstrap: done');
  await stateManager.save({
    bootstrapDone: true,
    bootstrapLastDone: Date.now(),
  });

  await moveToProduction(stateManager, algoliaClient);
}

/**
 * Run a single bootstrap iteration: fetch the next page of packages,
 * hand them to `saveDocs` for the bootstrap index and persist the new cursor.
 *
 * @param {string} [lastId] - id of the last doc already processed; when falsy,
 *   no `startkey`/`skip` is sent to `npm.findAll`.
 * @param {object} stateManager - its `save()` receives the new `bootstrapLastId`.
 * @param {object} bootstrapIndex - forwarded to `saveDocs` as `index`.
 * @returns {Promise<string|null>} id of the last fetched row, or `null` when
 *   the page came back empty (signals the caller to stop).
 */
async function loop(lastId, stateManager, bootstrapIndex) {
  const startedAt = Date.now();
  log.info('loop()', '::', lastId);

  // Presumably `skip: 1` avoids re-fetching the doc at `startkey`, which was
  // already handled by the previous iteration (verify against npm.findAll).
  const query = lastId
    ? { limit: config.bootstrapConcurrency, startkey: lastId, skip: 1 }
    : { limit: config.bootstrapConcurrency };

  const page = await npm.findAll(query);
  const { rows } = page;

  // Empty page: nothing left to process, tell the caller to stop.
  if (rows.length <= 0) {
    return null;
  }

  datadog.increment('packages', rows.length);
  log.info(' - fetched', rows.length, 'packages');

  // Remember the cursor before the docs are handed off for saving.
  const { id: newLastId } = rows[rows.length - 1];

  const savedCount = await saveDocs({ docs: rows, index: bootstrapIndex });
  await stateManager.save({ bootstrapLastId: newLastId });
  log.info(` - saved ${savedCount} packages`);

  await logProgress(page.offset, rows.length);

  const elapsed = Date.now() - startedAt;
  datadog.timing('loop', elapsed);

  return newLastId;
}

/**
 * Copy the bootstrap index over the production index.
 *
 * The state is read before the copy and written back afterwards.
 * NOTE(review): presumably because the copy overwrites where the state is
 * stored — confirm against the state manager implementation.
 *
 * @param {object} stateManager - exposes async `get()` and `save(state)`.
 * @param {object} algoliaClient - exposes async `copyIndex(from, to)`.
 * @returns {Promise<void>}
 */
async function moveToProduction(stateManager, algoliaClient) {
  log.info('🚚 starting move to production');

  const snapshot = await stateManager.get();

  const { bootstrapIndexName, indexName } = config;
  await algoliaClient.copyIndex(bootstrapIndexName, indexName);

  await stateManager.save(snapshot);
}

/**
 * Log how far the bootstrap has progressed: docs done, percentage,
 * current rate and estimated remaining time.
 *
 * The rate is measured against the module-level `loopStart` timestamp, and
 * the total is re-read from `npm.getInfo()` on every call.
 *
 * @param {number} offset - offset reported for the page just processed.
 * @param {number} nbDocs - number of docs in the page just processed.
 * @returns {Promise<void>}
 */
async function logProgress(offset, nbDocs) {
  const info = await npm.getInfo();
  const totalDocs = info.nbDocs;

  const elapsedSeconds = (Date.now() - loopStart) / 1000;
  const ratePerSecond = nbDocs / elapsedSeconds;

  const processed = offset + nbDocs;
  // Clamp to 1 so the ratio is never computed from a zero numerator.
  const percent = Math.floor((Math.max(offset + nbDocs, 1) / totalDocs) * 100);
  const roundedRate = Math.round(ratePerSecond);
  const remaining = ms(((totalDocs - offset - nbDocs) / ratePerSecond) * 1000);

  log.info(
    '[progress] %d/%d docs (%d%), current rate: %d docs/s (%s remaining)',
    processed,
    totalDocs,
    percent,
    roundedRate,
    remaining
  );
}

export { run };
3 changes: 2 additions & 1 deletion src/config.js
Expand Up @@ -3,7 +3,8 @@ dotenv.config();
import ms from 'ms';

const defaultConfig = {
npmRegistryEndpoint: 'https://replicate.npmjs.com/registry',
npmRegistryEndpoint: 'https://replicate.npmjs.com',
npmRegistryDBName: 'registry',
npmDownloadsEndpoint: 'https://api.npmjs.org/downloads',
npmRootEndpoint: 'https://registry.npmjs.org',
jsDelivrHitsEndpoint:
Expand Down

0 comments on commit 2df582b

Please sign in to comment.