From 8379ee1312a7837aa97ae8d77b70e54cd27701c8 Mon Sep 17 00:00:00 2001 From: Samuel Bodin Date: Sat, 16 Apr 2022 20:04:37 +0200 Subject: [PATCH] feat: refresh packages (#819) --- .eslintrc | 16 ++- src/@types/pkg.ts | 1 + .../__snapshots__/formatPkg.test.ts.snap | 7 + src/__tests__/formatPkg.test.ts | 10 +- src/__tests__/saveDocs.test.ts | 29 +--- src/api.ts | 2 +- src/bootstrap.ts | 11 +- src/config.ts | 3 + src/errors.ts | 1 + src/formatPkg.ts | 33 +++-- src/npm/Prefetcher.ts | 2 +- src/npm/index.ts | 15 +- src/saveDocs.ts | 125 ++-------------- src/utils/getExpiresAt.ts | 19 +++ src/watch.ts | 133 ++++++++++++++++-- tsconfig.json | 33 +++-- yarn.lock | 7 + 17 files changed, 248 insertions(+), 199 deletions(-) create mode 100644 src/errors.ts create mode 100644 src/utils/getExpiresAt.ts diff --git a/.eslintrc b/.eslintrc index 8603ce9e1..54f000b5d 100644 --- a/.eslintrc +++ b/.eslintrc @@ -16,6 +16,18 @@ "ts": "never" } } - ], - } + ] + }, + "overrides": [ + { + "files": [ + "**/*.ts" + ], + "rules": { + "consistent-return": [ + "off" + ] + } + } + ] } diff --git a/src/@types/pkg.ts b/src/@types/pkg.ts index fc3be1025..4e802a237 100644 --- a/src/@types/pkg.ts +++ b/src/@types/pkg.ts @@ -43,6 +43,7 @@ export type ComputedMeta = { export interface RawPkg { objectID: string; + rev: string; name: string; downloadsLast30Days: number; downloadsRatio: number; diff --git a/src/__tests__/__snapshots__/formatPkg.test.ts.snap b/src/__tests__/__snapshots__/formatPkg.test.ts.snap index 98a2b9f3d..7ccdac291 100644 --- a/src/__tests__/__snapshots__/formatPkg.test.ts.snap +++ b/src/__tests__/__snapshots__/formatPkg.test.ts.snap @@ -60,6 +60,7 @@ Object { "url": "https://github.com/algolia/npm-search", "user": "algolia", }, + "rev": Any, "styleTypes": Array [], "tags": Object { "latest": "1.2.3", @@ -238,6 +239,7 @@ Are you in trouble? Read through our [contribution guidelines](https://bitbucket "url": "git+https://bitbucket.org/atlassian/atlaskit.git", "user": "atlassian", }, + "rev": Any, "styleTypes": Array [], "tags": Object { "latest": "1.6.1", @@ -379,6 +381,7 @@ Object { "url": "https://github.com/atomic-package/tab", "user": "atomic-package", }, + "rev": Any, "styleTypes": Array [], "tags": Object { "latest": "0.0.4", @@ -511,6 +514,7 @@ Object { "url": "https://github.com/algolia/create-instantsearch-app", "user": "algolia", }, + "rev": Any, "styleTypes": Array [], "tags": Object { "latest": "4.4.2", @@ -599,6 +603,7 @@ index(arr, obj); MIT", "repository": null, + "rev": Any, "styleTypes": Array [], "tags": Object { "latest": "0.0.1", @@ -775,6 +780,7 @@ Thank you so much for contributing!! "url": "https://github.com/LeaVerou/prism", "user": "LeaVerou", }, + "rev": Any, "styleTypes": Array [ "css", ], @@ -881,6 +887,7 @@ Object { "url": "https://github.com/algolia/npm-search", "user": "algolia", }, + "rev": Any, "styleTypes": Array [], "tags": undefined, "types": Object { diff --git a/src/__tests__/formatPkg.test.ts b/src/__tests__/formatPkg.test.ts index d8dc6fc7d..2545cec6a 100644 --- a/src/__tests__/formatPkg.test.ts +++ b/src/__tests__/formatPkg.test.ts @@ -1,7 +1,8 @@ import NicePackage from 'nice-package'; import isISO8601 from 'validator/lib/isISO8601.js'; -import formatPkg, { +import { + formatPkg, getRepositoryInfo, getMains, getVersions, @@ -15,7 +16,7 @@ import rawPackages from './rawPackages.json'; const BASE: GetPackage = { _id: '0', 'dist-tags': {}, - _rev: '', + _rev: 'rev-1', name: '0', maintainers: [], readme: '', @@ -63,6 +64,7 @@ describe('general', () => { .map((formattedPackage) => expect(formattedPackage).toMatchSnapshot( { + rev: expect.any(String), lastCrawl: expect.any(String), _searchInternal: { expiresAt: expect.any(Number), @@ -93,7 +95,7 @@ describe('general', () => { }; const formatted = formatPkg(pkg); const postfix = ' **TRUNCATED**'; - const ending = formatted.readme.substr( + const ending = formatted.readme.substring( formatted.readme.length - postfix.length ); @@ -103,6 +105,7 @@ describe('general', () => { expect(ending).toBe(postfix); expect(formatted).toMatchSnapshot({ + rev: expect.any(String), readme: expect.any(String), lastCrawl: expect.any(String), _searchInternal: { @@ -845,6 +848,7 @@ describe('deprecated', () => { const formatted = formatPkg(pkg); expect(formatted).toMatchSnapshot({ + rev: expect.any(String), lastCrawl: expect.any(String), deprecated: 'Yes this is deprecated', isDeprecated: true, diff --git a/src/__tests__/saveDocs.test.ts b/src/__tests__/saveDocs.test.ts index 8356868fc..fd9806d46 100644 --- a/src/__tests__/saveDocs.test.ts +++ b/src/__tests__/saveDocs.test.ts @@ -1,25 +1,20 @@ import algoliasearch from 'algoliasearch'; -import { saveDocs, saveDoc } from '../saveDocs'; +import { formatPkg } from '../formatPkg'; +import { saveDoc } from '../saveDocs'; import preact from './preact-simplified.json'; jest.setTimeout(15000); -describe('batch', () => { - it('should be similar batch vs one', async () => { +describe('saveDoc', () => { + it('should always produce the same records', async () => { const client = algoliasearch('e', ''); const index = client.initIndex('a'); - let batch; - let single; - jest.spyOn(index, 'saveObjects').mockImplementationOnce((val) => { - batch = val[0]; - return true as any; - }); - jest.spyOn(index, 'saveObject').mockImplementationOnce((val) => { - single = val; + jest.spyOn(index, 'saveObject').mockImplementationOnce(() => { return true as any; }); + const final = { _searchInternal: { alternativeNames: ['preact', 'preact.js', 'preactjs'], @@ -221,18 +216,8 @@ describe('batch', () => { }), }); - const row = { id: '', key: 'preact', value: { rev: 'a' }, doc: preact }; - await saveDocs({ docs: [row], index }); - await saveDoc({ row: preact, index }); + await saveDoc({ formatted: formatPkg(preact), index }); - expect(index.saveObjects).toHaveBeenCalledWith([clean]); expect(index.saveObject).toHaveBeenCalledWith(clean); - expect(single).toMatchObject({ - ...batch, - lastCrawl: expect.any(String), - _searchInternal: { - expiresAt: expect.any(Number), - }, - }); }); }); diff --git a/src/api.ts b/src/api.ts index 29cf35a7e..895c2925c 100644 --- a/src/api.ts +++ b/src/api.ts @@ -5,7 +5,7 @@ import { log } from './utils/log'; // Used for health check export function createAPI(): void { - const server = http.createServer((req, res) => { + const server = http.createServer((_req, res) => { datadog.check('main', datadog.CHECKS.OK); res.writeHead(200, { 'Content-Type': 'application/json' }); res.end( diff --git a/src/bootstrap.ts b/src/bootstrap.ts index 69ec5dbbc..de8d660ac 100644 --- a/src/bootstrap.ts +++ b/src/bootstrap.ts @@ -6,6 +6,7 @@ import chalk from 'chalk'; import type { StateManager } from './StateManager'; import * as algolia from './algolia'; import { config } from './config'; +import { formatPkg } from './formatPkg'; import * as npm from './npm'; import type { PrefetchedPkg } from './npm/Prefetcher'; import { Prefetcher } from './npm/Prefetcher'; @@ -55,7 +56,7 @@ export class Bootstrap { if (state.seq && state.seq > 0 && state.bootstrapDone === true) { await algolia.putDefaultSettings(this.mainIndex, config); - log.info('⛷ Bootstrap: done'); + log.info('⛷ Bootstrap: already done'); log.info('-----'); return; @@ -118,7 +119,7 @@ export class Bootstrap { this.consumer.kill(); - this.onDone(); + await this.onDone(); } async onDone(): Promise { @@ -192,7 +193,11 @@ function createPkgConsumer( return; } - await saveDoc({ row: res, index }); + const formatted = formatPkg(res); + if (!formatted) { + return; + } + await saveDoc({ formatted, index }); const lastId = (await stateManager.get()).bootstrapLastId; diff --git a/src/config.ts b/src/config.ts index 73da3463d..9616c06f5 100644 --- a/src/config.ts +++ b/src/config.ts @@ -15,6 +15,7 @@ const indexSettings: Settings = { 'searchable(keywords)', 'searchable(computedKeywords)', 'searchable(owner.name)', + '_searchInternal.expiresAt', 'deprecated', 'isDeprecated', 'types.ts', @@ -47,6 +48,7 @@ const indexSettings: Settings = { optionalWords: ['js', 'javascript'], separatorsToIndex: '_', replaceSynonymsInHighlight: false, + maxValuesPerFacet: 1000, }; const indexSynonyms: Synonym[] = [ @@ -176,6 +178,7 @@ export const config = { retryMax: 2, retrySkipped: ms('1 minute'), retryBackoffPow: 3, + refreshPeriod: ms('2 minutes'), }; export type Config = typeof config; diff --git a/src/errors.ts b/src/errors.ts new file mode 100644 index 000000000..c9ce95ff9 --- /dev/null +++ b/src/errors.ts @@ -0,0 +1 @@ +export class DeletedError extends Error {} diff --git a/src/formatPkg.ts b/src/formatPkg.ts index 202a87377..89a7afd8d 100644 --- a/src/formatPkg.ts +++ b/src/formatPkg.ts @@ -20,6 +20,8 @@ import type { } from './@types/pkg'; import { config } from './config'; import type { GetPackage, GetUser, PackageRepo } from './npm/types'; +import { datadog } from './utils/datadog'; +import { getExpiresAt } from './utils/getExpiresAt'; const defaultGravatar = 'https://www.gravatar.com/avatar/'; @@ -59,11 +61,14 @@ const registrySubsetRules: Array<(pkg: NicePackageType) => Subset> = [ }), ]; -export default function formatPkg(pkg: GetPackage): RawPkg | undefined { +export function formatPkg(pkg: GetPackage): RawPkg | undefined { + const start = Date.now(); + // Be careful NicePackage modify the Object ref const cleaned: NicePackageType | undefined = new NicePackage(pkg); if (!cleaned || !cleaned.name) { - return undefined; + return; } + if (Array.isArray(cleaned.main)) { // https://github.com/angular-ui/bootstrap-bower/issues/52 cleaned.main = cleaned.main[0]; @@ -110,7 +115,7 @@ export default function formatPkg(pkg: GetPackage): RawPkg | undefined { } if (!githubRepo && !lastPublisher && !author) { - return undefined; // ignore this package, we cannot link it to anyone + return; // ignore this package, we cannot link it to anyone } const repoInfo = getRepositoryInfo(defaultRepository); @@ -144,6 +149,7 @@ export default function formatPkg(pkg: GetPackage): RawPkg | undefined { const rawPkg: RawPkg = { objectID: cleaned.name, + rev: cleaned.other._rev, name: cleaned.name, downloadsLast30Days: 0, downloadsRatio: 0, @@ -180,13 +186,16 @@ export default function formatPkg(pkg: GetPackage): RawPkg | undefined { lastCrawl: new Date().toISOString(), _searchInternal: { alternativeNames, - expiresAt: new Date(Date.now() + config.expiresAt).getTime(), + expiresAt: getExpiresAt(), }, }; const truncated = truncatePackage(rawPkg); - return traverse(truncated).forEach(maybeEscape); + const escaped = traverse(truncated).forEach(maybeEscape); + + datadog.timing('formatPkg', Date.now() - start); + return escaped; } function checkSize(pkg: RawPkg): { @@ -204,7 +213,7 @@ function checkSize(pkg: RawPkg): { }; } -function truncatePackage(pkg: RawPkg): RawPkg | null { +function truncatePackage(pkg: RawPkg): RawPkg | undefined { const smallerPkg = { ...pkg }; { @@ -225,7 +234,7 @@ function truncatePackage(pkg: RawPkg): RawPkg | null { '** TRUNCATED ** this package was too big, so non-essential information was removed'; smallerPkg.versions = pkg.versions[pkg.version] ? { - [pkg.version]: pkg.versions[pkg.version], + [pkg.version]: pkg.versions[pkg.version]!, } : {}; smallerPkg.tags = pkg?.tags?.latest @@ -251,7 +260,7 @@ function truncatePackage(pkg: RawPkg): RawPkg | null { { const { isTooBig } = checkSize(smallerPkg); if (isTooBig) { - return null; + return; } } @@ -418,8 +427,8 @@ function getGitHubRepoInfo({ const [, user, project, path = ''] = result; return { - user, - project, + user: user!, + project: project!, path, head, }; @@ -461,8 +470,8 @@ function getRepositoryInfoFromHttpUrl(repository: string): Repo | null { return { url: repository, host: `${domain}.${domainTld}`, - user, - project, + user: user!, + project: project!, path, }; } diff --git a/src/npm/Prefetcher.ts b/src/npm/Prefetcher.ts index 296026280..569702bc3 100644 --- a/src/npm/Prefetcher.ts +++ b/src/npm/Prefetcher.ts @@ -82,7 +82,7 @@ export class Prefetcher { this.#ready.push(...packages); this.#offset = offset; - this.#nextKey = packages[packages.length - 1].id; + this.#nextKey = packages[packages.length - 1]!.id; } catch (err) { sentry.report(err); } diff --git a/src/npm/index.ts b/src/npm/index.ts index 78df36e25..0749a6359 100644 --- a/src/npm/index.ts +++ b/src/npm/index.ts @@ -10,6 +10,7 @@ import numeral from 'numeral'; import type { FinalPkg, RawPkg } from '../@types/pkg'; import { config } from '../config'; import { datadog } from '../utils/datadog'; +import { getExpiresAt } from '../utils/getExpiresAt'; import { log } from '../utils/log'; import { httpsAgent, request, USER_AGENT } from '../utils/request'; @@ -243,21 +244,13 @@ function computeDownload( ? downloadsLast30Days.toString().length : 0; - // Rand -48h to +48h, to spread refresh - const randHours = Math.floor(Math.random() * (-48 - 48 + 1)) + 48; - const expiresAt = - new Date( - Date.now() + (popular ? config.popularExpiresAt : config.expiresAt) - ).getTime() + - randHours * 3600 * 1000; - return { downloadsLast30Days, humanDownloadsLast30Days: numeral(downloadsLast30Days).format('0.[0]a'), downloadsRatio, popular, _searchInternal: { - expiresAt, + expiresAt: getExpiresAt(popular), downloadsMagnitude, // if the package is popular, we copy its name to a dedicated attribute // which will make popular records' `name` matches to be ranked higher than other matches @@ -313,7 +306,7 @@ async function getDownloads( const all = pkgs.map((pkg) => { return computeDownload( pkg, - downloadsPerPkgName[pkg.name], + downloadsPerPkgName[pkg.name]!, totalNpmDownloads ); }); @@ -331,7 +324,7 @@ async function getDownload( const name = encodeURIComponent(pkg.name); const totalNpmDownloads = await getTotalDownloads(); const downloads = await fetchDownload(name); - return computeDownload(pkg, downloads.body[pkg.name], totalNpmDownloads); + return computeDownload(pkg, downloads.body[pkg.name]!, totalNpmDownloads); } finally { datadog.timing('npm.getDownload', Date.now() - start); } diff --git a/src/saveDocs.ts b/src/saveDocs.ts index 1f31028d2..9716a25b8 100644 --- a/src/saveDocs.ts +++ b/src/saveDocs.ts @@ -1,138 +1,31 @@ import type { SearchIndex } from 'algoliasearch'; -import type { DocumentResponseRow } from 'nano'; import type { FinalPkg, RawPkg } from './@types/pkg'; -import { getChangelogs, getChangelog } from './changelog'; -import formatPkg from './formatPkg'; +import { getChangelog } from './changelog'; import * as jsDelivr from './jsDelivr'; -import { - getModuleTypes, - getStyleTypes, - getStyleTypesForAll, - getModuleTypesForAll, -} from './jsDelivr/pkgTypes'; +import { getModuleTypes, getStyleTypes } from './jsDelivr/pkgTypes'; import * as npm from './npm'; -import type { GetPackage } from './npm/types'; -import { getTSSupport, getTypeScriptSupport } from './typescript/index'; +import { getTypeScriptSupport } from './typescript/index'; import { datadog } from './utils/datadog'; -import { log } from './utils/log'; - -export async function saveDocs({ - docs, - index, -}: { - docs: Array>; - index: SearchIndex; -}): Promise { - const start = Date.now(); - const names: string[] = []; - - const rawPkgs = docs - .map((result) => { - const start1 = Date.now(); - - const formatted = formatPkg(result.doc!); - - datadog.timing('formatPkg', Date.now() - start1); - - if (formatted) { - names.push(formatted.name); - } - - return formatted; - }) - .filter((pkg): pkg is RawPkg => pkg !== undefined); - - if (rawPkgs.length === 0) { - log.info('🔍 No pkgs found in response.'); - return Promise.resolve(0); - } - log.info(' => ', names); - - log.info(' Adding metadata...'); - - let start2 = Date.now(); - const pkgs = await addMetaDatas(rawPkgs); - datadog.timing('saveDocs.addMetaData', Date.now() - start2); - - log.info(` Saving...`); - - start2 = Date.now(); - await index.saveObjects(pkgs); - datadog.timing('saveDocs.saveObjects', Date.now() - start2); - - log.info(` Saved`); - - datadog.timing('saveDocs', Date.now() - start); - return pkgs.length; -} export async function saveDoc({ - row, + formatted, index, }: { - row: GetPackage; + formatted: RawPkg; index: SearchIndex; }): Promise { - const start = Date.now(); - - const formatted = formatPkg(row); - - datadog.timing('formatPkg', Date.now() - start); - - if (!formatted) { - return; - } - - let start2 = Date.now(); + let start = Date.now(); const pkg = await addMetaData(formatted); - datadog.timing('saveDocs.addMetaData.one', Date.now() - start2); + datadog.timing('saveDocs.addMetaData.one', Date.now() - start); - start2 = Date.now(); + start = Date.now(); await index.saveObject(pkg); - datadog.timing('saveDocs.saveObject.one', Date.now() - start2); + datadog.timing('saveDocs.saveObject.one', Date.now() - start); datadog.timing('saveDocs.one', Date.now() - start); } -async function addMetaDatas(pkgs: RawPkg[]): Promise { - const [downloads, dependents, hits, filelists] = await Promise.all([ - npm.getDownloads(pkgs), - npm.getDependents(pkgs), - jsDelivr.getHits(pkgs), - jsDelivr.getAllFilesList(pkgs), - ]); - - const [changelogs, ts, moduleTypes, styleTypes] = await Promise.all([ - getChangelogs(pkgs, filelists), - getTSSupport(pkgs, filelists), - getModuleTypesForAll(pkgs, filelists), - getStyleTypesForAll(pkgs, filelists), - ]); - - const start = Date.now(); - const all: FinalPkg[] = pkgs.map((pkg, index) => { - return { - ...pkg, - ...downloads[index], - ...dependents[index], - ...changelogs[index], - ...hits[index], - ...ts[index], - ...moduleTypes[index], - ...styleTypes[index], - _searchInternal: { - ...pkg._searchInternal, - ...(downloads[index] ? downloads[index]!._searchInternal : {}), - ...hits[index]._searchInternal, - }, - }; - }); - - datadog.timing('saveDocs.addMetaData', Date.now() - start); - return all; -} - async function addMetaData(pkg: RawPkg): Promise { const [download, dependent, hit, filelist] = await Promise.all([ npm.getDownload(pkg), diff --git a/src/utils/getExpiresAt.ts b/src/utils/getExpiresAt.ts new file mode 100644 index 000000000..771a3f065 --- /dev/null +++ b/src/utils/getExpiresAt.ts @@ -0,0 +1,19 @@ +import { config } from '../config'; + +export function getExpiresAt(popular = false): number { + // Rand -48h to +48h, to spread refresh + const randHours = Math.floor(Math.random() * (-48 - 48 + 1)) + 48; + + // Round minutes to avoid too many values in facet + const minutes = Math.ceil(Math.floor(Math.random() * (60 + 1)) / 30) * 30; + + const expiresAt = new Date( + Date.now() + (popular ? config.popularExpiresAt : config.expiresAt) + ); + expiresAt.setMilliseconds(0); + expiresAt.setSeconds(0); + expiresAt.setMinutes(minutes); + expiresAt.setHours(randHours); + + return expiresAt.getTime(); +} diff --git a/src/watch.ts b/src/watch.ts index 7cffd1b58..be527e08c 100644 --- a/src/watch.ts +++ b/src/watch.ts @@ -1,12 +1,14 @@ -/* eslint-disable consistent-return */ import type { SearchIndex } from 'algoliasearch'; import type { QueueObject } from 'async'; import { queue } from 'async'; import chalk from 'chalk'; import type { DatabaseChangesResultItem } from 'nano'; +import type { FinalPkg } from './@types/pkg'; import type { StateManager } from './StateManager'; import { config } from './config'; +import { DeletedError } from './errors'; +import { formatPkg } from './formatPkg'; import * as npm from './npm'; import { isFailure } from './npm/types'; import { saveDoc } from './saveDocs'; @@ -15,7 +17,11 @@ import { log } from './utils/log'; import * as sentry from './utils/sentry'; import { wait } from './utils/wait'; -type ChangeJob = { change: DatabaseChangesResultItem; retry: number }; +type ChangeJob = { + change: DatabaseChangesResultItem; + retry: number; + ignoreSeq: boolean; +}; export class Watch { stateManager: StateManager; @@ -24,6 +30,7 @@ export class Watch { // Cached npmInfo.seq totalSequence: number = 0; changesConsumer: QueueObject | undefined; + pkgsLastUpdate = new Map(); constructor(stateManager: StateManager, mainIndex: SearchIndex) { this.stateManager = stateManager; @@ -77,6 +84,10 @@ export class Watch { }, 5000); this.checkSkipped(); + + // Most packages don't have enough info to enable refresh for the moment + // this.checkToRefresh(); + await this.watch(); log.info('-----'); @@ -106,7 +117,10 @@ export class Watch { since: String(seq), }) .on('change', (change) => { - changesConsumer.push({ change, retry: 0 }); + changesConsumer.push({ change, retry: 0, ignoreSeq: false }); + if (change.id) { + this.pkgsLastUpdate.set(change.id, Date.now()); + } // on:change will not wait for us to process to trigger again // So we need to control the fetch manually otherwise it will fetch thousand/millions of update in advance @@ -118,7 +132,6 @@ export class Watch { sentry.report(err); }); log.info(`listening from ${seq}...`); - changesConsumer.saturated(() => { if (changesConsumer.length() < 5) { npm.db.changesReader.resume(); @@ -144,7 +157,7 @@ export class Watch { this.skipped.clear(); for (const job of clone) { - this.changesConsumer?.unshift({ ...job, retry: 0 }); + this.changesConsumer?.unshift({ ...job, retry: 0, ignoreSeq: true }); } } @@ -153,6 +166,85 @@ export class Watch { }, config.retrySkipped); } + /** + * Regularly try to refresh packages informations. + * Mostly here for time based data: download stats, popularity, etc...&. + */ + async checkToRefresh(): Promise { + log.info('Checking refresh jobs'); + + // schedule next iteration + setTimeout(() => { + this.checkToRefresh(); + }, config.refreshPeriod); + + // We list all values in facet and pick the oldest (hopefully the oldest is in the list) + const res = await this.mainIndex.search('', { + facets: ['_searchInternal.expiresAt'], + hitsPerPage: 0, + sortFacetValuesBy: 'alpha', + }); + if (!res.facets) { + log.error('Wrong results from Algolia'); + return; + } + + const list = Object.keys(res.facets['_searchInternal.expiresAt']!).sort(); + log.info(' > Found', list.length, 'expiration values'); + if (list.length <= 0) { + return; + } + + const pick = list.shift()!; + const expiresAt = new Date(parseInt(pick, 10)); + if (expiresAt.getTime() > Date.now()) { + log.info(' > Oldest date is in the future'); + return; + } + log.info(' > Picked the oldest', expiresAt.toISOString()); + + // Retrieve some packages to update, not too much to avoid flooding the queue + const pkgs = await this.mainIndex.search('', { + facetFilters: [`_searchInternal.expiresAt:${pick}`], + facets: ['_searchInternal.expiresAt'], + hitsPerPage: 20, + }); + log.info(' > Found', pkgs.hits.length, 'expired packages'); + + const pushed: string[] = []; + for (const pkg of pkgs.hits) { + if (!pkg.rev) { + continue; + } + const lastUpdate = this.pkgsLastUpdate.get(pkg.objectID); + if (lastUpdate && lastUpdate > pkg.modified) { + log.info( + 'Skipping pkg older than what we have in memory', + pkg.objectID, + pkg.modified, + lastUpdate + ); + continue; + } + + pushed.push(pkg.objectID); + + // Due to the event loop, there is a miniscule chance that an event for the same pkg come at the same time. + this.changesConsumer?.unshift({ + change: { + id: pkg.objectID, + changes: [{ rev: pkg.rev }], + seq: -1, + deleted: false, + }, + retry: 0, + ignoreSeq: true, + }); + } + + log.info(' > Pushed', pushed); + } + /** * Process changes in order. */ @@ -175,16 +267,26 @@ export class Watch { if (change.deleted) { // changesConsumer deletes the package directly in the index - throw new Error('deleted'); + throw new DeletedError(); + } + if (change.changes.length <= 0) { + log.error('Document without change'); + return; } - const res = await npm.getDoc(change.id, change.changes[0].rev); + + const res = await npm.getDoc(change.id, change.changes[0]!.rev); if (isFailure(res)) { log.error('Got an error', res.error); throw new Error(res.error); } - await saveDoc({ row: res, index: this.mainIndex }); + const formatted = formatPkg(res); + if (!formatted) { + return; + } + + await saveDoc({ formatted, index: this.mainIndex }); } /** @@ -222,6 +324,7 @@ export class Watch { return queue(async (job) => { const start = Date.now(); + const ignoreSeq = job.retry > 0 || job.ignoreSeq; const { seq, id } = job.change; log.info(`Start:`, id); @@ -233,15 +336,17 @@ export class Watch { try { await this.loop(job); - await this.stateManager.save({ - seq, - }); + if (!ignoreSeq) { + await this.stateManager.save({ + seq, + }); + } log.info(`Done:`, id); } catch (err) { // this error can be thrown by us or by nano if: // - we received a change that is not marked as "deleted" // - and the package has since been deleted - if (err instanceof Error && err.message === 'deleted') { + if (err instanceof DeletedError) { this.mainIndex.deleteObject(id); log.info(`deleted`, id); return; @@ -259,7 +364,9 @@ export class Watch { this.skipped.set(id, job); } } finally { - this.logProgress(seq); + if (!ignoreSeq) { + this.logProgress(seq); + } datadog.timing('watch.loop', Date.now() - start); } }, 1); diff --git a/tsconfig.json b/tsconfig.json index 048f5fdb8..e10e034a0 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -6,7 +6,10 @@ "target": "es2017", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', 'ES2021', or 'ESNEXT'. */ "module": "commonjs", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */ "lib": [ - "es2017" + "ES6", + "es2015", + "es2017", + "es2019", ], /* Specify library files to be included in the compilation. */ "allowJs": true, /* Allow javascript files to be compiled. */ "checkJs": false, /* Report errors in .js files. */ @@ -27,20 +30,20 @@ /* Strict Type-Checking Options */ "strict": true, /* Enable all strict type-checking options. */ "noImplicitAny": false, /* Raise error on expressions and declarations with an implied 'any' type. */ - // "strictNullChecks": true, /* Enable strict null checks. */ - // "strictFunctionTypes": true, /* Enable strict checking of function types. */ - // "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */ - // "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ - // "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ - // "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ + "strictNullChecks": true, /* Enable strict null checks. */ + "strictFunctionTypes": true, /* Enable strict checking of function types. */ + "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */ + "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ + "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ + "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ /* Additional Checks */ - // "noUnusedLocals": true, /* Report errors on unused locals. */ - // "noUnusedParameters": true, /* Report errors on unused parameters. */ - // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ - // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ - // "noUncheckedIndexedAccess": true, /* Include 'undefined' in index signature results */ - // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an 'override' modifier. */ - // "noPropertyAccessFromIndexSignature": true, /* Require undeclared properties from index signatures to use element accesses. */ + "noUnusedLocals": true, /* Report errors on unused locals. */ + "noUnusedParameters": true, /* Report errors on unused parameters. */ + "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ + "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ + "noUncheckedIndexedAccess": true, /* Include 'undefined' in index signature results */ + "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an 'override' modifier. */ + // "noPropertyAccessFromIndexSignature": true, /* Require undeclared properties from index signatures to use element accesses. */ /* Module Resolution Options */ "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */ // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ @@ -63,7 +66,7 @@ /* Advanced Options */ "skipLibCheck": true, /* Skip type checking of declaration files. */ "forceConsistentCasingInFileNames": true, /* Disallow inconsistently-cased references to the same file. */ - "resolveJsonModule": true + "resolveJsonModule": true, }, "include": [ "src/*" diff --git a/yarn.lock b/yarn.lock index 499792589..a622afecf 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1441,6 +1441,13 @@ dependencies: "@babel/types" "^7.3.0" +"@types/bunyan@1.8.8": + version "1.8.8" + resolved "https://registry.yarnpkg.com/@types/bunyan/-/bunyan-1.8.8.tgz#8d6d33f090f37c07e2a80af30ae728450a101008" + integrity sha512-Cblq+Yydg3u+sGiz2mjHjC5MPmdjY+No4qvHrF+BUhblsmSfMvsHLbOG62tPbonsqBj6sbWv1LHcsoe5Jw+/Ow== + dependencies: + "@types/node" "*" + "@types/cacheable-request@^6.0.1": version "6.0.2" resolved "https://registry.yarnpkg.com/@types/cacheable-request/-/cacheable-request-6.0.2.tgz#c324da0197de0a98a2312156536ae262429ff6b9"