Skip to content

Commit

Permalink
feat: infiniteScroll has maxScrollHeight limit (#1945)
Browse files: browse the repository at this point in the history
  • Loading branch information
barjin committed Jun 9, 2023
1 parent ac0bd25 commit 44997bb
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 8 deletions.
Expand Up @@ -336,10 +336,16 @@ export function compileScript(scriptString: string, context: Dictionary = Object
export interface InfiniteScrollOptions {
/**
* How many seconds to scroll for. If 0, will scroll until bottom of page.
* @default 1
* @default 0
*/
timeoutSecs?: number;

/**
* How many pixels to scroll down. If 0, will scroll until bottom of page.
* @default 0
*/
maxScrollHeight?: number;

/**
* How many seconds to wait for no new content to load before exit.
* @default 4
Expand Down Expand Up @@ -373,18 +379,20 @@ export async function infiniteScroll(page: Page, options: InfiniteScrollOptions
ow(page, ow.object.validate(validators.browserPage));
ow(options, ow.object.exactShape({
timeoutSecs: ow.optional.number,
maxScrollHeight: ow.optional.number,
waitForSecs: ow.optional.number,
scrollDownAndUp: ow.optional.boolean,
buttonSelector: ow.optional.string,
stopScrollCallback: ow.optional.function,
}));

const { timeoutSecs = 0, waitForSecs = 4, scrollDownAndUp = false, buttonSelector, stopScrollCallback } = options;
const { timeoutSecs = 0, maxScrollHeight = 0, waitForSecs = 4, scrollDownAndUp = false, buttonSelector, stopScrollCallback } = options;

let finished;
const startTime = Date.now();
const CHECK_INTERVAL_MILLIS = 1000;
const SCROLL_HEIGHT_IF_ZERO = 10000;
let scrolledDistance = 0;
const maybeResourceTypesInfiniteScroll = ['xhr', 'fetch', 'websocket', 'other'];
const resourcesStats = {
newRequested: 0,
Expand Down Expand Up @@ -415,13 +423,20 @@ export async function infiniteScroll(page: Page, options: InfiniteScrollOptions
clearInterval(checkFinished);
finished = true;
}

// check if max scroll height has been reached
if (maxScrollHeight > 0 && scrolledDistance >= maxScrollHeight) {
clearInterval(checkFinished);
finished = true;
}
}, CHECK_INTERVAL_MILLIS);

// Issues one mouse-wheel scroll sized to the current document body height and
// adds the requested distance to `scrolledDistance`.
const doScroll = async () => {
const documentHeight = await page.evaluate(() => document.body.scrollHeight);

// A scrollHeight of zero gives no usable step, so fall back to the fixed constant.
let wheelDelta = documentHeight;
if (documentHeight === 0) {
wheelDelta = SCROLL_HEIGHT_IF_ZERO;
}

await page.mouse.wheel(0, wheelDelta);
// NOTE(review): this records the requested wheel delta, not the distance the
// page actually moved — confirm this is the intended meaning of the counter.
scrolledDistance += wheelDelta;
};

const maybeClickButton = async () => {
Expand Down
19 changes: 17 additions & 2 deletions packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts
Expand Up @@ -446,10 +446,16 @@ export async function gotoExtended(page: Page, request: Request, gotoOptions: Di
export interface InfiniteScrollOptions {
/**
* How many seconds to scroll for. If 0, will scroll until bottom of page.
* @default 1
* @default 0
*/
timeoutSecs?: number;

/**
* How many pixels to scroll down. If 0, will scroll until bottom of page.
* @default 0
*/
maxScrollHeight?: number;

/**
* How many seconds to wait for no new content to load before exit.
* @default 4
Expand Down Expand Up @@ -483,18 +489,20 @@ export async function infiniteScroll(page: Page, options: InfiniteScrollOptions
ow(page, ow.object.validate(validators.browserPage));
ow(options, ow.object.exactShape({
timeoutSecs: ow.optional.number,
maxScrollHeight: ow.optional.number,
waitForSecs: ow.optional.number,
scrollDownAndUp: ow.optional.boolean,
buttonSelector: ow.optional.string,
stopScrollCallback: ow.optional.function,
}));

const { timeoutSecs = 0, waitForSecs = 4, scrollDownAndUp = false, buttonSelector, stopScrollCallback } = options;
const { timeoutSecs = 0, maxScrollHeight = 0, waitForSecs = 4, scrollDownAndUp = false, buttonSelector, stopScrollCallback } = options;

let finished;
const startTime = Date.now();
const CHECK_INTERVAL_MILLIS = 1000;
const SCROLL_HEIGHT_IF_ZERO = 10000;
let scrolledDistance = 0;
const maybeResourceTypesInfiniteScroll = ['xhr', 'fetch', 'websocket', 'other'];
const resourcesStats = {
newRequested: 0,
Expand Down Expand Up @@ -545,6 +553,12 @@ export async function infiniteScroll(page: Page, options: InfiniteScrollOptions
clearInterval(checkFinished);
finished = true;
}

// check if max scroll height has been reached
if (maxScrollHeight > 0 && scrolledDistance > maxScrollHeight) {
clearInterval(checkFinished);
finished = true;
}
}, CHECK_INTERVAL_MILLIS);

const doScroll = async () => {
Expand All @@ -554,6 +568,7 @@ export async function infiniteScroll(page: Page, options: InfiniteScrollOptions
const delta = bodyScrollHeight === 0 ? SCROLL_HEIGHT_IF_ZERO : bodyScrollHeight;

await page.mouse.wheel({ deltaY: delta });
scrolledDistance += delta;
};

const maybeClickButton = async () => {
Expand Down
18 changes: 16 additions & 2 deletions test/core/playwright_utils.test.ts
Expand Up @@ -266,21 +266,35 @@ describe('playwrightUtils', () => {
expect(after).toBe(true);
});

test('stopScrollCallback works', async () => {
test('maxScrollHeight works', async () => {
const before = await page.evaluate(isAtBottom);
expect(before).toBe(false);

await playwrightUtils.infiniteScroll(page, {
waitForSecs: Infinity,
maxScrollHeight: 1000,
stopScrollCallback: async () => true,
});

const after = await page.evaluate(isAtBottom);
// It scrolls to the bottom in the first scroll so this is correct.
// The test passes because the Infinite waitForSecs is broken by the callback.
// The test passes because the Infinite waitForSecs is broken by the height requirement.
// If it didn't, the test would time out.
expect(after).toBe(true);
});

// Checks that infiniteScroll returns when stopScrollCallback resolves to true.
test('stopScrollCallback works', async () => {
// Precondition: the page must not already be scrolled to the bottom.
const startedAtBottom = await page.evaluate(isAtBottom);
expect(startedAtBottom).toBe(false);

// waitForSecs is Infinity, so the call is expected to return only because
// the callback asks it to stop; otherwise this test would time out.
await playwrightUtils.infiniteScroll(page, {
waitForSecs: Infinity,
stopScrollCallback: async () => true,
});

// The page is expected to be at the bottom once the call has returned.
const endedAtBottom = await page.evaluate(isAtBottom);
expect(endedAtBottom).toBe(true);
});
});

test('saveSnapshot() works', async () => {
Expand Down
18 changes: 16 additions & 2 deletions test/core/puppeteer_utils.test.ts
Expand Up @@ -392,21 +392,35 @@ describe('puppeteerUtils', () => {
expect(after).toBe(true);
});

test('stopScrollCallback works', async () => {
test('maxScrollHeight works', async () => {
const before = await page.evaluate(isAtBottom);
expect(before).toBe(false);

await puppeteerUtils.infiniteScroll(page, {
waitForSecs: Infinity,
maxScrollHeight: 1000,
stopScrollCallback: async () => true,
});

const after = await page.evaluate(isAtBottom);
// It scrolls to the bottom in the first scroll so this is correct.
// The test passes because the Infinite waitForSecs is broken by the callback.
// The test passes because the Infinite waitForSecs is broken by the height requirement.
// If it didn't, the test would time out.
expect(after).toBe(true);
});

// Checks that infiniteScroll returns when stopScrollCallback resolves to true.
test('stopScrollCallback works', async () => {
// Precondition: the page must not already be scrolled to the bottom.
const startedAtBottom = await page.evaluate(isAtBottom);
expect(startedAtBottom).toBe(false);

// waitForSecs is Infinity, so the call is expected to return only because
// the callback asks it to stop; otherwise this test would time out.
await puppeteerUtils.infiniteScroll(page, {
waitForSecs: Infinity,
stopScrollCallback: async () => true,
});

// The page is expected to be at the bottom once the call has returned.
const endedAtBottom = await page.evaluate(isAtBottom);
expect(endedAtBottom).toBe(true);
});
});

it('saveSnapshot() works', async () => {
Expand Down

0 comments on commit 44997bb

Please sign in to comment.