From 2f19494c043af8a903cfd4cb1aa91f5fd659a266 Mon Sep 17 00:00:00 2001 From: Gustavo Silva Date: Fri, 3 May 2024 19:06:39 +0100 Subject: [PATCH] fix(core): conversion between tough cookies and browser pool cookies This commit specifically fixes the conversion from tough cookies to browser pool cookies and vice versa, by correctly handling cookies where the domain has a leading dot versus when it doesn't. --- packages/core/src/cookie_utils.ts | 9 ++-- test/core/session_pool/session.test.ts | 66 ++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/packages/core/src/cookie_utils.ts b/packages/core/src/cookie_utils.ts index 74c8b7afc0bc..7bf2c8402fa7 100644 --- a/packages/core/src/cookie_utils.ts +++ b/packages/core/src/cookie_utils.ts @@ -45,7 +45,7 @@ export function toughCookieToBrowserPoolCookie(toughCookie: Cookie): CookieObjec // Puppeteer and Playwright expect 'expires' to be 'Unix time in seconds', not ms // If there is no expires date (so defaults to Infinity), we don't provide it to the browsers expires: toughCookie.expires === 'Infinity' ? undefined : new Date(toughCookie.expires).getTime() / 1000, - domain: toughCookie.domain ?? undefined, + domain: toughCookie.domain ? `${toughCookie.hostOnly ? '' : '.'}${toughCookie.domain}` : undefined, path: toughCookie.path ?? undefined, secure: toughCookie.secure, httpOnly: toughCookie.httpOnly, @@ -60,10 +60,8 @@ export function toughCookieToBrowserPoolCookie(toughCookie: Cookie): CookieObjec export function browserPoolCookieToToughCookie(cookieObject: CookieObject, maxAgeSecs: number) { const isExpiresValid = cookieObject.expires && typeof cookieObject.expires === 'number' && cookieObject.expires > 0; const expires = isExpiresValid ? new Date(cookieObject.expires! * 1000) : getDefaultCookieExpirationDate(maxAgeSecs); - const domain = typeof cookieObject.domain === 'string' && cookieObject.domain.startsWith('.') - ? cookieObject.domain.slice(1) - : cookieObject.domain; - + const domainHasLeadingDot = cookieObject.domain?.startsWith?.('.'); + const domain = domainHasLeadingDot ? cookieObject.domain?.slice?.(1) : cookieObject.domain; return new Cookie({ key: cookieObject.name, value: cookieObject.value, @@ -72,6 +70,7 @@ export function browserPoolCookieToToughCookie(cookieObject: CookieObject, maxAg path: cookieObject.path, secure: cookieObject.secure, httpOnly: cookieObject.httpOnly, + hostOnly: !domainHasLeadingDot, }); } diff --git a/test/core/session_pool/session.test.ts b/test/core/session_pool/session.test.ts index f36a0bf578a2..22e60d0d7089 100644 --- a/test/core/session_pool/session.test.ts +++ b/test/core/session_pool/session.test.ts @@ -1,6 +1,7 @@ import { EVENT_SESSION_RETIRED, ProxyConfiguration, Session, SessionPool } from '@crawlee/core'; import type { Dictionary } from '@crawlee/utils'; import { entries, sleep } from '@crawlee/utils'; +import { CookieJar } from 'tough-cookie'; describe('Session - testing session behaviour ', () => { let sessionPool: SessionPool; @@ -235,6 +236,71 @@ describe('Session - testing session behaviour ', () => { expect(session.getCookieString(url)).toBe('cookie2=your-cookie'); }); + test('setCookies works with hostOnly cookies', () => { + const url = 'https://www.example.com'; + const cookies = [ + { name: 'cookie1', value: 'my-cookie', domain: 'abc.example.com' }, + { name: 'cookie2', value: 'your-cookie', domain: 'example.com' }, + ]; + + session = new Session({ sessionPool }); + session.setCookies(cookies, url); + expect(session.getCookieString(url)).toBe(''); + expect(session.getCookieString('https://example.com')).toBe('cookie2=your-cookie'); + }); + + test('getCookies should work', () => { + const url = 'https://www.example.com'; + + session = new Session({ + sessionPool, + cookieJar: CookieJar.fromJSON(JSON.stringify({ + cookies: [ + { + 'key': 'foo', + 'value': 'bar', + 'domain': 'example.com', + 'path': '/', + 'hostOnly': false, + }, + ], + })), + }); + + expect(session.getCookies(url)).to.containSubset([{ + name: 'foo', + value: 'bar', + domain: '.example.com', + }]); + expect(session.getCookies(url)).to.deep.equal(session.getCookies('https://example.com')); + }); + + test('getCookies should work with hostOnly cookies', () => { + const url = 'https://www.example.com'; + + session = new Session({ + sessionPool, + cookieJar: CookieJar.fromJSON(JSON.stringify({ + cookies: [ + { + 'key': 'foo', + 'value': 'bar', + 'domain': 'example.com', + 'path': '/', + 'hostOnly': true, + }, + ], + })), + }); + + expect(session.getCookies(url)).toHaveLength(0); + expect(session.getCookies('https://example.com')).to.containSubset([{ + name: 'foo', + value: 'bar', + domain: 'example.com', + }]); + }); + describe('.putResponse & .getCookieString', () => { test('should set and update cookies from "set-cookie" header', () => { const headers: Dictionary = {};