Skip to content

Commit

Permalink
fix(core): conversion between tough cookies and browser pool cookies
Browse files Browse the repository at this point in the history
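This commit fixes the conversion between tough cookies and browser pool cookies in both directions by distinguishing cookies whose domain has a leading dot from those whose domain does not: a leading dot is now emitted only for cookies that are not host-only, and a browser pool cookie without a leading dot is converted to a host-only tough cookie.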
  • Loading branch information
silva95gustavo committed May 4, 2024
1 parent 71b5ab1 commit 2f19494
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 5 deletions.
9 changes: 4 additions & 5 deletions packages/core/src/cookie_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export function toughCookieToBrowserPoolCookie(toughCookie: Cookie): CookieObjec
// Puppeteer and Playwright expect 'expires' to be 'Unix time in seconds', not ms
// If there is no expires date (so defaults to Infinity), we don't provide it to the browsers
expires: toughCookie.expires === 'Infinity' ? undefined : new Date(toughCookie.expires).getTime() / 1000,
domain: toughCookie.domain ?? undefined,
domain: toughCookie.domain ? `${toughCookie.hostOnly ? '' : '.'}${toughCookie.domain}` : undefined,
path: toughCookie.path ?? undefined,
secure: toughCookie.secure,
httpOnly: toughCookie.httpOnly,
Expand All @@ -60,10 +60,8 @@ export function toughCookieToBrowserPoolCookie(toughCookie: Cookie): CookieObjec
export function browserPoolCookieToToughCookie(cookieObject: CookieObject, maxAgeSecs: number) {
const isExpiresValid = cookieObject.expires && typeof cookieObject.expires === 'number' && cookieObject.expires > 0;
const expires = isExpiresValid ? new Date(cookieObject.expires! * 1000) : getDefaultCookieExpirationDate(maxAgeSecs);
const domain = typeof cookieObject.domain === 'string' && cookieObject.domain.startsWith('.')
? cookieObject.domain.slice(1)
: cookieObject.domain;

const domainHasLeadingDot = cookieObject.domain?.startsWith?.('.');
const domain = domainHasLeadingDot ? cookieObject.domain?.slice?.(1) : cookieObject.domain;
return new Cookie({
key: cookieObject.name,
value: cookieObject.value,
Expand All @@ -72,6 +70,7 @@ export function browserPoolCookieToToughCookie(cookieObject: CookieObject, maxAg
path: cookieObject.path,
secure: cookieObject.secure,
httpOnly: cookieObject.httpOnly,
hostOnly: !domainHasLeadingDot,
});
}

Expand Down
66 changes: 66 additions & 0 deletions test/core/session_pool/session.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { EVENT_SESSION_RETIRED, ProxyConfiguration, Session, SessionPool } from '@crawlee/core';
import type { Dictionary } from '@crawlee/utils';
import { entries, sleep } from '@crawlee/utils';
import { CookieJar } from 'tough-cookie';

describe('Session - testing session behaviour ', () => {
let sessionPool: SessionPool;
Expand Down Expand Up @@ -235,6 +236,71 @@ describe('Session - testing session behaviour ', () => {
expect(session.getCookieString(url)).toBe('cookie2=your-cookie');
});

// Cookies passed to `setCookies` with a domain that has no leading dot are expected
// to behave as host-only: they are returned only for a URL whose host is exactly
// the cookie's domain, not for sibling hosts or subdomains.
test('setCookies works with hostOnly cookies', () => {
const url = 'https://www.example.com';
const cookies = [
{ name: 'cookie1', value: 'my-cookie', domain: 'abc.example.com' },
{ name: 'cookie2', value: 'your-cookie', domain: 'example.com' },
];

session = new Session({ sessionPool });
session.setCookies(cookies, url);
// Neither cookie domain equals 'www.example.com', so no cookie is returned for `url`.
expect(session.getCookieString(url)).toBe('');
// The host 'example.com' equals cookie2's domain exactly, so only cookie2 is returned.
expect(session.getCookieString('https://example.com')).toBe('cookie2=your-cookie');
});

// A cookie stored in the jar with `hostOnly: false` is expected to be exposed by
// `getCookies` with a leading dot on its domain, and to be returned for both the
// bare domain and a subdomain of it.
test('getCookies should work', () => {
const url = 'https://www.example.com';

// Seed the session with a pre-built jar so the stored `hostOnly` flag is set explicitly.
session = new Session({
sessionPool,
cookieJar: CookieJar.fromJSON(JSON.stringify({
cookies: [
{
'key': 'foo',
'value': 'bar',
'domain': 'example.com',
'path': '/',
'hostOnly': false,
},
],
})),
});

// The jar stores 'example.com'; the returned cookie carries '.example.com'.
expect(session.getCookies(url)).to.containSubset([{
name: 'foo',
value: 'bar',
domain: '.example.com',
}]);
// The subdomain URL and the bare-domain URL must yield the same cookies.
expect(session.getCookies(url)).to.deep.equal(session.getCookies('https://example.com'));
});

// A cookie stored in the jar with `hostOnly: true` is expected to be returned only
// for the exact host, and to be exposed by `getCookies` without a leading dot.
test('getCookies should work with hostOnly cookies', () => {
const url = 'https://www.example.com';

// Same jar contents as the non-host-only case, except for the `hostOnly` flag.
session = new Session({
sessionPool,
cookieJar: CookieJar.fromJSON(JSON.stringify({
cookies: [
{
'key': 'foo',
'value': 'bar',
'domain': 'example.com',
'path': '/',
'hostOnly': true,
},
],
})),
});

// 'www.example.com' is not the exact host, so nothing is returned for it.
expect(session.getCookies(url)).toHaveLength(0);
// For the exact host the cookie is returned, with the domain left undotted.
expect(session.getCookies('https://example.com')).to.containSubset([{
name: 'foo',
value: 'bar',
domain: 'example.com',
}]);
});

describe('.putResponse & .getCookieString', () => {
test('should set and update cookies from "set-cookie" header', () => {
const headers: Dictionary<string | string[]> = {};
Expand Down

0 comments on commit 2f19494

Please sign in to comment.