Skip to content

Commit

Permalink
fix(getCrumb): handle consent when required i.e. in EU (#637)
Browse files Browse the repository at this point in the history
  • Loading branch information
gadicc committed May 8, 2023
1 parent a2ea88f commit a4f90f1
Show file tree
Hide file tree
Showing 11 changed files with 499 additions and 17 deletions.
6 changes: 5 additions & 1 deletion src/lib/cookieJar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export class MyCookieJar extends CookieJar {
url: string
) {
let cookies;
// console.log("setFromSetCookieHeaders", setCookieHeader);

if (typeof setCookieHeader === "undefined") {
// no-op
Expand All @@ -17,7 +18,10 @@ export class MyCookieJar extends CookieJar {

if (cookies)
for (const cookie of cookies)
if (cookie instanceof Cookie) this.setCookieSync(cookie, url);
if (cookie instanceof Cookie) {
// console.log("setCookieSync", cookie, url);
this.setCookieSync(cookie, url);
}
}
}

Expand Down
12 changes: 7 additions & 5 deletions src/lib/fetchDevel.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/* istanbul ignore file */
import nodeFetch, { Headers } from "node-fetch";
import fs from "fs";
import path from "path";
import crypto from "crypto";

//const FILE_BASE = path.join(__dirname, "..", "..", "tests", "http");
Expand All @@ -10,7 +9,10 @@ const BASE_URL = new URL("../../tests/http/", import.meta.url);
class FakeResponse {
constructor(props) {
Object.keys(props).forEach((key) => (this[key] = props[key]));
this.headers = new Headers(this.headers);
const rawHeaders = this.headers;
this.headers = new Headers(rawHeaders);
// node-fetch extension, needed to handle multiple set-cookie headers
this.headers.raw = () => rawHeaders;
}

async json() {
Expand Down Expand Up @@ -82,9 +84,9 @@ async function fetchDevel(url, fetchOptions) {
},
};

const contentType =
contentObj.response.headers["content-type"][0].split(";");
if (contentType[0] === "application/json") {
const contentTypeHeader = contentObj.response.headers["content-type"];
const contentType = contentTypeHeader && contentTypeHeader[0].split(";");
if (contentType === "application/json") {
contentObj.response.bodyJson = await res.json();
} else {
contentObj.response.body = await res.text();
Expand Down
17 changes: 17 additions & 0 deletions src/lib/getCrumb.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,23 @@ describe("getCrumb", () => {
)
).rejects.toThrowError(/Could not find crumb/);
});

// Exercises _getCrumb against recorded ("devel") HTTP fixtures, starting from
// the "pre-consent" fixture, and checks the crumb that is finally returned.
// NOTE(review): presumably the first fixture is a redirect to the
// guce.yahoo.com consent page, as the test title suggests — confirm against
// the fixture file.
it("redirect https://guce.yahoo.com/consent?brandType=nonEu", async () => {
// consoleRestore();
const fetch = await env.fetchDevel();

const crumb = await _getCrumb(
fetch,
// @ts-expect-error: fetchDevel still has no types (yet)
{ devel: true },
"https://finance.yahoo.com/quote/AAPL",
"getCrumb-quote-AAPL-pre-consent-VPN-UK.json", // develOverride: fixture for the first request
true, // noCache
new MyCookieJar() // fresh jar instead of the default cookie jar
);
expect(crumb).toBe("Ky3Po5TGQRZ");
// consoleSilent();
});
});

describe("getCrumb", () => {
Expand Down
187 changes: 180 additions & 7 deletions src/lib/getCrumb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,38 @@ let crumb: string | null = null;
// let crumbFetchTime = 0;
// const MAX_CRUMB_CACHE_TIME = 60_000 * 60 * 24;

/**
 * Decodes HTML numeric character references in `str`.
 *
 * Handles hexadecimal (`&#x3D;`, `&#X2019;`) and decimal (`&#61;`)
 * references, including code points above U+FFFF. References whose value
 * is not a valid Unicode code point (greater than U+10FFFF) are left
 * untouched. Named entities such as `&amp;` are NOT decoded.
 *
 * @param str - Text that may contain numeric character references.
 * @returns `str` with every valid numeric reference replaced by its character.
 */
const parseHtmlEntities = (str: string) =>
  str.replace(
    /&#(?:x([0-9a-f]{1,6})|([0-9]{1,7}));/gi,
    (match: string, hex: string | undefined, dec: string | undefined) => {
      const codePoint =
        hex !== undefined ? parseInt(hex, 16) : parseInt(dec ?? "", 10);
      // String.fromCodePoint throws a RangeError outside the Unicode range,
      // so keep such references verbatim instead of failing.
      if (Number.isNaN(codePoint) || codePoint > 0x10ffff) return match;
      return String.fromCodePoint(codePoint);
    }
  );

export async function _getCrumb(
fetch: (url: RequestInfo, init?: RequestInit) => Promise<Response>,
fetchOptionsBase: RequestInit,
url = "https://finance.yahoo.com/quote/AAPL",
develOverride = "getCrumb-quote-AAPL.json",
noCache = false,
cookieJar = defaultCookieJar
) {
): Promise<string | null> {
// if (crumb && crumbFetchTime + MAX_CRUMB_CACHE_TIME > Date.now()) return crumb;

if (!noCache) {
if (crumb && !noCache) {
// If we still have a valid (non-expired) cookie, return the existing crumb.
const existingCookies = cookieJar.getCookiesSync(url, { expire: true });
if (existingCookies.length) return crumb;
}

/**
 * Feeds a list of raw `set-cookie` header values into the cookie jar.
 *
 * @param header - Raw `set-cookie` values, or `undefined` when the response
 *   carried none.
 * @param url - URL the cookies are stored against.
 * @returns `false` when `header` is missing (nothing stored), otherwise
 *   `true` after handing the values to the jar.
 */
function processSetCookieHeader(header: string[] | undefined, url: string) {
  if (!header) return false;

  cookieJar.setFromSetCookieHeaders(header, url);
  return true;
}

console.log("Fetching crumb and cookies from " + url + "...");

const fetchOptions = {
const fetchOptions: RequestInit & { devel: string } = {
...fetchOptionsBase,
headers: {
...fetchOptionsBase.headers,
Expand All @@ -32,20 +45,180 @@ export async function _getCrumb(
// This request will get our first cookies, so nothing to send.
// cookie: cookieJar.getCookieStringSync(url),
},
redirect: "manual",

devel:
// @ts-expect-error: fetchDevel still has no types (yet)
fetchOptionsBase.devel && develOverride,
};

const response = await fetch(url, fetchOptions);
const setCookieHeader = response.headers.get("set-cookie");
if (setCookieHeader) cookieJar.setFromSetCookieHeaders(setCookieHeader, url);
processSetCookieHeader(response.headers.raw()["set-cookie"], url);

// console.log(response.headers);
// console.log(setCookieHeader);
// console.log(response.headers.raw());
// console.log(cookieJar);

const location = response.headers.get("location");
if (location) {
if (location.match(/guce.yahoo/)) {
const consentFetchOptions: typeof fetchOptions = {
...fetchOptions,
headers: {
...fetchOptions.headers,
// GUCS=XXXXXXXX; Max-Age=1800; Domain=.yahoo.com; Path=/; Secure
cookie: cookieJar.getCookieStringSync(location),
},
devel: "getCrumb-quote-AAPL-consent.html",
};
// Returns 302 to collectConsent?sessionId=XXX
console.log("fetch", location /*, consentFetchOptions */);
const consentResponse = await fetch(location, consentFetchOptions);
const consentLocation = consentResponse.headers.get("location");

if (consentLocation) {
if (!consentLocation.match(/collectConsent/))
throw new Error("Unexpected redirect to " + consentLocation);

const collectConsentFetchOptions: typeof fetchOptions = {
...consentFetchOptions,
headers: {
...fetchOptions.headers,
cookie: cookieJar.getCookieStringSync(consentLocation),
},
devel: "getCrumb-quote-AAPL-collectConsent.html",
};
console.log("fetch", consentLocation /*, collectConsentFetchOptions */);

const collectConsentResponse = await fetch(
consentLocation,
collectConsentFetchOptions
);
const collectConsentBody = await collectConsentResponse.text();

const collectConsentResponseParams =
[
...collectConsentBody.matchAll(
/<input type="hidden" name="([^"]+)" value="([^"]+)">/g
),
]
.map(
([, name, value]) =>
`${name}=${encodeURIComponent(parseHtmlEntities(value))}&`
)
.join("") + "agree=agree&agree=agree";

const collectConsentSubmitFetchOptions: typeof fetchOptions = {
...consentFetchOptions,
headers: {
...fetchOptions.headers,
cookie: cookieJar.getCookieStringSync(consentLocation),
"content-type": "application/x-www-form-urlencoded",
},
method: "POST",
// body: "csrfToken=XjJfOYU&sessionId=3_cc-session_bd9a3b0c-c1b4-4aa8-8c18-7a82ec68a5d5&originalDoneUrl=https%3A%2F%2Ffinance.yahoo.com%2Fquote%2FAAPL%3Fguccounter%3D1&namespace=yahoo&agree=agree&agree=agree",
body: collectConsentResponseParams,
devel: "getCrumb-quote-AAPL-collectConsentSubmit",
};
console.log(
"fetch",
consentLocation /*, collectConsentSubmitFetchOptions */
);
const collectConsentSubmitResponse = await fetch(
consentLocation,
collectConsentSubmitFetchOptions
);

// Set-Cookie: CFC=AQABCAFkWkdkjEMdLwQ9&s=AQAAAClxdtC-&g=ZFj24w; Expires=Wed, 8 May 2024 01:18:54 GMT; Domain=consent.yahoo.com; Path=/; Secure
if (
!processSetCookieHeader(
collectConsentSubmitResponse.headers.raw()["set-cookie"],
consentLocation
)
)
throw new Error(
"No set-cookie header on collectConsentSubmitResponse, please report."
);

// https://guce.yahoo.com/copyConsent?sessionId=3_cc-session_04da10ea-1025-4676-8175-60d2508bfc6c&lang=en-GB
const collectConsentSubmitResponseLocation =
collectConsentSubmitResponse.headers.get("location");
if (!collectConsentSubmitResponseLocation)
throw new Error(
"collectConsentSubmitResponse unexpectedly did not return a Location header, please report."
);

const copyConsentFetchOptions: typeof fetchOptions = {
...consentFetchOptions,
headers: {
...fetchOptions.headers,
cookie: cookieJar.getCookieStringSync(
collectConsentSubmitResponseLocation
),
},
devel: "getCrumb-quote-AAPL-copyConsent",
};

console.log(
"fetch",
collectConsentSubmitResponseLocation /*, copyConsentFetchOptions */
);
const copyConsentResponse = await fetch(
collectConsentSubmitResponseLocation,
copyConsentFetchOptions
);

if (
!processSetCookieHeader(
copyConsentResponse.headers.raw()["set-cookie"],
collectConsentSubmitResponseLocation
)
)
throw new Error(
"No set-cookie header on copyConsentResponse, please report."
);

const copyConsentResponseLocation =
copyConsentResponse.headers.get("location");
if (!copyConsentResponseLocation)
throw new Error(
"collectConsentSubmitResponse unexpectedly did not return a Location header, please report."
);

const finalResponseFetchOptions: typeof fetchOptions = {
...fetchOptions,
headers: {
...fetchOptions.headers,
cookie: cookieJar.getCookieStringSync(
collectConsentSubmitResponseLocation
),
},
devel: "getCrumb-quote-AAPL-consent-final-redirect.html",
};

/*
console.log(
"fetch",
copyConsentResponseLocation,
finalResponseFetchOptions
);
*/

return await _getCrumb(
fetch,
finalResponseFetchOptions,
copyConsentResponseLocation,
"getCrumb-quote-AAPL-consent-final-redirect.html",
noCache,
cookieJar
);
}
} else {
throw new Error(
"Unsupported redirect to " + location + ", please report."
);
}
}

const cookie = cookieJar.getCookiesSync(url, { expire: true })[0];
if (cookie) {
console.log("Success. Cookie expires on " + cookie.expires);
Expand Down
11 changes: 7 additions & 4 deletions src/lib/yahooFinanceFetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,23 +103,26 @@ async function yahooFinanceFetch(
substituteVariables.call(this, urlBase) + "?" + urlSearchParams.toString();
// console.log(url);

// console.log(cookieJar.serializeSync());

const fetchOptions = {
...fetchOptionsBase,
headers: {
...fetchOptionsBase.headers,
cookie: cookieJar.getCookieStringSync(url),
cookie: cookieJar.getCookieStringSync(url, { allPaths: true }),
},
};

// console.log(fetchOptions);
// console.log("fetch", url, fetchOptions);

// used in moduleExec.ts
if (func === "csv") func = "text";

const response = (await queue.add(() => fetchFunc(url, fetchOptions))) as any;

const setCookieHeader = response.headers.get("set-cookie");
if (setCookieHeader) cookieJar.setFromSetCookieHeaders(setCookieHeader, url);
const setCookieHeaders = response.headers.raw()["set-cookie"];
if (setCookieHeaders)
cookieJar.setFromSetCookieHeaders(setCookieHeaders, url);

const result = await response[func]();

Expand Down
58 changes: 58 additions & 0 deletions tests/http/getCrumb-quote-AAPL-collectConsent.html

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions tests/http/getCrumb-quote-AAPL-collectConsentSubmit
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"request": {
"url": "https://consent.yahoo.com/v2/collectConsent?sessionId=3_cc-session_bd7a0720-7115-4809-82ad-97ac2fc34194"
},
"response": {
"ok": false,
"status": 302,
"statusText": "Found",
"headers": {
"connection": [
"close"
],
"set-cookie": [
"CFC=AQABCAFkWl5kgkMesQQu&s=AQAAADWfuY0v&g=ZFkRcQ; Expires=Wed, 8 May 2024 03:12:16 GMT; Domain=consent.yahoo.com; Path=/; Secure"
],
"server": [
"guce"
],
"strict-transport-security": [
"max-age=31536000; includeSubDomains"
],
"location": [
"https://guce.yahoo.com/copyConsent?sessionId=3_cc-session_bd7a0720-7115-4809-82ad-97ac2fc34194&lang=en-GB"
],
"content-length": [
"0"
],
"date": [
"Mon, 08 May 2023 15:12:39 GMT"
]
},
"body": ""
}
}

0 comments on commit a4f90f1

Please sign in to comment.