diff --git a/.gitignore b/.gitignore index 8b19616..edf6beb 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,5 @@ node_modules # Users Environment Variables .lock-wscript -test.js \ No newline at end of file +test.js +.nyc_output/ \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index b154529..ca14ddd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,4 +7,6 @@ node_js: - 8 - 6 -sudo: false \ No newline at end of file +sudo: false + +after_success: npm run coverage \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c102d1..75962c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ ## Change Log +### v3.0.0 (07/03/2019) +- **BREAKING CHANGE**: `get/post` methods together with their signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod) +- **BREAKING CHANGE**: `cloudscraper.request` method is deprecated in favour of `cloudscraper(options)` +- Promise support has been added by using `request-promise` +- Error objects are inherited from Error and have additional properties. + * `options` - The request options + * `cause` - An alias for `error` + * `response` - The request response +- Stacktraces are available in error objects +- `cloudflareTimeout` option can be defined to speed up waiting time +- Challenge evaluation is done in a sandbox to avoid potential security issues +- Default [request methods](https://github.com/request/request#requestmethod) are available +- Custom cookie jar can now be passed [#103](https://github.com/codemanki/cloudscraper/issues/102) +- Proxies support [PR#101](https://github.com/codemanki/cloudscraper/pull/101) +- MIT license + +### v2.0.1 (02/03/2019) +- Minor documentation changes + ### v2.0.0 (09/12/2018) - [#2943](https://github.com/codemanki/cloudscraper/pull/66) Support recursive challenge solving. 
- **BREAKING CHANGE** Before this, when any error has been detected, the callback was called with an incorrect order: `callback(.., body, response);` instead of `return callback(..., response, body);` + diff --git a/Gruntfile.js b/Gruntfile.js deleted file mode 100644 index 4617377..0000000 --- a/Gruntfile.js +++ /dev/null @@ -1,20 +0,0 @@ -module.exports = function(grunt) { - - grunt.loadNpmTasks('grunt-mocha-test'); - - grunt.initConfig({ - mochaTest: { - test: { - options: { - globals: ['expect', 'sinon'], - reporter: 'spec', - quiet: false, - require: './specs/chai' - }, - src: ['specs/**/*.js'] - } - } - }); - - grunt.registerTask('default', ['mochaTest']); -}; diff --git a/README.md b/README.md index 3d848de..6b12b05 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,9 @@ Node.js library to bypass cloudflare's anti-ddos page. [![js-semistandard-style](https://cdn.rawgit.com/flet/semistandard/master/badge.svg)](https://github.com/Flet/semistandard) +[![Build status](https://img.shields.io/travis/codemanki/cloudscraper/master.svg?style=flat-square)](https://travis-ci.org/codemanki/cloudscraper) +[![Coverage](https://img.shields.io/coveralls/codemanki/cloudscraper.svg?style=flat-square)](https://coveralls.io/r/codemanki/cloudscraper) + This library is a port of python module [cloudflare-scrape](https://github.com/Anorov/cloudflare-scrape) with couple enhancements and test cases ;) . All grats to its author \m/ @@ -26,6 +29,46 @@ __Unfortunately, there is no support for handling a CAPTCHA, if the response con If you notice that for some reason cloudscraper stopped to work, do not hesitate and get in touch with me ( by creating an issue here, for example), so i can update it. 
+Migration from v2 to v3 +============ +- Replace `cloudscraper.request(options)` with `cloudscraper(options)` +- `cloudscraper.get()` and `cloudscraper.post()` method signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod): +``` +var options = { + uri: 'https://website.com/', + headers: {/*...*/} +}; + +cloudscraper.get(options, function(error, response, body) { + console.log(body); +}); +``` +or for **POST** +``` +var options = { + uri: 'https://website.com/', + headers: {/*...*/}, + formData: { field1: 'value', field2: 2 } +}; + +cloudscraper.post(options, function(error, response, body) { + console.log(body); +}); +``` +- If you are using custom promise support workarounds please remove them as cloudscraper now uses [request-promise](https://github.com/request/request-promise): + +``` +var cloudscraper = require('cloudscraper'); +var options = { + uri: 'https://website.com/', + method: 'GET' +}; + +cloudscraper(options).then(function(body) { + console.log(body); +}); +``` + Install ============ ```javascript @@ -37,7 +80,7 @@ Usage ```javascript var cloudscraper = require('cloudscraper'); -cloudscraper.get('http://website.com/', function(error, response, body) { +cloudscraper.get('https://website.com/', function(error, response, body) { if (error) { console.log('Error occurred'); } else { @@ -49,30 +92,78 @@ cloudscraper.get('http://website.com/', function(error, response, body) { or for `POST` action: ```javascript -cloudscraper.post('http://website.com/', {field1: 'value', field2: 2}, function(error, response, body) { - ... +var options = { + uri: 'https://website.com/', + formData: { field1: 'value', field2: 2 } +}; + +cloudscraper.post(options, function(error, response, body) { + console.log(body); }); ``` -A generic request can be made with `cloudscraper.request(options, callback)`. The options object should follow [request's options](https://www.npmjs.com/package/request#request-options-callback). 
Not everything is supported however, for example http methods other than GET and POST. If you wanted to request an image in binary data you could use the encoding option: +A generic request can be made with `cloudscraper(options, callback)`. The options object should follow [request's options](https://www.npmjs.com/package/request#request-options-callback). Not everything is supported however, for example http methods other than GET and POST. If you wanted to request an image in binary data you could use the encoding option: ```javascript -cloudscraper.request({method: 'GET', - url:'http://website.com/image', - encoding: null, - challengesToSolve: 3, // optional, if CF returns challenge after challenge, how many to solve before failing - followAllRedirects: true, // mandatory for successful challenge solution - }, function(err, response, body) { - //body is now a buffer object instead of a string +var options = { + method: 'GET', + url:'http://website.com/', +}; + +cloudscraper(options, function(err, response, body) { + console.log(response) }); ``` -## Error object -Error object has following structure: -``` -var error = {errorType: 0, error:...}; +## Advanced usage +Cloudscraper wraps request and request-promise, so using cloudscraper is pretty much like using those two libraries. + - Cloudscraper exposes [the same request methods as request](https://github.com/request/request#requestmethod): + `cloudscraper.get(options, callback)` + `cloudscraper.post(options, callback)` + `cloudscraper(uri)` + Please refer to request's documentation for further instructions + - Cloudscraper uses request-promise, promise chaining is done exactly the same as described in [docs](https://github.com/request/request-promise#cheat-sheet): + ``` + cloudscraper(options) + .then(function (htmlString) { + }) + .catch(function (err) { + }); + ``` + +## Default options +Cloudscraper exposes the following options that are required by default but might be changed. 
Please note that default options increase chances of correct work. + ``` +var options = { + uri: 'https://website', + jar: requestModule.jar(), // Custom cookie jar + headers: { + // User agent, Cache Control and Accept headers are required + 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36', + 'Cache-Control': 'private', + 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' + }, + // Cloudflare requires a delay of 5 seconds, so wait for at least 6. + cloudflareTimeout: 6000, + // followAllRedirects - follow non-GET HTTP 3xx responses as redirects + followAllRedirects: true, + // Support only this max challenges in row. If CF returns more, throw an error + challengesToSolve: 3 +}; + +cloudscraper(options, function(error, response, body) { + console.log(body) +}); +``` +## Error object +Cloudscraper error object inherits from `Error` and has the following fields: + * `name` - `RequestError`/`CaptchaError`/`CloudflareError`/`ParserError` + * `options` - The request options + * `cause` - An alias for `error` + * `response` - The request response + * `errorType` - Custom error code Where `errorType` can be following: - `0` if request to page failed due to some native reason as bad url, http connection or so. `error` in this case will be error [event](http://nodejs.org/api/http.html#http_class_http_server) - `1` cloudflare returned captcha. Nothing to do here. Bad luck @@ -80,7 +171,6 @@ Where `errorType` can be following: - `3` this error is returned when library failed to parse and solve js challenge. `error` will be `String` with some details. :warning: :warning: __Most likely it means that cloudflare have changed their js challenge.__ - `4` CF went into a loop and started to return challenge after challenge. 
If number of solved challenges is greater than `3` and another challenge is returned, throw an error - Running tests ============ Clone this repo, do `npm install` and then just `grunt` @@ -88,12 +178,9 @@ Clone this repo, do `npm install` and then just `grunt` ### Unknown error? Library stopped working? ### Let me know, by opening [issue](https://github.com/codemanki/cloudscraper/issues) in this repo and i will update library asap. Please, provide url and body of page where cloudscraper failed. - -CloudScraper uses [Request](https://github.com/request/request) to perform requests. - WAT =========== -Current cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So everytime you call `cloudscraper.get` you should expect it to return result after min 6 seconds. +Current cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So every time you call `cloudscraper.get/post` you should expect it to return result after minimum 6 seconds. If you want to change this behaviour, you would need to make a generic request as described above and pass the `cloudflareTimeout` option with your value. 
But be aware that cloudflare might track this timeout and use it against you ;) ## TODO - [x] Check for recaptcha @@ -102,17 +189,20 @@ Current cloudflare implementation requires browser to respect the timeout of 5 s - [x] Add proper testing - [x] Remove manual 302 processing, replace with `followAllRedirects` param - [ ] Parse out the timeout from chalenge page - - [ ] Reoder the arguments in get/post/request methods and allow custom options to be passed in + - [x] Reoder the arguments in get/post/request methods and allow custom options to be passed in - [ ] Expose solve methods to use them independently - [ ] Support recaptcha solving - - [ ] Promisification + - [x] Promisification ## Kudos to contributors + - [Dwayne](https://github.com/pro-src) by himself rewrote the whole library, closed bunch of issues and feature requests. Praise him for 3.0.0 version ❤️ - [roflmuffin](https://github.com/roflmuffin) - [Colecf](https://github.com/Colecf) - [Jeongbong Seo](https://github.com/jngbng) - [Kamikadze4GAME](https://github.com/Kamikadze4GAME) ## Dependencies -* request https://github.com/request/request +* [request](https://github.com/request/request) +* [request-promise](https://github.com/request/request-promise) + diff --git a/errors.js b/errors.js new file mode 100644 index 0000000..21bff5e --- /dev/null +++ b/errors.js @@ -0,0 +1,90 @@ +'use strict'; + +// The purpose of this library is two-fold. +// 1. Have errors consistent with request/promise-core +// 2. Prevent request/promise core from wrapping our errors + +// There are two differences between these errors and the originals. +// 1. There is a non-enumerable errorType attribute. +// 2. The error constructor is hidden from the stacktrace. 
+ +var EOL = require('os').EOL; +var BUG_REPORT = format([ + '### Cloudflare may have changed their technique, or there may be a bug.', + '### Bug Reports: https://github.com/codemanki/cloudscraper/issues', + '### Check the detailed exception message that follows for the cause.' +]); + +var original = require('request-promise-core/errors'); +var OriginalError = original.RequestError; + +var RequestError = create('RequestError', 0); +var CaptchaError = create('CaptchaError', 1); +var CloudflareError = create('CloudflareError', 2); +var ParserError = create('ParserError', 3); +// errorType 4 is a CloudflareError so that constructor is reused. + +// The following errors originate from promise-core and it's dependents. +// Give them an errorType for consistency. +original.StatusCodeError.prototype.errorType = 5; +original.TransformError.prototype.errorType = 6; + +// This replaces the RequestError for all libraries using request/promise-core +// and prevents silent failure. +Object.defineProperty(original, 'RequestError', { + configurable: true, + enumerable: true, + writable: true, + value: RequestError +}); + +// Export our custom errors along with StatusCodeError, etc. +Object.assign(module.exports, original, { + RequestError: RequestError, + CaptchaError: CaptchaError, + ParserError: ParserError, + CloudflareError: CloudflareError +}); + +function create(name, errorType) { + function CustomError(cause, options, response) { + + // This prevents nasty things e.g. `error.cause.error` and + // is why replacing the original RequestError is necessary. 
+ if (cause instanceof OriginalError) { + return cause; + } + + OriginalError.apply(this, arguments); + + // Change the name to match this constructor + this.name = name; + + if (this instanceof ParserError) { + this.message = BUG_REPORT + this.message; + } + + if (Error.captureStackTrace) { // required for non-V8 environments + // Provide a proper stack trace that hides this constructor + Error.captureStackTrace(this, CustomError); + } + } + + CustomError.prototype = Object.create(OriginalError.prototype); + CustomError.prototype.constructor = CustomError; + // Keeps things stealthy by defining errorType on the prototype. + // This makes it non-enumerable and safer to add. + CustomError.prototype.errorType = errorType; + + Object.setPrototypeOf(CustomError, Object.getPrototypeOf(OriginalError)); + Object.defineProperty(CustomError, 'name', { + configurable: true, + value: name + }); + + return CustomError; +} + +function format(lines) { + return EOL + lines.join(EOL) + EOL + EOL; +} diff --git a/index.js b/index.js index a2c51b4..bbcdb53 100644 --- a/index.js +++ b/index.js @@ -1,220 +1,267 @@ var vm = require('vm'); -var requestModule = require('request'); -var jar = requestModule.jar(); - -var request = requestModule.defaults({jar: jar}); // Cookies should be enabled -var UserAgent = 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36'; -var Timeout = 6000; // Cloudflare requires a delay of 5 seconds, so wait for at least 6. -var cloudscraper = {}; -var MaxChallengesToSolve = 3; // Support only this max challenges in row. If CF returns more, throw an error - -/** - * Performs get request to url with headers. - * @param {String} url - * @param {Function} callback function(error, response, body) {} - * @param {Object} headers Hash with headers, e.g. 
{'Referer': 'http://google.com', 'User-Agent': '...'} - */ -cloudscraper.get = function(url, callback, headers) { - performRequest({ - method: 'GET', - url: url, - headers: headers - }, callback); +var requestModule = require('request-promise'); +var errors = require('./errors'); + +var VM_OPTIONS = { + timeout: 5000 }; -/** - * Performs post request to url with headers. - * @param {String} url - * @param {String|Object} body Will be passed as form data - * @param {Function} callback function(error, response, body) {} - * @param {Object} headers Hash with headers, e.g. {'Referer': 'http://google.com', 'User-Agent': '...'} - */ -cloudscraper.post = function(url, body, callback, headers) { - var data = ''; - var bodyType = Object.prototype.toString.call(body); - - if(bodyType === '[object String]') { - data = body; - } else if (bodyType === '[object Object]') { - data = Object.keys(body).map(function(key) { - return key + '=' + body[key]; - }).join('&'); - } +module.exports = defaults.call(requestModule); - headers = headers || {}; - headers['Content-Type'] = headers['Content-Type'] || 'application/x-www-form-urlencoded; charset=UTF-8'; - headers['Content-Length'] = headers['Content-Length'] || data.length; +function defaults(params) { + // isCloudScraper === !isRequestModule + var isRequestModule = this === requestModule; - performRequest({ - method: 'POST', - body: data, - url: url, - headers: headers - }, callback); -}; + var defaultParams = (!isRequestModule && this.defaultParams) || { + requester: requestModule, + // Cookies should be enabled + jar: requestModule.jar(), + headers: { + 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36', + 'Cache-Control': 'private', + 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' + }, + // Cloudflare requires a delay of 5 seconds, so wait for at least 6. 
+ cloudflareTimeout: 6000, + // followAllRedirects - follow non-GET HTTP 3xx responses as redirects + followAllRedirects: true, + // Support only this max challenges in row. If CF returns more, throw an error + challengesToSolve: 3 + }; -/** - * Performs get or post request with generic request options - * @param {Object} options Object to be passed to request's options argument - * @param {Function} callback function(error, response, body) {} - */ -cloudscraper.request = function(options, callback) { - performRequest(options, callback); -}; + // Object.assign requires at least nodejs v4, request only test/supports v6+ + defaultParams = Object.assign({}, defaultParams, params); -function performRequest(options, callback) { - options = options || {}; - options.headers = options.headers || {}; + var cloudscraper = requestModule.defaults + .call(this, defaultParams, function(options) { + return performRequest(options, true); + }); - options.headers['Cache-Control'] = options.headers['Cache-Control'] || 'private'; - options.headers['Accept'] = options.headers['Accept'] || 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'; + // There's no safety net here, any changes apply to all future requests + // that are made with this instance and derived instances. + cloudscraper.defaultParams = defaultParams; - makeRequest = requestMethod(options.method); + // Ensure this instance gets a copy of our custom defaults function + // and afterwards, it will be copied over automatically. + if (isRequestModule) { + cloudscraper.defaults = defaults; + } + // Expose the debug option + Object.defineProperty(cloudscraper, 'debug', + Object.getOwnPropertyDescriptor(this, 'debug')); - //Can't just do the normal options.encoding || 'utf8' - //because null is a valid encoding. 
- if('encoding' in options) { - options.realEncoding = options.encoding; - } else { - options.realEncoding = 'utf8'; + return cloudscraper; +} + +// This function is wrapped to ensure that we get new options on first call. +// The options object is reused in subsequent calls when calling it directly. +function performRequest(options, isFirstRequest) { + // Prevent overwriting realEncoding in subsequent calls + if (!('realEncoding' in options)) { + // Can't just do the normal options.encoding || 'utf8' + // because null is a valid encoding. + if ('encoding' in options) { + options.realEncoding = options.encoding; + } else { + options.realEncoding = 'utf8'; + } } + options.encoding = null; - if (!options.url || !callback) { - throw new Error('To perform request, define both url and callback'); + if (isNaN(options.challengesToSolve)) { + throw new TypeError('Expected `challengesToSolve` option to be a number, ' + + 'got ' + typeof(options.challengesToSolve) + ' instead.'); } - options.headers['User-Agent'] = options.headers['User-Agent'] || UserAgent; - options.challengesToSolve = options.challengesToSolve || MaxChallengesToSolve; // Might not be the best way how to pass this variable - options.followAllRedirects = options.followAllRedirects === undefined ? true : options.followAllRedirects; + // This should be the default export of either request or request-promise. + var requester = options.requester; - makeRequest(options, function(error, response, body) { - processRequestResponse(options, {error: error, response: response, body: body}, callback); - }); + if (typeof requester !== 'function') { + throw new TypeError('Expected `requester` option to be a function, got ' + + typeof(requester) + ' instead.'); + } + + var request = requester(options); + + // If the requester is not request-promise, ensure we get a callback. 
+ if (typeof request.callback !== 'function') { + throw new TypeError('Expected a callback function, got ' + + typeof(request.callback) + ' instead.'); + } + + // We only need the callback from the first request. + // The other callbacks can be safely ignored. + if (isFirstRequest) { + // This should be a user supplied callback or request-promise's callback. + // The callback is always wrapped/bound to the request instance. + options.callback = request.callback; + } + + // The error event only provides an error argument. + request.removeAllListeners('error') + .once('error', processRequestResponse.bind(null, options)); + // The complete event only provides response and body arguments. + request.removeAllListeners('complete') + .once('complete', processRequestResponse.bind(null, options, null)); + + // Indicate that this is a cloudscraper request, required by test/helper. + request.cloudscraper = true; + return request; } -function processRequestResponse(options, requestResult, callback) { - var error = requestResult.error; - var response = requestResult.response; - var body = requestResult.body; - var validationError; +// The argument convention is options first where possible, options +// always before response, and body always after response. 
+function processRequestResponse(options, error, response, body) { + var callback = options.callback; + var stringBody; var isChallengePresent; var isRedirectChallengePresent; - var isTargetPage; // Meaning we have finally reached the target page if (error || !body || !body.toString) { - return callback({ errorType: 0, error: error }, response, body); + // Pure request error (bad connection, wrong url, etc) + error = new errors.RequestError(error, options, response); + + return callback(error, response, body); } stringBody = body.toString('utf8'); - if (validationError = checkForErrors(error, stringBody)) { - return callback(validationError, response, body); + try { + validate(options, response, stringBody); + } catch (error) { + return callback(error, response, body); } isChallengePresent = stringBody.indexOf('a = document.getElementById(\'jschl-answer\');') !== -1; isRedirectChallengePresent = stringBody.indexOf('You are being redirected') !== -1 || stringBody.indexOf('sucuri_cloudproxy_js') !== -1; - isTargetPage = !isChallengePresent && !isRedirectChallengePresent; + // isTargetPage = !isChallengePresent && !isRedirectChallengePresent; + + if (isChallengePresent && options.challengesToSolve === 0) { + var cause = 'Cloudflare challenge loop'; + error = new errors.CloudflareError(cause, options, response); + error.errorType = 4; - if(isChallengePresent && options.challengesToSolve == 0) { - return callback({ errorType: 4 }, response, body); + return callback(error, response, body); } // If body contains specified string, solve challenge if (isChallengePresent) { setTimeout(function() { - solveChallenge(response, stringBody, options, callback); - }, Timeout); + solveChallenge(options, response, stringBody); + }, options.cloudflareTimeout); } else if (isRedirectChallengePresent) { - setCookieAndReload(response, stringBody, options, callback); + setCookieAndReload(options, response, stringBody); } else { // All is good - processResponseBody(options, error, response, 
body, callback); + processResponseBody(options, response, body); } } -function checkForErrors(error, body) { +function validate(options, response, body) { var match; - // Pure request error (bad connection, wrong url, etc) - if(error) { - return { errorType: 0, error: error }; - } - // Finding captcha if (body.indexOf('why_captcha') !== -1 || /cdn-cgi\/l\/chk_captcha/i.test(body)) { - return { errorType: 1 }; + throw new errors.CaptchaError('captcha', options, response); } - // trying to find '1006' + // Trying to find '1006' match = body.match(/<\w+\s+class="cf-error-code">(.*)<\/\w+>/i); if (match) { - return { errorType: 2, error: parseInt(match[1]) }; + var code = parseInt(match[1]); + throw new errors.CloudflareError(code, options, response); } return false; } -function solveChallenge(response, body, options, callback) { - var challenge = body.match(/name="jschl_vc" value="(\w+)"/); - var host = response.request.host; - var makeRequest = requestMethod(options.method); - var jsChlVc; - var answerResponse; - var answerUrl; +function solveChallenge(options, response, body) { + var callback = options.callback; - if (!challenge) { - return callback({errorType: 3, error: 'I cant extract challengeId (jschl_vc) from page'}, response, body); - } + var uri = response.request.uri; + // The JS challenge to be evaluated for answer/response. 
+ var challenge; + // The result of challenge being evaluated in sandbox + var answer; + // The query string to send back to Cloudflare + // var payload = { jschl_vc, jschl_answer, pass }; + var payload = {}; - jsChlVc = challenge[1]; + var match; + var error; + var cause; - challenge = body.match(/getElementById\('cf-content'\)[\s\S]+?setTimeout.+?\r?\n([\s\S]+?a\.value =.+?)\r?\n/i); + match = body.match(/name="jschl_vc" value="(\w+)"/); - if (!challenge) { - return callback({errorType: 3, error: 'I cant extract method from setTimeOut wrapper'}, response, body); + if (!match) { + cause = 'challengeId (jschl_vc) extraction failed'; + error = new errors.ParserError(cause, options, response); + + return callback(error, response, body); } - challenge_pass = body.match(/name="pass" value="(.+?)"/)[1]; + payload.jschl_vc = match[1]; + + match = body.match(/getElementById\('cf-content'\)[\s\S]+?setTimeout.+?\r?\n([\s\S]+?a\.value =.+?)\r?\n/i); - challenge = challenge[1]; + if (!match) { + cause = 'setTimeout callback extraction failed'; + error = new errors.ParserError(cause, options, response); - challenge = challenge.replace(/a\.value =(.+?) \+ .+?;/i, '$1'); + return callback(error, response, body); + } - challenge = challenge.replace(/\s{3,}[a-z](?: = |\.).+/g, ''); - challenge = challenge.replace(/'; \d+'/g, ''); + challenge = match[1] + .replace(/a\.value =(.+?) 
\+ .+?;/i, '$1') + .replace(/\s{3,}[a-z](?: = |\.).+/g, '') + .replace(/'; \d+'/g, ''); try { - answerResponse = { - 'jschl_vc': jsChlVc, - 'jschl_answer': (eval(challenge) + response.request.host.length), - 'pass': challenge_pass - }; - } catch (err) { - return callback({errorType: 3, error: 'Error occurred during evaluation: ' + err.message}, response, body); + answer = vm.runInNewContext(challenge, undefined, VM_OPTIONS); + payload.jschl_answer = answer + uri.hostname.length; + } catch (error) { + error.message = 'Challenge evaluation failed: ' + error.message; + error = new errors.ParserError(error, options, response); + + return callback(error, response, body); + } + + match = body.match(/name="pass" value="(.+?)"/); + + if (!match) { + cause = 'Attribute (pass) value extraction failed'; + error = new errors.ParserError(cause, options, response); + + return callback(error, response, body); } - answerUrl = response.request.uri.protocol + '//' + host + '/cdn-cgi/l/chk_jschl'; + payload.pass = match[1]; - options.headers['Referer'] = response.request.uri.href; // Original url should be placed as referer - options.url = answerUrl; - options.qs = answerResponse; + // Prevent reusing the headers object to simplify unit testing. + options.headers = Object.assign({}, options.headers); + // Use the original uri as the referer and to construct the answer url. + options.headers['Referer'] = uri.href; + options.uri = uri.protocol + '//' + uri.hostname + '/cdn-cgi/l/chk_jschl'; + // Set the query string and decrement the number of challenges to solve. + options.qs = payload; options.challengesToSolve = options.challengesToSolve - 1; - // Make request with answer - makeRequest(options, function(error, response, body) { - processRequestResponse(options, {error: error, response: response, body: body}, callback); - }); + // Make request with answer. 
+ performRequest(options, false); } -function setCookieAndReload(response, body, options, callback) { - var challenge = body.match(/S='([^']+)'/); - var makeRequest = requestMethod(options.method); +function setCookieAndReload(options, response, body) { + var callback = options.callback; + var challenge = body.match(/S='([^']+)'/); if (!challenge) { - return callback({errorType: 3, error: 'I cant extract cookie generation code from page'}, response, body); + var cause = 'Cookie code extraction failed'; + var error = new errors.ParserError(cause, options, response); + + return callback(error, response, body); } var base64EncodedCode = challenge[1]; @@ -227,42 +274,40 @@ function setCookieAndReload(response, body, options, callback) { document: {} }; - vm.runInNewContext(cookieSettingCode, sandbox); - try { - jar.setCookie(sandbox.document.cookie, response.request.uri.href, {ignoreError: true}); - } catch (err) { - return callback({errorType: 3, error: 'Error occurred during evaluation: ' + err.message}, response, body); + vm.runInNewContext(cookieSettingCode, sandbox, VM_OPTIONS); + + options.jar.setCookie(sandbox.document.cookie, response.request.uri.href, {ignoreError: true}); + } catch (error) { + error.message = 'Cookie code evaluation failed: ' + error.message; + error = new errors.ParserError(error, options, response); + + return callback(error, response, body); } options.challengesToSolve = options.challengesToSolve - 1; - makeRequest(options, function(error, response, body) { - processRequestResponse(options, {error: error, response: response, body: body}, callback); - }); + performRequest(options, false); } -// Workaround for better testing. Request has pretty poor API -function requestMethod(method) { - // For now only GET and POST are supported - method = method.toUpperCase(); - - return method === 'POST' ? 
request.post : request.get; -} +function processResponseBody(options, response, body) { + var callback = options.callback; + var error = null; -function processResponseBody(options, error, response, body, callback) { if(typeof options.realEncoding === 'string') { body = body.toString(options.realEncoding); - // In case of real encoding, try to validate the response - // and find potential errors there. - // If encoding is not provided, return response as it is - if (validationError = checkForErrors(error, body)) { - return callback(validationError, response, body); + // The resolveWithFullResponse option will resolve with the response + // object. This changes the response.body so it is as expected. + response.body = body; + + // In case of real encoding, try to validate the response and find + // potential errors there, otherwise return the response as is. + try { + validate(options, response, body); + } catch (e) { + error = e; } } - callback(error, response, body); } - -module.exports = cloudscraper; diff --git a/mocha.opts b/mocha.opts new file mode 100644 index 0000000..e32c7e6 --- /dev/null +++ b/mocha.opts @@ -0,0 +1,2 @@ +--reporter spec +--require tests/common \ No newline at end of file diff --git a/package.json b/package.json index 83f4d1b..f5d8e92 100644 --- a/package.json +++ b/package.json @@ -1,10 +1,11 @@ { "name": "cloudscraper", - "version": "2.0.1", + "version": "3.0.0", "description": "Bypasses cloudflare's anti-ddos page", "main": "index.js", "scripts": { - "test": "grunt", + "test": "npm run lint && nyc --reporter=html --reporter=text mocha", + "coverage": "nyc report --reporter=text-lcov | coveralls", "lint": "eslint ." 
}, "repository": { @@ -24,20 +25,22 @@ "license": "MIT", "homepage": "https://github.com/codemanki/cloudscraper", "dependencies": { - "request": "^2.88.0" + "request": "^2.88.0", + "request-promise": "^4.2.4" }, "devDependencies": { + "chai": "^4.2.0", + "chai-as-promised": "^7.1.1", + "coveralls": "^3.0.3", "eslint": "^5.14.1", "eslint-config-standard": "^12.0.0", "eslint-plugin-import": "^2.16.0", "eslint-plugin-node": "^8.0.1", "eslint-plugin-promise": "^4.0.1", "eslint-plugin-standard": "^4.0.0", - "chai": "^1.10.0", - "grunt": "^0.4.5", - "grunt-cli": "^1.2.0", - "grunt-mocha-test": "^0.12.4", - "mocha": "^2.0.1", - "sinon": "^1.12.1" + "mocha": "^6.0.2", + "nyc": "^13.3.0", + "sinon": "^7.2.4", + "sinon-chai": "^3.3.0" } } diff --git a/specs/chai.js b/specs/chai.js deleted file mode 100644 index 43994da..0000000 --- a/specs/chai.js +++ /dev/null @@ -1,12 +0,0 @@ -var chai = require('chai'), - sinon = require('sinon'); - -chai.expect(); - -chai.config.includeStack = true; - -global.expect = chai.expect; -global.AssertionError = chai.AssertionError; -global.Assertion = chai.Assertion; -global.assert = chai.assert; -global.sinon = sinon; diff --git a/specs/spec_helper.js b/specs/spec_helper.js deleted file mode 100644 index c07e710..0000000 --- a/specs/spec_helper.js +++ /dev/null @@ -1,45 +0,0 @@ -var fs = require('fs'); -var urlLib = require('url'); -var path = require('path'); - -var testDefaults = { - url: 'http://example-site.dev/path/', - headers: {'User-Agent': 'Chrome'} -}; - -module.exports = { - getFixture: function(fileName) { - return fs.readFileSync('./specs/fixtures/' + fileName, 'utf8'); - }, - testDefaults: testDefaults, - // This method returns properly faked response object for request lib, which is used inside cloudscraper library - fakeResponseObject: function(statusCode, headers, body, url) { - var parsedUri = urlLib.parse(url); - parsedUri.uri = parsedUri; - - return { - statusCode: statusCode, - headers: headers, - body: body, - request: 
parsedUri //actually this is more compilcated object, but library uses only uri parts. - }; - }, - // Terrible hack. But because of request library API, it is impossible to normally stub it. That is why cloudscraper's index.js is removed from cache each time - dropCache: function() { - var pathToLib = path.normalize(__dirname + '/../index.js'); - if (require.cache[pathToLib]) { - delete require.cache[pathToLib]; - } - }, - requestParams: function(params) { - return Object.assign({ - method: 'GET', - url: testDefaults.url, - headers: testDefaults.headers, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 3 - }, params); - } -}; diff --git a/specs/tests/cloudscraper.js b/specs/tests/cloudscraper.js deleted file mode 100644 index b0f8d24..0000000 --- a/specs/tests/cloudscraper.js +++ /dev/null @@ -1,314 +0,0 @@ -var helper = require('../spec_helper'); -var request = require('request'); - -describe('Cloudscraper', function() { - var requestedPage = helper.getFixture('requested_page.html'); - var url = helper.testDefaults.url; - var headers = helper.testDefaults.headers; - - // Since request.jar returns new cookie jar instance, create one global instance and then stub it in beforeEach - var jar = request.jar(); - // Since request.defaults returns new wrapper, create one global instance and then stub it in beforeEach - var requestDefault = request.defaults({jar: jar}); - var defaultWithArgs = helper.requestParams({}); - - var cloudscraper; - var sandbox; - before(function() { - helper.dropCache(); - }); - - beforeEach(function () { - sandbox = sinon.sandbox.create(); - sandbox.stub(request, 'jar').returns(jar); - sandbox.stub(request, 'defaults').returns(requestDefault); - cloudscraper = require('../../index'); - // since cloudflare requires timeout, the module relies on setTimeout. 
It should be proprely stubbed to avoid ut running for too long - this.clock = sinon.useFakeTimers(); - }); - - afterEach(function () { - sandbox.restore(); - this.clock.restore(); - }); - - it('should return requested page, if cloudflare is disabled for page', function(done) { - var expectedResponse = { statusCode: 200 }; - - // Stub first call, which request makes to page. It should return requested page - sandbox.stub(requestDefault, 'get') - .withArgs(helper.requestParams({})) - .callsArgWith(1, null, expectedResponse, requestedPage); - - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(expectedResponse); - done(); - }, headers); - - }); - - it('should not trigged any error if recaptcha is present in page not protected by CF', function(done) { - var expectedResponse = { statusCode: 200 }; - var pageWithCaptcha = helper.getFixture('page_with_recaptcha.html'); - - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, expectedResponse, pageWithCaptcha); - - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(pageWithCaptcha); - expect(response).to.be.equal(expectedResponse); - done(); - }, headers); - - }); - - it('should resolve challenge (version as on 21.05.2015) and then return page', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_21_05_2015.html'); - var response = helper.fakeResponseObject(503, headers, jsChallengePage, url); - var stubbed; - - // Cloudflare is enabled for site. 
It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, jsChallengePage); - - // Second call to request.get will have challenge solution - // It should contain url, answer, headers with Referer - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: { - 'jschl_vc': '89cdff5eaa25923e0f26e29e5195dce9', - 'jschl_answer': 633 + 'example-site.dev'.length, // 633 is a answer to cloudflares js challenge in this particular case - 'pass': '1432194174.495-8TSfc235EQ' - }, - headers: { - 'User-Agent': 'Chrome', - 'Referer': 'http://example-site.dev/path/', - 'Cache-Control': 'private', - 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' - }, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 2 - }) - .callsArgWith(1, null, response, requestedPage); - - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(response); - done(); - }, headers); - - this.clock.tick(7000); // tick the timeout - }); - - it('should resolve challenge (version as on 09.06.2016) and then return page', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_09_06_2016.html'); - var response = helper.fakeResponseObject(503, headers, jsChallengePage, url); - var stubbed; - - // Cloudflare is enabled for site. 
It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, jsChallengePage); - - // Second call to request.get will have challenge solution - // It should contain url, answer, headers with Referer - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: { - 'jschl_vc': '346b959db0cfa38f9938acc11d6e1e6e', - 'jschl_answer': 6632 + 'example-site.dev'.length, // 6632 is a answer to cloudflares js challenge in this particular case - 'pass': '1465488330.6-N/NbGTg+IM' - }, - headers: { - 'User-Agent': 'Chrome', - 'Referer': 'http://example-site.dev/path/', - 'Cache-Control': 'private', - 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' - }, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 2 - }) - .callsArgWith(1, null, response, requestedPage); - - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(response); - done(); - }, headers); - - this.clock.tick(7000); // tick the timeout - }); - - it('should resolve 2 consequent challenges', function(done) { - var jsChallengePage1 = helper.getFixture('js_challenge_03_12_2018_1.html'); - var jsChallengePage2 = helper.getFixture('js_challenge_03_12_2018_2.html'); - var responseJsChallengePage1 = helper.fakeResponseObject(503, headers, jsChallengePage1, url); - var responseJsChallengePage2 = helper.fakeResponseObject(503, headers, jsChallengePage2, url); - var stubbed; - - // First call and CF returns a challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, responseJsChallengePage1, jsChallengePage1); - - // We submit a solution to the first challenge, but CF decided to give us a second one - stubbed.withArgs({ - method: 'GET', - url: 
'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: { - 'jschl_vc': '427c2b1cd4fba29608ee81b200e94bfa', - 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 is a answer to cloudflares js challenge in this particular case - 'pass': '1543827239.915-44n9IE20mS' - }, - headers: { - 'User-Agent': 'Chrome', - 'Referer': 'http://example-site.dev/path/', - 'Cache-Control': 'private', - 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' - }, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 2 - }) - .callsArgWith(1, null, responseJsChallengePage2, jsChallengePage2); - - // We submit a solution to the second challenge and CF returns requested page - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: { - 'jschl_vc': 'a41fee3a9f041fea01f0cbf3e8e4d29b', - 'jschl_answer': -1.9145049856 + 'example-site.dev'.length, // 1.9145049856 is a answer to cloudflares js challenge in this particular case - 'pass': '1543827246.024-hvxyNA3rOg' - }, - headers: { - 'User-Agent': 'Chrome', - 'Referer': 'http://example-site.dev/path/', - 'Cache-Control': 'private', - 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' - }, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 1 - }) - .callsArgWith(1, null, responseJsChallengePage2, requestedPage); - - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(response); - done(); - }, headers); - - this.clock.tick(14000); // tick the timeout - }); - - it('should make post request with body as string', function(done) { - var expectedResponse = { statusCode: 200 }; - var body = 'form-data-body'; - var postHeaders = headers; - - postHeaders['Content-Type'] = 'application/x-www-form-urlencoded; 
charset=UTF-8'; - postHeaders['Content-Length'] = body.length; - - - // Stub first call, which request makes to page. It should return requested page - sandbox.stub(requestDefault, 'post') - .withArgs(helper.requestParams({url: url, method: 'POST', headers: postHeaders, body: body})) - .callsArgWith(1, null, expectedResponse, requestedPage); - - cloudscraper.post(url, body, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(expectedResponse); - done(); - }, headers); - }); - - it('should make post request with body as object', function(done) { - var expectedResponse = { statusCode: 200 }; - var rawBody = {a: '1', b: 2}; - var encodedBody = 'a=1&b=2'; - var postHeaders = headers; - - postHeaders['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'; - postHeaders['Content-Length'] = encodedBody.length; - - // Stub first call, which request makes to page. It should return requested page - sandbox.stub(requestDefault, 'post') - .withArgs(helper.requestParams({url: url, method: 'POST', headers: postHeaders, body: encodedBody})) - .callsArgWith(1, null, expectedResponse, requestedPage); - - cloudscraper.post(url, rawBody, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(expectedResponse); - done(); - }, headers); - }); - - it('should return raw data when encoding is null', function(done) { - var expectedResponse = { statusCode: 200 }; - var requestedData = new Buffer('R0lGODlhDwAPAKECAAAAzMzM/////wAAACwAAAAADwAPAAACIISPeQHsrZ5ModrLlN48CXF8m2iQ3YmmKqVlRtW4MLwWACH+H09wdGltaXplZCBieSBVbGVhZCBTbWFydFNhdmVyIQAAOw==', 'base64'); - - sandbox.stub(requestDefault, 'get') - .withArgs(helper.requestParams({url: url, headers: headers, encoding: null, realEncoding: null})) - .callsArgWith(1, null, expectedResponse, requestedData); - - var options = { - method: 'GET', - url: url, - encoding: 
null, - headers: headers, - followAllRedirects: true - }; - - cloudscraper.request(options, function(error, response, body) { - expect(error).to.be.null(); - expect(response).to.be.equal(expectedResponse); - expect(body).to.be.equal(requestedData); - done(); - }); - }); - - it('should set the given cookie and then return page', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_cookie.html'); - var response = helper.fakeResponseObject(200, headers, jsChallengePage, url); - - // Cloudflare is enabled for site. - // It returns a redirecting page if a (session) cookie is unset. - sandbox.stub(requestDefault, 'get', function fakeGet(options, cb) { - if (options.url === url) { - var cookieString = jar.getCookieString(url); - if (cookieString === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a') { - cb(null, response, requestedPage); - } else { - cb(null, response, jsChallengePage); - } - } else { - cb(new Error("Unexpected request")); - } - }); - - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - done(); - }, headers); - }); -}); diff --git a/specs/tests/errors.js b/specs/tests/errors.js deleted file mode 100644 index c14ef17..0000000 --- a/specs/tests/errors.js +++ /dev/null @@ -1,221 +0,0 @@ -var helper = require('../spec_helper'); -var request = require('request'); - -describe('Cloudscraper', function() { - var sandbox; - var captchaPage = helper.getFixture('captcha.html'); - var accessDenied = helper.getFixture('access_denied.html'); - var invalidChallenge = helper.getFixture('invalid_js_challenge.html'); - var url = helper.testDefaults.url; - var headers = helper.testDefaults.headers; - - // Since request.defaults returns new wrapper, create one global instance and then stub it in beforeEach - var requestDefault = request.defaults({jar: true}); - var defaultWithArgs = helper.requestParams({}); - - var cloudscraper; - before(function() { - 
helper.dropCache(); - }); - - beforeEach(function () { - sandbox = sinon.sandbox.create(); - sandbox.stub(request, 'defaults').returns(requestDefault); - cloudscraper = require('../../index'); - // since cloudflare requires timeout, the module relies on setTimeout. It should be proprely stubbed to avoid ut running for too long - this.clock = sinon.useFakeTimers(); - }); - - afterEach(function () { - sandbox.restore(); - this.clock.restore(); - }); - - it('should return error if it was thrown by request', function(done) { - var response = { statusCode: 500 }, - fakeError = {fake: 'error'}; //not real request error, but it doesn't matter - - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, fakeError, response, ''); - - cloudscraper.get(url, function(error) { - expect(error).to.be.eql({errorType: 0, error: fakeError}); // errorType 0, means it is some kind of system error - done(); - }, headers); - - }); - - it('should return error if captcha is served by cloudflare', function(done){ - var response = { statusCode: 503 }; - - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, captchaPage); - - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 1}); // errorType 1, means captcha is served - expect(response).to.be.eql(captchaPage); - done(); - }, headers); - }); - - it('should return error if cloudflare returned some inner error', function(done){ - //https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 - var response = { statusCode: 500 }; - - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, accessDenied); - - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 2, error: 1006}); // errorType 2, means inner cloudflare error - 
expect(response).to.be.eql(accessDenied); - done(); - }, headers); - }); - - it('should return errior if cf presented more than 3 challenges in a row', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_09_06_2016.html'); - var response = helper.fakeResponseObject(503, headers, jsChallengePage, url); - var stubbed; - - var pageWithCaptchaResponse = { statusCode: 200 }; - // Cloudflare is enabled for site. It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(helper.requestParams({url: url, headers: headers})) - .callsArgWith(1, null, response, jsChallengePage); - - // Second call to request.get returns challenge - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: sinon.match.any, - headers: sinon.match.any, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 2 - }) - .callsArgWith(1, null, response, jsChallengePage); - - // Third call to request.get returns challenge - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: sinon.match.any, - headers: sinon.match.any, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 1 - }) - .callsArgWith(1, null, response, jsChallengePage); - - // Fourth call to request.get still returns a challenge - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: sinon.match.any, - headers: sinon.match.any, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 0 - }) - .callsArgWith(1, null, response, jsChallengePage); - - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 4}); // errorType 1, means captcha is served - expect(response).to.be.eql(jsChallengePage); - done(); - }, headers); - - this.clock.tick(200000); // tick the timeout - }); - it('should return error if body is undefined', 
function(done){ - //https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 - var response = { statusCode: 500 }; - - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, undefined); - - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 0, error: null}); // errorType 2, means inner cloudflare error - expect(response).to.be.eql(undefined); - done(); - }, headers); - }); - - it('should return error if challenge page failed to be parsed', function(done) { - var response = helper.fakeResponseObject(200, headers, invalidChallenge, url); - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, invalidChallenge); - - cloudscraper.get(url, function(error, body, response) { - expect(error.errorType).to.be.eql(3); // errorType 3, means parsing failed - expect(response).to.be.eql(invalidChallenge); - done(); - }, headers); - - this.clock.tick(7000); // tick the timeout - }); - - it('should return error if it was thrown by request when solving challenge', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_21_05_2015.html'), - response = helper.fakeResponseObject(503, headers, jsChallengePage, url), - connectionError = {error: 'ECONNRESET'}, - stubbed; - - // Cloudflare is enabled for site. 
It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .onCall(0) - .callsArgWith(1, null, response, jsChallengePage); - - stubbed - .onCall(1) - .callsArgWith(1, connectionError); - - cloudscraper.get(url, function(error) { - expect(error).to.be.eql({errorType: 0, error: connectionError}); // errorType 0, connection eror for example - done(); - }, headers); - - this.clock.tick(7000); // tick the timeout - }); - - it('should properly handle a case when after a challenge another one is returned', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_09_06_2016.html'); - var response = helper.fakeResponseObject(503, headers, jsChallengePage, url); - var stubbed; - - var pageWithCaptchaResponse = { statusCode: 200 }; - // Cloudflare is enabled for site. It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(helper.requestParams({url: url, headers: headers})) - .callsArgWith(1, null, response, jsChallengePage); - - // Second call to request.get returns recaptcha - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: sinon.match.any, - headers: sinon.match.any, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 2 - }) - .callsArgWith(1, null, pageWithCaptchaResponse, captchaPage); - - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 1}); // errorType 1, means captcha is served - expect(response).to.be.eql(captchaPage); - done(); - }, headers); - - this.clock.tick(7000); // tick the timeout - }); -}); diff --git a/test/common.js b/test/common.js new file mode 100644 index 0000000..1c96a18 --- /dev/null +++ b/test/common.js @@ -0,0 +1,8 @@ +'use strict'; + +var chai = require('chai'); + +chai.use(require('sinon-chai')); +chai.use(require('chai-as-promised')); + +chai.config.includeStack = true; \ No newline at end of file diff --git 
a/specs/fixtures/access_denied.html b/test/fixtures/access_denied.html similarity index 100% rename from specs/fixtures/access_denied.html rename to test/fixtures/access_denied.html diff --git a/specs/fixtures/captcha.html b/test/fixtures/captcha.html similarity index 100% rename from specs/fixtures/captcha.html rename to test/fixtures/captcha.html diff --git a/specs/fixtures/invalid_js_challenge.html b/test/fixtures/invalid_js_challenge.html similarity index 100% rename from specs/fixtures/invalid_js_challenge.html rename to test/fixtures/invalid_js_challenge.html diff --git a/specs/fixtures/js_challenge_03_12_2018_1.html b/test/fixtures/js_challenge_03_12_2018_1.html similarity index 100% rename from specs/fixtures/js_challenge_03_12_2018_1.html rename to test/fixtures/js_challenge_03_12_2018_1.html diff --git a/specs/fixtures/js_challenge_03_12_2018_2.html b/test/fixtures/js_challenge_03_12_2018_2.html similarity index 100% rename from specs/fixtures/js_challenge_03_12_2018_2.html rename to test/fixtures/js_challenge_03_12_2018_2.html diff --git a/specs/fixtures/js_challenge_09_06_2016.html b/test/fixtures/js_challenge_09_06_2016.html similarity index 100% rename from specs/fixtures/js_challenge_09_06_2016.html rename to test/fixtures/js_challenge_09_06_2016.html diff --git a/specs/fixtures/js_challenge_21_05_2015.html b/test/fixtures/js_challenge_21_05_2015.html similarity index 100% rename from specs/fixtures/js_challenge_21_05_2015.html rename to test/fixtures/js_challenge_21_05_2015.html diff --git a/specs/fixtures/js_challenge_cookie.html b/test/fixtures/js_challenge_cookie.html similarity index 100% rename from specs/fixtures/js_challenge_cookie.html rename to test/fixtures/js_challenge_cookie.html diff --git a/specs/fixtures/page_with_recaptcha.html b/test/fixtures/page_with_recaptcha.html similarity index 100% rename from specs/fixtures/page_with_recaptcha.html rename to test/fixtures/page_with_recaptcha.html diff --git 
a/specs/fixtures/requested_page.html b/test/fixtures/requested_page.html similarity index 100% rename from specs/fixtures/requested_page.html rename to test/fixtures/requested_page.html diff --git a/test/helper.js b/test/helper.js new file mode 100644 index 0000000..70c602d --- /dev/null +++ b/test/helper.js @@ -0,0 +1,105 @@ +var request = require('./rp'); +var sinon = require('sinon'); +var fs = require('fs'); +var path = require('path'); + +var defaultParams = { + // Since cloudscraper wraps the callback, just ensure callback is a function + callback: sinon.match.func, + requester: sinon.match.func, + jar: request.jar(), + uri: 'http://example-site.dev/path/', + headers: { + "User-Agent": "Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36", + "Cache-Control": "private", + "Accept": "application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5" + }, + method: 'GET', + encoding: null, + realEncoding: 'utf8', + followAllRedirects: true, + cloudflareTimeout: 6000, + challengesToSolve: 3 +}; + +// Cache fixtures so they're not read from the fs but once +var cache = {}; + +module.exports = { + getFixture: function(fileName) { + if (cache[fileName] === undefined) { + cache[fileName] = fs.readFileSync(path.join(__dirname, 'fixtures', fileName), 'utf8'); + } + return cache[fileName]; + }, + defaultParams: defaultParams, + fakeResponse: function(template) { + return Object.assign({ + statusCode: 200, + headers: defaultParams.headers, + body: '' + }, template); + }, + extendParams: function(params) { + // Extend target with the default params and provided params + var target = Object.assign({}, defaultParams, params); + // Extend target.headers with defaults headers and provided headers + target.headers = Object.assign({}, defaultParams.headers, params.headers); + return target; + }, + fakeRequest: function(template) { + // In this context, fake is the request result + var fake = Object.assign({ + error: null, + // Set the 
default fake statusCode to 500 if an error is provided + response: { statusCode: template.error ? 500 : 200 } + }, template); + + // Use the body from fake response if the template doesn't provide it + if (!('body' in fake)) { + fake.body = fake.response.body; + } + + // Freeze the fake result and its properties for more reliable tests. + Object.freeze(fake); + Object.keys(fake).forEach(function (key) { + if (!Object.isFrozen(fake[key]) && !Buffer.isBuffer(fake[key])) { + // Mark all existing properties as non-configurable and non-writable. + var target = fake[key]; + Object.keys(target).forEach(function (key) { + var desc = Object.getOwnPropertyDescriptor(target, key); + if (desc.configurable) { + desc.configurable = false; + if (desc.writable !== undefined) { + desc.writable = false; + } + Object.defineProperty(target, key, desc); + } + }); + } + }); + + return function Request(params) { + var instance = request(params); + + // This is a hack to prevent sending events too early. See #104 + Object.defineProperty(instance, 'cloudscraper', { + set: function() { + // Add the required convenience property to fake the response. + fake.response.request = this; + + if (fake.error !== null) { + this.emit('error', fake.error); + } else { + this.emit('complete', fake.response, fake.body); + } + }, + get: function() { + return true; + } + }); + + return instance; + }; + } +}; diff --git a/test/rp.js b/test/rp.js new file mode 100644 index 0000000..bd9462d --- /dev/null +++ b/test/rp.js @@ -0,0 +1,26 @@ +'use strict'; + +// Reproduces: https://github.com/request/request-promise/blob/6d11ddc63dde2462a8e39cd8d0b6956556b977f1/lib/rp.js +// It must be done this way because request-promise bypasses require.cache. + +var Bluebird = require('bluebird').getNewLibraryCopy(); +var configure = require('request-promise-core/configure/request2'); +var request = require('request'); + +// Replacing start with a noop prevents real requests from being made. 
+// Request -> Request.prototype.init -> Request.prototype.start +// The test/helper is responsible for calling back with a fake response. +request.Request.prototype.start = function(){}; + +configure({ + request: request, + PromiseImpl: Bluebird, + expose: [ + 'then', + 'catch', + 'finally', + 'promise' + ] +}); + +module.exports = request; diff --git a/test/test-errors.js b/test/test-errors.js new file mode 100644 index 0000000..75e3066 --- /dev/null +++ b/test/test-errors.js @@ -0,0 +1,441 @@ +'use strict'; + +var cloudscraper = require('../index'); +var request = require('request-promise'); +var errors = require('../errors'); +var helper = require('./helper'); + +var sinon = require('sinon'); +var expect = require('chai').expect; +var assert = require('chai').assert; + +describe('Cloudscraper', function() { + var uri = helper.defaultParams.uri; + var sandbox; + var Request; + + beforeEach(function () { + sandbox = sinon.createSandbox(); + // Prepare stubbed Request for each test + Request = sandbox.stub(request, 'Request'); + // setTimeout should be properly stubbed to prevent the unit test from running too long. 
+ this.clock = sinon.useFakeTimers(); + }); + + afterEach(function () { + sandbox.restore(); + this.clock.restore(); + }); + + it('should return error if it was thrown by request', function(done) { + var fakeError = new Error('fake'); + + Request.callsFake(helper.fakeRequest({ error: fakeError })); + + var promise = cloudscraper.get(uri, function (error) { + expect(error).to.be.instanceOf(errors.RequestError); + expect(error).to.have.property('error', fakeError); + expect(error).to.have.property('errorType', 0); + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + }); + + expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); + }); + + it('should return error if captcha is served by cloudflare', function(done) { + var expectedResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('captcha.html') + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + // errorType 1, means captcha is served + expect(error).to.be.instanceOf(errors.CaptchaError); + expect(error).to.have.property('error', 'captcha'); + expect(error).to.have.property('errorType', 1); + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); + }); + + it('should return error if cloudflare returned some inner error', function(done) { + // https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages + // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 + + var expectedResponse = helper.fakeResponse({ + statusCode: 500, + body: helper.getFixture('access_denied.html') + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, 
response, body) { + // errorType 2, means inner cloudflare error + expect(error).to.be.instanceOf(errors.CloudflareError); + expect(error).to.have.property('error', 1006); + expect(error).to.have.property('errorType', 2); + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); + }); + + it('should return error if cf presented more than 3 challenges in a row', function(done) { + // The expected params for all subsequent calls to Request + var expectedParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl' + }); + + // Perform less strict matching on headers and qs to simplify this test + Object.assign(expectedParams, { + headers: sinon.match.object, + qs: sinon.match.object + }); + + // Cloudflare is enabled for site. It returns a page with js challenge + var expectedResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_09_06_2016.html') + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.instanceOf(errors.CloudflareError); + expect(error).to.have.property('error', 'Cloudflare challenge loop'); + expect(error).to.have.property('errorType', 4); + + assert.equal(Request.callCount, 4, 'Request call count'); + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + + var total = helper.defaultParams.challengesToSolve + 1; + for (var i = 1; i < total; i++) { + // Decrement the number of challengesToSolve to match actual params + expectedParams.challengesToSolve -= 1; + expect(Request.getCall(i)).to.be.calledWithExactly(expectedParams); + } + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + 
expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); + + // Tick the timeout + this.clock.tick(200000); + }); + + it('should return error if body is undefined', function(done) { + // https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages + // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 + + Request.callsFake(helper.fakeRequest({ + response: {statusCode: 500} + })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.instanceOf(errors.RequestError); + expect(error).to.have.property('error', null); + expect(error).to.have.property('errorType', 0); + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(body).to.be.equal(undefined); + }); + + expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); + }); + + it('should return error if challenge page failed to be parsed', function(done) { + var expectedResponse = helper.fakeResponse({ + body: helper.getFixture('invalid_js_challenge.html') + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error').that.is.ok; + expect(error).to.have.property('errorType', 3); + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); + + this.clock.tick(7000); // tick the timeout + }); + + it('should return error if js challenge has error during evaluation', function(done) { + var expectedResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_1.html') + }); + + // Adds a syntax error near the end of line 37 + expectedResponse.body = 
expectedResponse.body.replace(/\.toFixed/gm, '..toFixed'); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error').that.is.an('error'); + expect(error).to.have.property('errorType', 3); + expect(error.message).to.include('Challenge evaluation failed'); + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); + + this.clock.tick(7000); // tick the timeout + }); + + it('should return error if challengeId extraction fails', function(done) { + var expectedResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_1.html') + }); + + expectedResponse.body = expectedResponse.body.replace(/name="jschl_vc"/gm, ''); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error', 'challengeId (jschl_vc) extraction failed'); + expect(error).to.have.property('errorType', 3); + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); + + this.clock.tick(7000); // tick the timeout + }); + + + it('should return error if it was thrown by request when solving challenge', function(done) { + var expectedResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_21_05_2015.html') + }); + + var fakeError = Object.assign(new Error('read ECONNRESET'), { + 
code: 'ECONNRESET', errno: 'ECONNRESET', syscall: 'read' + }); + + // Cloudflare is enabled for site. It returns a page with js challenge + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: expectedResponse })); + + Request.onSecondCall() + .callsFake(helper.fakeRequest({ error: fakeError })); + + var promise = cloudscraper.get(uri, function (error) { + // errorType 0, a connection error for example + expect(error).to.be.instanceOf(errors.RequestError); + expect(error).to.have.property('error', fakeError); + expect(error).to.have.property('errorType', 0); + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + }); + + expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); + + // tick the timeout + this.clock.tick(7000); + }); + + it('should properly handle a case when after a challenge another one is returned', function(done) { + // Cloudflare is enabled for site. It returns a page with js challenge + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_09_06_2016.html') + }); + + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + // Second call to request.get returns recaptcha + var secondParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + challengesToSolve: 2 + }); + + // Perform less strict matching on headers and qs to simplify this test + Object.assign(secondParams, { + headers: sinon.match.object, + qs: sinon.match.object + }); + + var secondResponse = helper.fakeResponse({ + body: helper.getFixture('captcha.html') + }); + + Request.onSecondCall() + .callsFake(helper.fakeRequest({ response: secondResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + // errorType 1, means captcha is served + expect(error).to.be.instanceOf(errors.CaptchaError); + expect(error).to.have.property('error', 'captcha'); + 
expect(error).to.have.property('errorType', 1); + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); + }); + + expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); + + this.clock.tick(7000); // tick the timeout + }); + + it('should return error if challenge page cookie extraction fails', function(done) { + // Cloudflare is enabled for site. + // It returns a redirecting page if a (session) cookie is unset. + var expectedResponse = helper.fakeResponse({ + statusCode: 503, + // The cookie extraction codes looks for the `S` variable assignment + body: helper.getFixture('js_challenge_cookie.html').replace(/S=/gm, 'Z=') + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error', 'Cookie code extraction failed'); + expect(error).to.have.property('errorType', 3); + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); + }); + + it('should throw a TypeError if callback is not a function', function(done) { + var spy = sinon.spy(function() { + cloudscraper.get(uri); + }); + + expect(spy).to.throw(TypeError, /Expected a callback function/); + done(); + }); + + it('should throw a TypeError if requester is not a function', function (done) { + var spy = sinon.spy(function () { + cloudscraper.get({ requester: null }); + }); + + expect(spy).to.throw(TypeError, /`requester` option .*function/); + done(); + }); + + it('should throw a TypeError if 
challengesToSolve is not a number', function(done) { + var spy = sinon.spy(function() { + var options = { uri: uri, challengesToSolve: 'abc' }; + + cloudscraper.get(options, function(){}); + }); + + expect(spy).to.throw(TypeError, /`challengesToSolve` option .*number/); + done(); + }); + + it('should detect captcha in response body\'s real encoding', function (done) { + var firstParams = helper.extendParams({ + realEncoding: 'fake-encoding' + }); + + var expectedResponse = helper.fakeResponse({ + statusCode: 503, + body: { + toString: function(encoding) { + if (encoding === 'fake-encoding') { + return helper.getFixture('captcha.html'); + } + + return 'fake response body'; + } + } + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var options = { uri: uri, encoding: 'fake-encoding' }; + + var promise = cloudscraper.get(options, function (error, response, body) { + // errorType 1, means captcha is served + expect(error).to.be.instanceOf(errors.CaptchaError); + expect(error).to.have.property('error', 'captcha'); + expect(error).to.have.property('errorType', 1); + + expect(Request).to.be.calledOnceWithExactly(firstParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body.toString('fake-encoding')); + }); + + expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); + + this.clock.tick(7000); // tick the timeout + }); + + it('should return error if cookie setting code evaluation fails', function(done) { + // Change the cookie setting code so the vm will throw an error + var html = helper.getFixture('js_challenge_cookie.html'); + var b64 = (new Buffer('throw new Error(\'vm eval failed\');')).toString('base64'); + + var expectedResponse = helper.fakeResponse({ + statusCode: 503, + body: html.replace(/S='([^']+)'/, 'S=\'' + b64 + '\'') + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, 
response, body) { + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error').that.is.an('error'); + expect(error).to.have.property('errorType', 3); + expect(error.message).to.include('vm eval failed'); + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); + + this.clock.tick(7000); // tick the timeout + }); +}); diff --git a/test/test-index.js b/test/test-index.js new file mode 100644 index 0000000..02cffa8 --- /dev/null +++ b/test/test-index.js @@ -0,0 +1,470 @@ +'use strict'; + +var cloudscraper = require('../index'); +var request = require('request-promise'); +var helper = require('./helper'); + +var sinon = require('sinon'); +var expect = require('chai').expect; + +describe('Cloudscraper', function () { + var requestedPage = helper.getFixture('requested_page.html'); + var uri = helper.defaultParams.uri; + var sandbox; + var Request; + + beforeEach(function () { + helper.defaultParams.jar = request.jar(); + sandbox = sinon.createSandbox(); + // Prepare stubbed Request for each test + Request = sandbox.stub(request, 'Request'); + // setTimeout should be properly stubbed to prevent the unit test from running too long. 
+ this.clock = sinon.useFakeTimers(); + }); + + afterEach(function () { + sandbox.restore(); + this.clock.restore(); + }); + + it('should return requested page, if cloudflare is disabled for page', function (done) { + var expectedResponse = helper.fakeResponse({ + statusCode: 200, + body: requestedPage + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); + }); + + it('should not trigger any error if recaptcha is present in page not protected by CF', function (done) { + var expectedResponse = helper.fakeResponse({ + statusCode: 200, + body: helper.getFixture('page_with_recaptcha.html') + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); + }); + + it('should resolve challenge (version as on 21.05.2015) and then return page', function (done) { + // Cloudflare is enabled for site. 
It returns a page with js challenge + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_21_05_2015.html') + }); + + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + qs: { + 'jschl_vc': '89cdff5eaa25923e0f26e29e5195dce9', + // 633 is a answer to cloudflare's js challenge in this particular case + 'jschl_answer': 633 + 'example-site.dev'.length, + 'pass': '1432194174.495-8TSfc235EQ' + }, + headers: { + 'Referer': 'http://example-site.dev/path/' + }, + challengesToSolve: 2 + }); + + // Second call to Request will have challenge solution + // It should contain uri, answer, headers with Referer + var secondResponse = helper.fakeResponse({ body: requestedPage }); + + Request.onSecondCall()// Cloudflare is enabled for site. It returns a page with js challenge + .callsFake(helper.fakeRequest({ response: secondResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); + }); + + expect(promise).to.eventually.equal(secondResponse.body).and.notify(done); + + // tick the timeout + this.clock.tick(7000); + }); + + it('should resolve challenge (version as on 09.06.2016) and then return page', function (done) { + // Cloudflare is enabled for site. 
It returns a page with js challenge + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_09_06_2016.html') + }); + + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + qs: { + 'jschl_vc': '346b959db0cfa38f9938acc11d6e1e6e', + // 6632 is a answer to cloudflares js challenge in this particular case + 'jschl_answer': 6632 + 'example-site.dev'.length, + 'pass': '1465488330.6-N/NbGTg+IM' + }, + headers: { + 'Referer': 'http://example-site.dev/path/' + }, + challengesToSolve: 2 + }); + + // Second call to Request will have challenge solution + // It should contain uri, answer, headers with Referer + var secondResponse = helper.fakeResponse({ body: requestedPage }); + + Request.onSecondCall() + .callsFake(helper.fakeRequest({ response: secondResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); + }); + + expect(promise).to.eventually.equal(secondResponse.body).and.notify(done); + + this.clock.tick(7000); // tick the timeout + }); + + it('should resolve 2 consequent challenges', function (done) { + var firstParams = helper.extendParams({ resolveWithFullResponse: true }); + // First call and CF returns a challenge + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_1.html') + }); + + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + resolveWithFullResponse: true, + uri: 
'http://example-site.dev/cdn-cgi/l/chk_jschl', + qs: { + 'jschl_vc': '427c2b1cd4fba29608ee81b200e94bfa', + 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 is a answer to cloudflares js challenge + // in this particular case + 'pass': '1543827239.915-44n9IE20mS' + }, + headers: { + 'Referer': 'http://example-site.dev/path/' + }, + challengesToSolve: 2 + }); + + // We submit a solution to the first challenge, but CF decided to give us a second one + var secondResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_2.html') + }); + + Request.onSecondCall() + .callsFake(helper.fakeRequest({ response: secondResponse })); + + var thirdParams = helper.extendParams({ + resolveWithFullResponse: true, + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + qs: { + 'jschl_vc': 'a41fee3a9f041fea01f0cbf3e8e4d29b', + // 1.9145049856 is a answer to cloudflares js challenge in this particular case + 'jschl_answer': -1.9145049856 + 'example-site.dev'.length, + 'pass': '1543827246.024-hvxyNA3rOg' + }, + headers: { + 'Referer': 'http://example-site.dev/cdn-cgi/l/chk_jschl?jschl_vc=427c2b1cd4fba29608ee81b200e94bfa&jschl_answer=10.66734594&pass=1543827239.915-44n9IE20mS' + }, + challengesToSolve: 1 + }); + + var thirdResponse = helper.fakeResponse({ body: requestedPage }); + + // We submit a solution to the second challenge and CF returns requested page + Request.onThirdCall() + .callsFake(helper.fakeRequest({ response: thirdResponse })); + + var options = { uri: uri, resolveWithFullResponse: true }; + + var promise = cloudscraper.get(options, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledThrice; + expect(Request.firstCall).to.be.calledWithExactly(firstParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + expect(Request.thirdCall).to.be.calledWithExactly(thirdParams); + + expect(response).to.be.equal(thirdResponse); + 
expect(body).to.be.equal(thirdResponse.body); + }); + + expect(promise).to.eventually.equal(thirdResponse).and.notify(done); + + this.clock.tick(14000); // tick the timeout + }); + + it('should make post request with formData', function (done) { + var formData = { some: 'data' }; + + var expectedParams = helper.extendParams({ + method: 'POST', + formData: formData + }); + // Stub first call, which request makes to page. It should return requested page + var expectedResponse = helper.fakeResponse({ body: requestedPage }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var options = { uri: uri, formData: formData }; + + var promise = cloudscraper.post(options, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledOnceWithExactly(expectedParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); + }); + + it('should make delete request', function (done) { + var expectedParams = helper.extendParams({ method: 'DELETE' }); + // Stub first call, which request makes to page. 
It should return requested page + var expectedResponse = helper.fakeResponse({ body: requestedPage }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.delete(uri, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledOnceWithExactly(expectedParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); + }); + + it('should return raw data when encoding is null', function (done) { + var expectedParams = helper.extendParams({ realEncoding: null }); + + var expectedResponse = helper.fakeResponse({ + body: new Buffer('R0lGODlhDwAPAKECAAAAzMzM/////wAAACwAAAAADwAPAAACIISPeQHsrZ5ModrLlN48CXF8m2iQ3YmmKqVlRtW4MLwWACH+H09wdGltaXplZCBieSBVbGVhZCBTbWFydFNhdmVyIQAAOw==', 'base64') + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var options = { uri: uri, encoding: null }; + + var promise = cloudscraper.get(options, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledOnceWithExactly(expectedParams); + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); + }); + + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); + }); + + it('should set the given cookie and then return page', function (done) { + var firstResponse = helper.fakeResponse({ + body: helper.getFixture('js_challenge_cookie.html') + }); + + // Cloudflare is enabled for site. + // It returns a redirecting page if a (session) cookie is unset. 
+ Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ challengesToSolve: 2 }); + var secondResponse = helper.fakeResponse({ body: requestedPage }); + + // Only callback with the second response if the cookie string matches + var matchCookie = sinon.match(function (params) { + return params.jar.getCookieString(uri) === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a'; + }); + + // Prevent a matching error if for some reason params.jar is missing or invalid. + var matchParams = sinon.match.has('jar', sinon.match.object).and(matchCookie); + + Request.withArgs(matchParams) + .callsFake(helper.fakeRequest({ response: secondResponse })); + + // We need to override cloudscraper's default jar for this test + var options = { uri: uri, jar: helper.defaultParams.jar }; + + var promise = cloudscraper.get(options, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); + }); + + expect(promise).to.eventually.equal(secondResponse.body).and.notify(done); + }); + + it('should not use proxy\'s uri', function (done) { + + var firstParams = helper.extendParams({ + proxy: 'https://example-proxy-site.dev/path/' + }); + + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_1.html') + }); + + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + proxy: 'https://example-proxy-site.dev/path/', + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + qs: { + 'jschl_vc': '427c2b1cd4fba29608ee81b200e94bfa', + 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 
is a answer to cloudflares js challenge + // in this particular case + 'pass': '1543827239.915-44n9IE20mS' + }, + headers: { + 'Referer': 'http://example-site.dev/path/' + }, + challengesToSolve: 2 + }); + + var secondResponse = helper.fakeResponse({ body: requestedPage }); + + Request.onSecondCall() + .callsFake(helper.fakeRequest({ response: secondResponse })); + + var options = { uri: uri, proxy: 'https://example-proxy-site.dev/path/' }; + + var promise = cloudscraper.get(options, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(firstParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); + }); + + expect(promise).to.eventually.equal(secondResponse.body).and.notify(done); + + this.clock.tick(14000); // tick the timeout + }); + + it('should reuse the provided cookie jar', function(done) { + var customJar = request.jar(); + + var firstParams = helper.extendParams({ jar: customJar }); + + var firstResponse = helper.fakeResponse({ + body: helper.getFixture('js_challenge_cookie.html') + }); + + // Cloudflare is enabled for site. + // It returns a redirecting page if a (session) cookie is unset. + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + jar: customJar, + challengesToSolve: 2 + }); + + var secondResponse = helper.fakeResponse({ body: requestedPage }); + + // Only callback with the second response if the cookie string matches + var matchCookie = sinon.match(function (params) { + return params.jar.getCookieString(uri) === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a'; + }); + + // Prevent a matching error if for some reason params.jar is missing or invalid. 
+ var matchParams = sinon.match.has('jar', sinon.match.object).and(matchCookie); + + Request.withArgs(matchParams) + .callsFake(helper.fakeRequest({ response: secondResponse })); + + // We need to override cloudscraper's default jar for this test + var options = { uri: uri, jar: customJar }; + + customJar.setCookie('custom cookie', 'http://custom-site.dev/'); + + cloudscraper.get(options, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(firstParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); + + var customCookie = customJar.getCookieString('http://custom-site.dev/'); + expect(customCookie).to.equal('custom cookie'); + + cloudscraper.get(options, function(error, response, body) { + expect(error).to.be.null; + + expect(Request.thirdCall.args[0].jar).to.equal(customJar); + customCookie = customJar.getCookieString('http://custom-site.dev/'); + expect(customCookie).to.equal('custom cookie'); + + done(); + }); + }); + }); + + it('should define custom defaults function', function (done) { + expect(cloudscraper.defaults).to.not.equal(request.defaults); + + var custom = cloudscraper.defaults({ challengesToSolve: 5 }); + expect(custom.defaults).to.equal(cloudscraper.defaults); + done(); + }); +}); diff --git a/test/test-rp.js b/test/test-rp.js new file mode 100644 index 0000000..e43e40b --- /dev/null +++ b/test/test-rp.js @@ -0,0 +1,105 @@ +'use strict'; + +var cloudscraper = require('../index'); +var request = require('request-promise'); +var helper = require('./helper'); + +var sinon = require('sinon'); +var expect = require('chai').expect; + +describe('Cloudscraper promise', function () { + var requestedPage = helper.getFixture('requested_page.html'); + var uri = helper.defaultParams.uri; + var sandbox; + var Request; + + beforeEach(function () { 
+ helper.defaultParams.jar = request.jar(); + sandbox = sinon.createSandbox(); + // Prepare stubbed Request for each test + Request = sandbox.stub(request, 'Request'); + // setTimeout should be properly stubbed to prevent the unit test from running too long. + this.clock = sinon.useFakeTimers(); + }); + + afterEach(function () { + sandbox.restore(); + this.clock.restore(); + }); + + it('should resolve with response body', function () { + var expectedResponse = helper.fakeResponse({ body: requestedPage }); + var expectedParams = helper.extendParams({ callback: undefined }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri); + + return promise.then(function (body) { + expect(Request).to.be.calledOnceWithExactly(expectedParams); + expect(body).to.be.equal(requestedPage); + }); + }); + + it('should resolve with full response', function () { + var expectedResponse = helper.fakeResponse({ + statusCode: 200, + body: requestedPage + }); + + var expectedParams = helper.extendParams({ + callback: undefined, + resolveWithFullResponse: true + }); + + // The method is implicitly GET + delete expectedParams.method; + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper({ + uri: uri, + resolveWithFullResponse: true + }); + + return promise.then(function (response) { + expect(Request).to.be.calledOnceWithExactly(expectedParams); + + expect(response).to.be.equal(expectedResponse); + expect(response.body).to.be.equal(requestedPage); + }); + }); + + // The helper calls the fake request callback synchronously. This results + // in the promise being rejected before we catch it in the test. + // This can be noticeable if we return the promise instead of calling done. 
+  // Checks that the value returned by cloudscraper(uri) exposes `.catch` and
+  // that the handler runs when the stubbed request ends in a rejection.
+  // NOTE(review): the fake response only carries an `error` field; how that
+  // leads to a rejection depends on helper.fakeRequest -- confirm there.
+  it('should define catch', function (done) {
+    var expectedResponse = helper.fakeResponse({ error: new Error('fake') });
+
+    Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+    var caught = false;
+    var promise = cloudscraper(uri);
+
+    promise.catch(function () {
+      caught = true;
+    }).then(function () {
+      // Fail immediately with a clear message instead of leaving `done`
+      // uncalled, which would only surface as a mocha timeout.
+      if (caught) {
+        done();
+      } else {
+        done(new Error('Expected the promise to be rejected'));
+      }
+    });
+  });
+
+  // Checks that the value returned by cloudscraper(uri) exposes `.finally`
+  // and that the handler runs even though the `.then` callback is skipped.
+  it('should define finally', function (done) {
+    var expectedResponse = helper.fakeResponse({ error: new Error('fake') });
+
+    Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+    // Set only if the promise unexpectedly fulfills.
+    var resolved = false;
+    var promise = cloudscraper(uri);
+
+    promise.then(function () {
+      resolved = true;
+    }).finally(function () {
+      if (resolved) {
+        done(new Error('Expected the promise to be rejected'));
+      } else {
+        done();
+      }
+    }).catch(function () {
+      // `.finally` passes the original rejection through; swallow it here so
+      // the chain does not end in an unhandled rejection.
+    });
+  });
+});