diff --git a/.gitignore b/.gitignore
index 8b19616..edf6beb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,4 +27,5 @@ node_modules
# Users Environment Variables
.lock-wscript
-test.js
\ No newline at end of file
+test.js
+.nyc_output/
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index b154529..ca14ddd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -7,4 +7,6 @@ node_js:
- 8
- 6
-sudo: false
\ No newline at end of file
+sudo: false
+
+after_success: npm run coverage
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2c102d1..75962c0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
## Change Log
+### v3.0.0 (07/03/2019)
+- **BREAKING CHANGE**: `get/post` methods together with their signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod)
+- **BREAKING CHANGE**: `cloudscraper.request` method is deprecated in favour of `cloudscraper(options)`
+- Promise support has been added by using `request-promise`
+- Error objects are inherited from Error and have additional properties.
+ * `options` - The request options
+ * `cause` - An alias for `error`
+ * `response` - The request response
+- Stacktraces are available in error objects
+- `cloudflareTimeout` option can be defined to speed up waiting time
+- Challenge evaluation is done in a sandbox to avoid potential security issues
+- Default [request methods](https://github.com/request/request#requestmethod) are available
+- Custom cookie jar can now be passed [#103](https://github.com/codemanki/cloudscraper/issues/102)
+- Proxies support [PR#101](https://github.com/codemanki/cloudscraper/pull/101)
+- MIT license
+
+### v2.0.1 (02/03/2019)
+- Minor documentation changes
+
### v2.0.0 (09/12/2018)
- [#2943](https://github.com/codemanki/cloudscraper/pull/66) Support recursive challenge solving.
- **BREAKING CHANGE** Before this, when any error has been detected, the callback was called with an incorrect order: `callback(.., body, response);` instead of `return callback(..., response, body);`
+
diff --git a/Gruntfile.js b/Gruntfile.js
deleted file mode 100644
index 4617377..0000000
--- a/Gruntfile.js
+++ /dev/null
@@ -1,20 +0,0 @@
-module.exports = function(grunt) {
-
- grunt.loadNpmTasks('grunt-mocha-test');
-
- grunt.initConfig({
- mochaTest: {
- test: {
- options: {
- globals: ['expect', 'sinon'],
- reporter: 'spec',
- quiet: false,
- require: './specs/chai'
- },
- src: ['specs/**/*.js']
- }
- }
- });
-
- grunt.registerTask('default', ['mochaTest']);
-};
diff --git a/README.md b/README.md
index 3d848de..6b12b05 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,9 @@ Node.js library to bypass cloudflare's anti-ddos page.
[![js-semistandard-style](https://cdn.rawgit.com/flet/semistandard/master/badge.svg)](https://github.com/Flet/semistandard)
+[![Build status](https://img.shields.io/travis/codemanki/cloudscraper/master.svg?style=flat-square)](https://travis-ci.org/codemanki/cloudscraper)
+[![Coverage](https://img.shields.io/coveralls/codemanki/cloudscraper.svg?style=flat-square)](https://coveralls.io/r/codemanki/cloudscraper)
+
This library is a port of python module [cloudflare-scrape](https://github.com/Anorov/cloudflare-scrape) with couple enhancements and test cases ;)
. All grats to its author \m/
@@ -26,6 +29,46 @@ __Unfortunately, there is no support for handling a CAPTCHA, if the response con
If you notice that for some reason cloudscraper stopped to work, do not hesitate and get in touch with me ( by creating an issue here, for example), so i can update it.
+Migration from v2 to v3
+============
+- Replace `cloudscraper.request(options)` with `cloudscraper(options)`
+- `cloudscraper.get()` and `cloudscraper.post()` method signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod):
+```
+var options = {
+ uri: 'https://website.com/',
+ headers: {/*...*/}
+};
+
+cloudscraper.get(options, function(error, response, body) {
+ console.log(body);
+});
+```
+or for **POST**
+```
+var options = {
+ uri: 'https://website.com/',
+ headers: {/*...*/},
+ formData: { field1: 'value', field2: 2 }
+};
+
+cloudscraper.post(options, function(error, response, body) {
+ console.log(body);
+});
+```
+- If you are using custom promise support workarounds please remove them as cloudscraper now uses [request-promise](https://github.com/request/request-promise):
+
+```
+var cloudscraper = require('cloudscraper');
+var options = {
+ uri: 'https://website.com/',
+ method: 'GET'
+};
+
+cloudscraper(options).then(function(body) {
+ console.log(body);
+});
+```
+
Install
============
```javascript
@@ -37,7 +80,7 @@ Usage
```javascript
var cloudscraper = require('cloudscraper');
-cloudscraper.get('http://website.com/', function(error, response, body) {
+cloudscraper.get('https://website.com/', function(error, response, body) {
if (error) {
console.log('Error occurred');
} else {
@@ -49,30 +92,78 @@ cloudscraper.get('http://website.com/', function(error, response, body) {
or for `POST` action:
```javascript
-cloudscraper.post('http://website.com/', {field1: 'value', field2: 2}, function(error, response, body) {
- ...
+var options = {
+ uri: 'https://website.com/',
+ formData: { field1: 'value', field2: 2 }
+};
+
+cloudscraper.post(options, function(error, response, body) {
+ console.log(body);
});
```
-A generic request can be made with `cloudscraper.request(options, callback)`. The options object should follow [request's options](https://www.npmjs.com/package/request#request-options-callback). Not everything is supported however, for example http methods other than GET and POST. If you wanted to request an image in binary data you could use the encoding option:
+A generic request can be made with `cloudscraper(options, callback)`. The options object should follow [request's options](https://www.npmjs.com/package/request#request-options-callback). Not everything is supported however, for example http methods other than GET and POST. If you wanted to request an image in binary data you could use the encoding option:
```javascript
-cloudscraper.request({method: 'GET',
- url:'http://website.com/image',
- encoding: null,
- challengesToSolve: 3, // optional, if CF returns challenge after challenge, how many to solve before failing
- followAllRedirects: true, // mandatory for successful challenge solution
- }, function(err, response, body) {
- //body is now a buffer object instead of a string
+var options = {
+ method: 'GET',
+ url:'http://website.com/',
+};
+
+cloudscraper(options, function(err, response, body) {
+ console.log(response)
});
```
-## Error object
-Error object has following structure:
-```
-var error = {errorType: 0, error:...};
+## Advanced usage
+Cloudscraper wraps request and request-promise, so using cloudscraper is pretty much like using those two libraries.
+ - Cloudscraper exposes [the same request methods as request](https://github.com/request/request#requestmethod):
+ `cloudscraper.get(options, callback)`
+ `cloudscraper.post(options, callback)`
+ `cloudscraper(uri)`
+ Please refer to request's documentation for further instructions
+ - Cloudscraper uses request-promise, promise chaining is done exactly the same as described in [docs](https://github.com/request/request-promise#cheat-sheet):
+ ```
+ cloudscraper(options)
+ .then(function (htmlString) {
+ })
+ .catch(function (err) {
+ });
+ ```
+
+## Default options
+Cloudscraper exposes the following options that are required by default but might be changed. Please note that the default options increase the chances of it working correctly.
+
```
+var options = {
+ uri: 'https://website',
+ jar: requestModule.jar(), // Custom cookie jar
+ headers: {
+ // User agent, Cache Control and Accept headers are required
+ 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36',
+ 'Cache-Control': 'private',
+ 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'
+ },
+ // Cloudflare requires a delay of 5 seconds, so wait for at least 6.
+ cloudflareTimeout: 6000,
+ // followAllRedirects - follow non-GET HTTP 3xx responses as redirects
+ followAllRedirects: true,
+ // Support only this max challenges in row. If CF returns more, throw an error
+ challengesToSolve: 3
+};
+
+cloudscraper(options, function(error, response, body) {
+ console.log(body)
+});
+```
+## Error object
+Cloudscraper error object inherits from `Error` and has the following fields:
+ * `name` - `RequestError`/`CaptchaError`/`CloudflareError`/`ParserError`
+ * `options` - The request options
+ * `cause` - An alias for `error`
+ * `response` - The request response
+ * `errorType` - Custom error code
Where `errorType` can be following:
- `0` if request to page failed due to some native reason as bad url, http connection or so. `error` in this case will be error [event](http://nodejs.org/api/http.html#http_class_http_server)
- `1` cloudflare returned captcha. Nothing to do here. Bad luck
@@ -80,7 +171,6 @@ Where `errorType` can be following:
- `3` this error is returned when library failed to parse and solve js challenge. `error` will be `String` with some details. :warning: :warning: __Most likely it means that cloudflare have changed their js challenge.__
- `4` CF went into a loop and started to return challenge after challenge. If number of solved challenges is greater than `3` and another challenge is returned, throw an error
-
Running tests
============
Clone this repo, do `npm install` and then just `grunt`
@@ -88,12 +178,9 @@ Clone this repo, do `npm install` and then just `grunt`
### Unknown error? Library stopped working? ###
Let me know, by opening [issue](https://github.com/codemanki/cloudscraper/issues) in this repo and i will update library asap. Please, provide url and body of page where cloudscraper failed.
-
-CloudScraper uses [Request](https://github.com/request/request) to perform requests.
-
WAT
===========
-Current cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So everytime you call `cloudscraper.get` you should expect it to return result after min 6 seconds.
+Current cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So every time you call `cloudscraper.get/post` you should expect it to return result after minimum 6 seconds. If you want to change this behaviour, you would need to make a generic request as described above and pass the `cloudflareTimeout` option with your value. But be aware that cloudflare might track this timeout and use it against you ;)
## TODO
- [x] Check for recaptcha
@@ -102,17 +189,20 @@ Current cloudflare implementation requires browser to respect the timeout of 5 s
- [x] Add proper testing
- [x] Remove manual 302 processing, replace with `followAllRedirects` param
- [ ] Parse out the timeout from chalenge page
- - [ ] Reoder the arguments in get/post/request methods and allow custom options to be passed in
+ - [x] Reorder the arguments in get/post/request methods and allow custom options to be passed in
- [ ] Expose solve methods to use them independently
- [ ] Support recaptcha solving
- - [ ] Promisification
+ - [x] Promisification
## Kudos to contributors
+ - [Dwayne](https://github.com/pro-src) rewrote the whole library by himself and closed a bunch of issues and feature requests. Praise him for 3.0.0 version ❤️
- [roflmuffin](https://github.com/roflmuffin)
- [Colecf](https://github.com/Colecf)
- [Jeongbong Seo](https://github.com/jngbng)
- [Kamikadze4GAME](https://github.com/Kamikadze4GAME)
## Dependencies
-* request https://github.com/request/request
+* [request](https://github.com/request/request)
+* [request-promise](https://github.com/request/request-promise)
+
diff --git a/errors.js b/errors.js
new file mode 100644
index 0000000..21bff5e
--- /dev/null
+++ b/errors.js
@@ -0,0 +1,90 @@
+'use strict';
+
+// The purpose of this library is two-fold.
+// 1. Have errors consistent with request/promise-core
+// 2. Prevent request/promise core from wrapping our errors
+
+// There are two differences between these errors and the originals.
+// 1. There is a non-enumerable errorType attribute.
+// 2. The error constructor is hidden from the stacktrace.
+
+var EOL = require('os').EOL;
+var BUG_REPORT = format([
+ '### Cloudflare may have changed their technique, or there may be a bug.',
+ '### Bug Reports: https://github.com/codemanki/cloudscraper/issues',
+ '### Check the detailed exception message that follows for the cause.'
+]);
+
+var original = require('request-promise-core/errors');
+var OriginalError = original.RequestError;
+
+var RequestError = create('RequestError', 0);
+var CaptchaError = create('CaptchaError', 1);
+var CloudflareError = create('CloudflareError', 2);
+var ParserError = create('ParserError', 3);
+// errorType 4 is a CloudflareError so that constructor is reused.
+
+// The following errors originate from promise-core and its dependents.
+// Give them an errorType for consistency.
+original.StatusCodeError.prototype.errorType = 5;
+original.TransformError.prototype.errorType = 6;
+
+// This replaces the RequestError for all libraries using request/promise-core
+// and prevents silent failure.
+Object.defineProperty(original, 'RequestError', {
+ configurable: true,
+ enumerable: true,
+ writable: true,
+ value: RequestError
+});
+
+// Export our custom errors along with StatusCodeError, etc.
+Object.assign(module.exports, original, {
+ RequestError: RequestError,
+ CaptchaError: CaptchaError,
+ ParserError: ParserError,
+ CloudflareError: CloudflareError
+});
+
+function create(name, errorType) {
+ function CustomError(cause, options, response) {
+
+ // This prevents nasty things e.g. `error.cause.error` and
+ // is why replacing the original RequestError is necessary.
+ if (cause instanceof OriginalError) {
+ return cause;
+ }
+
+ OriginalError.apply(this, arguments);
+
+ // Change the name to match this constructor
+ this.name = name;
+
+ if (this instanceof ParserError) {
+ this.message = BUG_REPORT + this.message;
+ }
+
+ if (Error.captureStackTrace) { // required for non-V8 environments
+ // Provide a proper stack trace that hides this constructor
+ Error.captureStackTrace(this, CustomError);
+ }
+ }
+
+ CustomError.prototype = Object.create(OriginalError.prototype);
+ CustomError.prototype.constructor = CustomError;
+ // Keeps things stealthy by defining errorType on the prototype.
+ // This makes it non-enumerable and safer to add.
+ CustomError.prototype.errorType = errorType;
+
+ Object.setPrototypeOf(CustomError, Object.getPrototypeOf(OriginalError));
+ Object.defineProperty(CustomError, 'name', {
+ configurable: true,
+ value: name
+ });
+
+ return CustomError;
+}
+
+function format(lines) {
+ return EOL + lines.join(EOL) + EOL + EOL;
+}
diff --git a/index.js b/index.js
index a2c51b4..bbcdb53 100644
--- a/index.js
+++ b/index.js
@@ -1,220 +1,267 @@
var vm = require('vm');
-var requestModule = require('request');
-var jar = requestModule.jar();
-
-var request = requestModule.defaults({jar: jar}); // Cookies should be enabled
-var UserAgent = 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36';
-var Timeout = 6000; // Cloudflare requires a delay of 5 seconds, so wait for at least 6.
-var cloudscraper = {};
-var MaxChallengesToSolve = 3; // Support only this max challenges in row. If CF returns more, throw an error
-
-/**
- * Performs get request to url with headers.
- * @param {String} url
- * @param {Function} callback function(error, response, body) {}
- * @param {Object} headers Hash with headers, e.g. {'Referer': 'http://google.com', 'User-Agent': '...'}
- */
-cloudscraper.get = function(url, callback, headers) {
- performRequest({
- method: 'GET',
- url: url,
- headers: headers
- }, callback);
+var requestModule = require('request-promise');
+var errors = require('./errors');
+
+var VM_OPTIONS = {
+ timeout: 5000
};
-/**
- * Performs post request to url with headers.
- * @param {String} url
- * @param {String|Object} body Will be passed as form data
- * @param {Function} callback function(error, response, body) {}
- * @param {Object} headers Hash with headers, e.g. {'Referer': 'http://google.com', 'User-Agent': '...'}
- */
-cloudscraper.post = function(url, body, callback, headers) {
- var data = '';
- var bodyType = Object.prototype.toString.call(body);
-
- if(bodyType === '[object String]') {
- data = body;
- } else if (bodyType === '[object Object]') {
- data = Object.keys(body).map(function(key) {
- return key + '=' + body[key];
- }).join('&');
- }
+module.exports = defaults.call(requestModule);
- headers = headers || {};
- headers['Content-Type'] = headers['Content-Type'] || 'application/x-www-form-urlencoded; charset=UTF-8';
- headers['Content-Length'] = headers['Content-Length'] || data.length;
+function defaults(params) {
+ // isCloudScraper === !isRequestModule
+ var isRequestModule = this === requestModule;
- performRequest({
- method: 'POST',
- body: data,
- url: url,
- headers: headers
- }, callback);
-};
+ var defaultParams = (!isRequestModule && this.defaultParams) || {
+ requester: requestModule,
+ // Cookies should be enabled
+ jar: requestModule.jar(),
+ headers: {
+ 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36',
+ 'Cache-Control': 'private',
+ 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'
+ },
+ // Cloudflare requires a delay of 5 seconds, so wait for at least 6.
+ cloudflareTimeout: 6000,
+ // followAllRedirects - follow non-GET HTTP 3xx responses as redirects
+ followAllRedirects: true,
+ // Support only this max challenges in row. If CF returns more, throw an error
+ challengesToSolve: 3
+ };
-/**
- * Performs get or post request with generic request options
- * @param {Object} options Object to be passed to request's options argument
- * @param {Function} callback function(error, response, body) {}
- */
-cloudscraper.request = function(options, callback) {
- performRequest(options, callback);
-};
+ // Object.assign requires at least nodejs v4, request only test/supports v6+
+ defaultParams = Object.assign({}, defaultParams, params);
-function performRequest(options, callback) {
- options = options || {};
- options.headers = options.headers || {};
+ var cloudscraper = requestModule.defaults
+ .call(this, defaultParams, function(options) {
+ return performRequest(options, true);
+ });
- options.headers['Cache-Control'] = options.headers['Cache-Control'] || 'private';
- options.headers['Accept'] = options.headers['Accept'] || 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5';
+ // There's no safety net here, any changes apply to all future requests
+ // that are made with this instance and derived instances.
+ cloudscraper.defaultParams = defaultParams;
- makeRequest = requestMethod(options.method);
+ // Ensure this instance gets a copy of our custom defaults function
+ // and afterwards, it will be copied over automatically.
+ if (isRequestModule) {
+ cloudscraper.defaults = defaults;
+ }
+ // Expose the debug option
+ Object.defineProperty(cloudscraper, 'debug',
+ Object.getOwnPropertyDescriptor(this, 'debug'));
- //Can't just do the normal options.encoding || 'utf8'
- //because null is a valid encoding.
- if('encoding' in options) {
- options.realEncoding = options.encoding;
- } else {
- options.realEncoding = 'utf8';
+ return cloudscraper;
+}
+
+// This function is wrapped to ensure that we get new options on first call.
+// The options object is reused in subsequent calls when calling it directly.
+function performRequest(options, isFirstRequest) {
+ // Prevent overwriting realEncoding in subsequent calls
+ if (!('realEncoding' in options)) {
+ // Can't just do the normal options.encoding || 'utf8'
+ // because null is a valid encoding.
+ if ('encoding' in options) {
+ options.realEncoding = options.encoding;
+ } else {
+ options.realEncoding = 'utf8';
+ }
}
+
options.encoding = null;
- if (!options.url || !callback) {
- throw new Error('To perform request, define both url and callback');
+ if (isNaN(options.challengesToSolve)) {
+ throw new TypeError('Expected `challengesToSolve` option to be a number, '
+ + 'got ' + typeof(options.challengesToSolve) + ' instead.');
}
- options.headers['User-Agent'] = options.headers['User-Agent'] || UserAgent;
- options.challengesToSolve = options.challengesToSolve || MaxChallengesToSolve; // Might not be the best way how to pass this variable
- options.followAllRedirects = options.followAllRedirects === undefined ? true : options.followAllRedirects;
+ // This should be the default export of either request or request-promise.
+ var requester = options.requester;
- makeRequest(options, function(error, response, body) {
- processRequestResponse(options, {error: error, response: response, body: body}, callback);
- });
+ if (typeof requester !== 'function') {
+ throw new TypeError('Expected `requester` option to be a function, got '
+ + typeof(requester) + ' instead.');
+ }
+
+ var request = requester(options);
+
+ // If the requester is not request-promise, ensure we get a callback.
+ if (typeof request.callback !== 'function') {
+ throw new TypeError('Expected a callback function, got '
+ + typeof(request.callback) + ' instead.');
+ }
+
+ // We only need the callback from the first request.
+ // The other callbacks can be safely ignored.
+ if (isFirstRequest) {
+ // This should be a user supplied callback or request-promise's callback.
+ // The callback is always wrapped/bound to the request instance.
+ options.callback = request.callback;
+ }
+
+ // The error event only provides an error argument.
+ request.removeAllListeners('error')
+ .once('error', processRequestResponse.bind(null, options));
+ // The complete event only provides response and body arguments.
+ request.removeAllListeners('complete')
+ .once('complete', processRequestResponse.bind(null, options, null));
+
+ // Indicate that this is a cloudscraper request, required by test/helper.
+ request.cloudscraper = true;
+ return request;
}
-function processRequestResponse(options, requestResult, callback) {
- var error = requestResult.error;
- var response = requestResult.response;
- var body = requestResult.body;
- var validationError;
+// The argument convention is options first where possible, options
+// always before response, and body always after response.
+function processRequestResponse(options, error, response, body) {
+ var callback = options.callback;
+
var stringBody;
var isChallengePresent;
var isRedirectChallengePresent;
- var isTargetPage; // Meaning we have finally reached the target page
if (error || !body || !body.toString) {
- return callback({ errorType: 0, error: error }, response, body);
+ // Pure request error (bad connection, wrong url, etc)
+ error = new errors.RequestError(error, options, response);
+
+ return callback(error, response, body);
}
stringBody = body.toString('utf8');
- if (validationError = checkForErrors(error, stringBody)) {
- return callback(validationError, response, body);
+ try {
+ validate(options, response, stringBody);
+ } catch (error) {
+ return callback(error, response, body);
}
isChallengePresent = stringBody.indexOf('a = document.getElementById(\'jschl-answer\');') !== -1;
isRedirectChallengePresent = stringBody.indexOf('You are being redirected') !== -1 || stringBody.indexOf('sucuri_cloudproxy_js') !== -1;
- isTargetPage = !isChallengePresent && !isRedirectChallengePresent;
+ // isTargetPage = !isChallengePresent && !isRedirectChallengePresent;
+
+ if (isChallengePresent && options.challengesToSolve === 0) {
+ var cause = 'Cloudflare challenge loop';
+ error = new errors.CloudflareError(cause, options, response);
+ error.errorType = 4;
- if(isChallengePresent && options.challengesToSolve == 0) {
- return callback({ errorType: 4 }, response, body);
+ return callback(error, response, body);
}
// If body contains specified string, solve challenge
if (isChallengePresent) {
setTimeout(function() {
- solveChallenge(response, stringBody, options, callback);
- }, Timeout);
+ solveChallenge(options, response, stringBody);
+ }, options.cloudflareTimeout);
} else if (isRedirectChallengePresent) {
- setCookieAndReload(response, stringBody, options, callback);
+ setCookieAndReload(options, response, stringBody);
} else {
// All is good
- processResponseBody(options, error, response, body, callback);
+ processResponseBody(options, response, body);
}
}
-function checkForErrors(error, body) {
+function validate(options, response, body) {
var match;
- // Pure request error (bad connection, wrong url, etc)
- if(error) {
- return { errorType: 0, error: error };
- }
-
// Finding captcha
if (body.indexOf('why_captcha') !== -1 || /cdn-cgi\/l\/chk_captcha/i.test(body)) {
- return { errorType: 1 };
+ throw new errors.CaptchaError('captcha', options, response);
}
- // trying to find '1006'
+ // Trying to find '1006'
match = body.match(/<\w+\s+class="cf-error-code">(.*)<\/\w+>/i);
if (match) {
- return { errorType: 2, error: parseInt(match[1]) };
+ var code = parseInt(match[1]);
+ throw new errors.CloudflareError(code, options, response);
}
return false;
}
-function solveChallenge(response, body, options, callback) {
- var challenge = body.match(/name="jschl_vc" value="(\w+)"/);
- var host = response.request.host;
- var makeRequest = requestMethod(options.method);
- var jsChlVc;
- var answerResponse;
- var answerUrl;
+function solveChallenge(options, response, body) {
+ var callback = options.callback;
- if (!challenge) {
- return callback({errorType: 3, error: 'I cant extract challengeId (jschl_vc) from page'}, response, body);
- }
+ var uri = response.request.uri;
+ // The JS challenge to be evaluated for answer/response.
+ var challenge;
+ // The result of challenge being evaluated in sandbox
+ var answer;
+ // The query string to send back to Cloudflare
+ // var payload = { jschl_vc, jschl_answer, pass };
+ var payload = {};
- jsChlVc = challenge[1];
+ var match;
+ var error;
+ var cause;
- challenge = body.match(/getElementById\('cf-content'\)[\s\S]+?setTimeout.+?\r?\n([\s\S]+?a\.value =.+?)\r?\n/i);
+ match = body.match(/name="jschl_vc" value="(\w+)"/);
- if (!challenge) {
- return callback({errorType: 3, error: 'I cant extract method from setTimeOut wrapper'}, response, body);
+ if (!match) {
+ cause = 'challengeId (jschl_vc) extraction failed';
+ error = new errors.ParserError(cause, options, response);
+
+ return callback(error, response, body);
}
- challenge_pass = body.match(/name="pass" value="(.+?)"/)[1];
+ payload.jschl_vc = match[1];
+
+ match = body.match(/getElementById\('cf-content'\)[\s\S]+?setTimeout.+?\r?\n([\s\S]+?a\.value =.+?)\r?\n/i);
- challenge = challenge[1];
+ if (!match) {
+ cause = 'setTimeout callback extraction failed';
+ error = new errors.ParserError(cause, options, response);
- challenge = challenge.replace(/a\.value =(.+?) \+ .+?;/i, '$1');
+ return callback(error, response, body);
+ }
- challenge = challenge.replace(/\s{3,}[a-z](?: = |\.).+/g, '');
- challenge = challenge.replace(/'; \d+'/g, '');
+ challenge = match[1]
+ .replace(/a\.value =(.+?) \+ .+?;/i, '$1')
+ .replace(/\s{3,}[a-z](?: = |\.).+/g, '')
+ .replace(/'; \d+'/g, '');
try {
- answerResponse = {
- 'jschl_vc': jsChlVc,
- 'jschl_answer': (eval(challenge) + response.request.host.length),
- 'pass': challenge_pass
- };
- } catch (err) {
- return callback({errorType: 3, error: 'Error occurred during evaluation: ' + err.message}, response, body);
+ answer = vm.runInNewContext(challenge, undefined, VM_OPTIONS);
+ payload.jschl_answer = answer + uri.hostname.length;
+ } catch (error) {
+ error.message = 'Challenge evaluation failed: ' + error.message;
+ error = new errors.ParserError(error, options, response);
+
+ return callback(error, response, body);
+ }
+
+ match = body.match(/name="pass" value="(.+?)"/);
+
+ if (!match) {
+ cause = 'Attribute (pass) value extraction failed';
+ error = new errors.ParserError(cause, options, response);
+
+ return callback(error, response, body);
}
- answerUrl = response.request.uri.protocol + '//' + host + '/cdn-cgi/l/chk_jschl';
+ payload.pass = match[1];
- options.headers['Referer'] = response.request.uri.href; // Original url should be placed as referer
- options.url = answerUrl;
- options.qs = answerResponse;
+ // Prevent reusing the headers object to simplify unit testing.
+ options.headers = Object.assign({}, options.headers);
+ // Use the original uri as the referer and to construct the answer url.
+ options.headers['Referer'] = uri.href;
+ options.uri = uri.protocol + '//' + uri.hostname + '/cdn-cgi/l/chk_jschl';
+ // Set the query string and decrement the number of challenges to solve.
+ options.qs = payload;
options.challengesToSolve = options.challengesToSolve - 1;
- // Make request with answer
- makeRequest(options, function(error, response, body) {
- processRequestResponse(options, {error: error, response: response, body: body}, callback);
- });
+ // Make request with answer.
+ performRequest(options, false);
}
-function setCookieAndReload(response, body, options, callback) {
- var challenge = body.match(/S='([^']+)'/);
- var makeRequest = requestMethod(options.method);
+function setCookieAndReload(options, response, body) {
+ var callback = options.callback;
+ var challenge = body.match(/S='([^']+)'/);
if (!challenge) {
- return callback({errorType: 3, error: 'I cant extract cookie generation code from page'}, response, body);
+ var cause = 'Cookie code extraction failed';
+ var error = new errors.ParserError(cause, options, response);
+
+ return callback(error, response, body);
}
var base64EncodedCode = challenge[1];
@@ -227,42 +274,40 @@ function setCookieAndReload(response, body, options, callback) {
document: {}
};
- vm.runInNewContext(cookieSettingCode, sandbox);
-
try {
- jar.setCookie(sandbox.document.cookie, response.request.uri.href, {ignoreError: true});
- } catch (err) {
- return callback({errorType: 3, error: 'Error occurred during evaluation: ' + err.message}, response, body);
+ vm.runInNewContext(cookieSettingCode, sandbox, VM_OPTIONS);
+
+ options.jar.setCookie(sandbox.document.cookie, response.request.uri.href, {ignoreError: true});
+ } catch (error) {
+ error.message = 'Cookie code evaluation failed: ' + error.message;
+ error = new errors.ParserError(error, options, response);
+
+ return callback(error, response, body);
}
options.challengesToSolve = options.challengesToSolve - 1;
- makeRequest(options, function(error, response, body) {
- processRequestResponse(options, {error: error, response: response, body: body}, callback);
- });
+ performRequest(options, false);
}
-// Workaround for better testing. Request has pretty poor API
-function requestMethod(method) {
- // For now only GET and POST are supported
- method = method.toUpperCase();
-
- return method === 'POST' ? request.post : request.get;
-}
+function processResponseBody(options, response, body) {
+ var callback = options.callback;
+ var error = null;
-function processResponseBody(options, error, response, body, callback) {
if(typeof options.realEncoding === 'string') {
body = body.toString(options.realEncoding);
- // In case of real encoding, try to validate the response
- // and find potential errors there.
- // If encoding is not provided, return response as it is
- if (validationError = checkForErrors(error, body)) {
- return callback(validationError, response, body);
+ // The resolveWithFullResponse option will resolve with the response
+ // object. This changes the response.body so it is as expected.
+ response.body = body;
+
+ // In case of real encoding, try to validate the response and find
+ // potential errors there, otherwise return the response as is.
+ try {
+ validate(options, response, body);
+ } catch (e) {
+ error = e;
}
}
-
callback(error, response, body);
}
-
-module.exports = cloudscraper;
diff --git a/mocha.opts b/mocha.opts
new file mode 100644
index 0000000..e32c7e6
--- /dev/null
+++ b/mocha.opts
@@ -0,0 +1,2 @@
+--reporter spec
+--require tests/common
\ No newline at end of file
diff --git a/package.json b/package.json
index 83f4d1b..f5d8e92 100644
--- a/package.json
+++ b/package.json
@@ -1,10 +1,11 @@
{
"name": "cloudscraper",
- "version": "2.0.1",
+ "version": "3.0.0",
"description": "Bypasses cloudflare's anti-ddos page",
"main": "index.js",
"scripts": {
- "test": "grunt",
+ "test": "npm run lint && nyc --reporter=html --reporter=text mocha",
+ "coverage": "nyc report --reporter=text-lcov | coveralls",
"lint": "eslint ."
},
"repository": {
@@ -24,20 +25,22 @@
"license": "MIT",
"homepage": "https://github.com/codemanki/cloudscraper",
"dependencies": {
- "request": "^2.88.0"
+ "request": "^2.88.0",
+ "request-promise": "^4.2.4"
},
"devDependencies": {
+ "chai": "^4.2.0",
+ "chai-as-promised": "^7.1.1",
+ "coveralls": "^3.0.3",
"eslint": "^5.14.1",
"eslint-config-standard": "^12.0.0",
"eslint-plugin-import": "^2.16.0",
"eslint-plugin-node": "^8.0.1",
"eslint-plugin-promise": "^4.0.1",
"eslint-plugin-standard": "^4.0.0",
- "chai": "^1.10.0",
- "grunt": "^0.4.5",
- "grunt-cli": "^1.2.0",
- "grunt-mocha-test": "^0.12.4",
- "mocha": "^2.0.1",
- "sinon": "^1.12.1"
+ "mocha": "^6.0.2",
+ "nyc": "^13.3.0",
+ "sinon": "^7.2.4",
+ "sinon-chai": "^3.3.0"
}
}
diff --git a/specs/chai.js b/specs/chai.js
deleted file mode 100644
index 43994da..0000000
--- a/specs/chai.js
+++ /dev/null
@@ -1,12 +0,0 @@
-var chai = require('chai'),
- sinon = require('sinon');
-
-chai.expect();
-
-chai.config.includeStack = true;
-
-global.expect = chai.expect;
-global.AssertionError = chai.AssertionError;
-global.Assertion = chai.Assertion;
-global.assert = chai.assert;
-global.sinon = sinon;
diff --git a/specs/spec_helper.js b/specs/spec_helper.js
deleted file mode 100644
index c07e710..0000000
--- a/specs/spec_helper.js
+++ /dev/null
@@ -1,45 +0,0 @@
-var fs = require('fs');
-var urlLib = require('url');
-var path = require('path');
-
-var testDefaults = {
- url: 'http://example-site.dev/path/',
- headers: {'User-Agent': 'Chrome'}
-};
-
-module.exports = {
- getFixture: function(fileName) {
- return fs.readFileSync('./specs/fixtures/' + fileName, 'utf8');
- },
- testDefaults: testDefaults,
- // This method returns properly faked response object for request lib, which is used inside cloudscraper library
- fakeResponseObject: function(statusCode, headers, body, url) {
- var parsedUri = urlLib.parse(url);
- parsedUri.uri = parsedUri;
-
- return {
- statusCode: statusCode,
- headers: headers,
- body: body,
- request: parsedUri //actually this is more compilcated object, but library uses only uri parts.
- };
- },
- // Terrible hack. But because of request library API, it is impossible to normally stub it. That is why cloudscraper's index.js is removed from cache each time
- dropCache: function() {
- var pathToLib = path.normalize(__dirname + '/../index.js');
- if (require.cache[pathToLib]) {
- delete require.cache[pathToLib];
- }
- },
- requestParams: function(params) {
- return Object.assign({
- method: 'GET',
- url: testDefaults.url,
- headers: testDefaults.headers,
- encoding: null,
- realEncoding: 'utf8',
- followAllRedirects: true,
- challengesToSolve: 3
- }, params);
- }
-};
diff --git a/specs/tests/cloudscraper.js b/specs/tests/cloudscraper.js
deleted file mode 100644
index b0f8d24..0000000
--- a/specs/tests/cloudscraper.js
+++ /dev/null
@@ -1,314 +0,0 @@
-var helper = require('../spec_helper');
-var request = require('request');
-
-describe('Cloudscraper', function() {
- var requestedPage = helper.getFixture('requested_page.html');
- var url = helper.testDefaults.url;
- var headers = helper.testDefaults.headers;
-
- // Since request.jar returns new cookie jar instance, create one global instance and then stub it in beforeEach
- var jar = request.jar();
- // Since request.defaults returns new wrapper, create one global instance and then stub it in beforeEach
- var requestDefault = request.defaults({jar: jar});
- var defaultWithArgs = helper.requestParams({});
-
- var cloudscraper;
- var sandbox;
- before(function() {
- helper.dropCache();
- });
-
- beforeEach(function () {
- sandbox = sinon.sandbox.create();
- sandbox.stub(request, 'jar').returns(jar);
- sandbox.stub(request, 'defaults').returns(requestDefault);
- cloudscraper = require('../../index');
- // since cloudflare requires timeout, the module relies on setTimeout. It should be proprely stubbed to avoid ut running for too long
- this.clock = sinon.useFakeTimers();
- });
-
- afterEach(function () {
- sandbox.restore();
- this.clock.restore();
- });
-
- it('should return requested page, if cloudflare is disabled for page', function(done) {
- var expectedResponse = { statusCode: 200 };
-
- // Stub first call, which request makes to page. It should return requested page
- sandbox.stub(requestDefault, 'get')
- .withArgs(helper.requestParams({}))
- .callsArgWith(1, null, expectedResponse, requestedPage);
-
- cloudscraper.get(url, function(error, response, body) {
- expect(error).to.be.null();
- expect(body).to.be.equal(requestedPage);
- expect(response).to.be.equal(expectedResponse);
- done();
- }, headers);
-
- });
-
- it('should not trigged any error if recaptcha is present in page not protected by CF', function(done) {
- var expectedResponse = { statusCode: 200 };
- var pageWithCaptcha = helper.getFixture('page_with_recaptcha.html');
-
- sandbox.stub(requestDefault, 'get')
- .withArgs(defaultWithArgs)
- .callsArgWith(1, null, expectedResponse, pageWithCaptcha);
-
- cloudscraper.get(url, function(error, response, body) {
- expect(error).to.be.null();
- expect(body).to.be.equal(pageWithCaptcha);
- expect(response).to.be.equal(expectedResponse);
- done();
- }, headers);
-
- });
-
- it('should resolve challenge (version as on 21.05.2015) and then return page', function(done) {
- var jsChallengePage = helper.getFixture('js_challenge_21_05_2015.html');
- var response = helper.fakeResponseObject(503, headers, jsChallengePage, url);
- var stubbed;
-
- // Cloudflare is enabled for site. It returns a page with js challenge
- stubbed = sandbox.stub(requestDefault, 'get')
- .withArgs(defaultWithArgs)
- .callsArgWith(1, null, response, jsChallengePage);
-
- // Second call to request.get will have challenge solution
- // It should contain url, answer, headers with Referer
- stubbed.withArgs({
- method: 'GET',
- url: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
- qs: {
- 'jschl_vc': '89cdff5eaa25923e0f26e29e5195dce9',
- 'jschl_answer': 633 + 'example-site.dev'.length, // 633 is a answer to cloudflares js challenge in this particular case
- 'pass': '1432194174.495-8TSfc235EQ'
- },
- headers: {
- 'User-Agent': 'Chrome',
- 'Referer': 'http://example-site.dev/path/',
- 'Cache-Control': 'private',
- 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'
- },
- encoding: null,
- realEncoding: 'utf8',
- followAllRedirects: true,
- challengesToSolve: 2
- })
- .callsArgWith(1, null, response, requestedPage);
-
- cloudscraper.get(url, function(error, response, body) {
- expect(error).to.be.null();
- expect(body).to.be.equal(requestedPage);
- expect(response).to.be.equal(response);
- done();
- }, headers);
-
- this.clock.tick(7000); // tick the timeout
- });
-
- it('should resolve challenge (version as on 09.06.2016) and then return page', function(done) {
- var jsChallengePage = helper.getFixture('js_challenge_09_06_2016.html');
- var response = helper.fakeResponseObject(503, headers, jsChallengePage, url);
- var stubbed;
-
- // Cloudflare is enabled for site. It returns a page with js challenge
- stubbed = sandbox.stub(requestDefault, 'get')
- .withArgs(defaultWithArgs)
- .callsArgWith(1, null, response, jsChallengePage);
-
- // Second call to request.get will have challenge solution
- // It should contain url, answer, headers with Referer
- stubbed.withArgs({
- method: 'GET',
- url: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
- qs: {
- 'jschl_vc': '346b959db0cfa38f9938acc11d6e1e6e',
- 'jschl_answer': 6632 + 'example-site.dev'.length, // 6632 is a answer to cloudflares js challenge in this particular case
- 'pass': '1465488330.6-N/NbGTg+IM'
- },
- headers: {
- 'User-Agent': 'Chrome',
- 'Referer': 'http://example-site.dev/path/',
- 'Cache-Control': 'private',
- 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'
- },
- encoding: null,
- realEncoding: 'utf8',
- followAllRedirects: true,
- challengesToSolve: 2
- })
- .callsArgWith(1, null, response, requestedPage);
-
- cloudscraper.get(url, function(error, response, body) {
- expect(error).to.be.null();
- expect(body).to.be.equal(requestedPage);
- expect(response).to.be.equal(response);
- done();
- }, headers);
-
- this.clock.tick(7000); // tick the timeout
- });
-
- it('should resolve 2 consequent challenges', function(done) {
- var jsChallengePage1 = helper.getFixture('js_challenge_03_12_2018_1.html');
- var jsChallengePage2 = helper.getFixture('js_challenge_03_12_2018_2.html');
- var responseJsChallengePage1 = helper.fakeResponseObject(503, headers, jsChallengePage1, url);
- var responseJsChallengePage2 = helper.fakeResponseObject(503, headers, jsChallengePage2, url);
- var stubbed;
-
- // First call and CF returns a challenge
- stubbed = sandbox.stub(requestDefault, 'get')
- .withArgs(defaultWithArgs)
- .callsArgWith(1, null, responseJsChallengePage1, jsChallengePage1);
-
- // We submit a solution to the first challenge, but CF decided to give us a second one
- stubbed.withArgs({
- method: 'GET',
- url: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
- qs: {
- 'jschl_vc': '427c2b1cd4fba29608ee81b200e94bfa',
- 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 is a answer to cloudflares js challenge in this particular case
- 'pass': '1543827239.915-44n9IE20mS'
- },
- headers: {
- 'User-Agent': 'Chrome',
- 'Referer': 'http://example-site.dev/path/',
- 'Cache-Control': 'private',
- 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'
- },
- encoding: null,
- realEncoding: 'utf8',
- followAllRedirects: true,
- challengesToSolve: 2
- })
- .callsArgWith(1, null, responseJsChallengePage2, jsChallengePage2);
-
- // We submit a solution to the second challenge and CF returns requested page
- stubbed.withArgs({
- method: 'GET',
- url: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
- qs: {
- 'jschl_vc': 'a41fee3a9f041fea01f0cbf3e8e4d29b',
- 'jschl_answer': -1.9145049856 + 'example-site.dev'.length, // 1.9145049856 is a answer to cloudflares js challenge in this particular case
- 'pass': '1543827246.024-hvxyNA3rOg'
- },
- headers: {
- 'User-Agent': 'Chrome',
- 'Referer': 'http://example-site.dev/path/',
- 'Cache-Control': 'private',
- 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'
- },
- encoding: null,
- realEncoding: 'utf8',
- followAllRedirects: true,
- challengesToSolve: 1
- })
- .callsArgWith(1, null, responseJsChallengePage2, requestedPage);
-
- cloudscraper.get(url, function(error, response, body) {
- expect(error).to.be.null();
- expect(body).to.be.equal(requestedPage);
- expect(response).to.be.equal(response);
- done();
- }, headers);
-
- this.clock.tick(14000); // tick the timeout
- });
-
- it('should make post request with body as string', function(done) {
- var expectedResponse = { statusCode: 200 };
- var body = 'form-data-body';
- var postHeaders = headers;
-
- postHeaders['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8';
- postHeaders['Content-Length'] = body.length;
-
-
- // Stub first call, which request makes to page. It should return requested page
- sandbox.stub(requestDefault, 'post')
- .withArgs(helper.requestParams({url: url, method: 'POST', headers: postHeaders, body: body}))
- .callsArgWith(1, null, expectedResponse, requestedPage);
-
- cloudscraper.post(url, body, function(error, response, body) {
- expect(error).to.be.null();
- expect(body).to.be.equal(requestedPage);
- expect(response).to.be.equal(expectedResponse);
- done();
- }, headers);
- });
-
- it('should make post request with body as object', function(done) {
- var expectedResponse = { statusCode: 200 };
- var rawBody = {a: '1', b: 2};
- var encodedBody = 'a=1&b=2';
- var postHeaders = headers;
-
- postHeaders['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8';
- postHeaders['Content-Length'] = encodedBody.length;
-
- // Stub first call, which request makes to page. It should return requested page
- sandbox.stub(requestDefault, 'post')
- .withArgs(helper.requestParams({url: url, method: 'POST', headers: postHeaders, body: encodedBody}))
- .callsArgWith(1, null, expectedResponse, requestedPage);
-
- cloudscraper.post(url, rawBody, function(error, response, body) {
- expect(error).to.be.null();
- expect(body).to.be.equal(requestedPage);
- expect(response).to.be.equal(expectedResponse);
- done();
- }, headers);
- });
-
- it('should return raw data when encoding is null', function(done) {
- var expectedResponse = { statusCode: 200 };
- var requestedData = new Buffer('R0lGODlhDwAPAKECAAAAzMzM/////wAAACwAAAAADwAPAAACIISPeQHsrZ5ModrLlN48CXF8m2iQ3YmmKqVlRtW4MLwWACH+H09wdGltaXplZCBieSBVbGVhZCBTbWFydFNhdmVyIQAAOw==', 'base64');
-
- sandbox.stub(requestDefault, 'get')
- .withArgs(helper.requestParams({url: url, headers: headers, encoding: null, realEncoding: null}))
- .callsArgWith(1, null, expectedResponse, requestedData);
-
- var options = {
- method: 'GET',
- url: url,
- encoding: null,
- headers: headers,
- followAllRedirects: true
- };
-
- cloudscraper.request(options, function(error, response, body) {
- expect(error).to.be.null();
- expect(response).to.be.equal(expectedResponse);
- expect(body).to.be.equal(requestedData);
- done();
- });
- });
-
- it('should set the given cookie and then return page', function(done) {
- var jsChallengePage = helper.getFixture('js_challenge_cookie.html');
- var response = helper.fakeResponseObject(200, headers, jsChallengePage, url);
-
- // Cloudflare is enabled for site.
- // It returns a redirecting page if a (session) cookie is unset.
- sandbox.stub(requestDefault, 'get', function fakeGet(options, cb) {
- if (options.url === url) {
- var cookieString = jar.getCookieString(url);
- if (cookieString === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a') {
- cb(null, response, requestedPage);
- } else {
- cb(null, response, jsChallengePage);
- }
- } else {
- cb(new Error("Unexpected request"));
- }
- });
-
- cloudscraper.get(url, function(error, response, body) {
- expect(error).to.be.null();
- expect(body).to.be.equal(requestedPage);
- done();
- }, headers);
- });
-});
diff --git a/specs/tests/errors.js b/specs/tests/errors.js
deleted file mode 100644
index c14ef17..0000000
--- a/specs/tests/errors.js
+++ /dev/null
@@ -1,221 +0,0 @@
-var helper = require('../spec_helper');
-var request = require('request');
-
-describe('Cloudscraper', function() {
- var sandbox;
- var captchaPage = helper.getFixture('captcha.html');
- var accessDenied = helper.getFixture('access_denied.html');
- var invalidChallenge = helper.getFixture('invalid_js_challenge.html');
- var url = helper.testDefaults.url;
- var headers = helper.testDefaults.headers;
-
- // Since request.defaults returns new wrapper, create one global instance and then stub it in beforeEach
- var requestDefault = request.defaults({jar: true});
- var defaultWithArgs = helper.requestParams({});
-
- var cloudscraper;
- before(function() {
- helper.dropCache();
- });
-
- beforeEach(function () {
- sandbox = sinon.sandbox.create();
- sandbox.stub(request, 'defaults').returns(requestDefault);
- cloudscraper = require('../../index');
- // since cloudflare requires timeout, the module relies on setTimeout. It should be proprely stubbed to avoid ut running for too long
- this.clock = sinon.useFakeTimers();
- });
-
- afterEach(function () {
- sandbox.restore();
- this.clock.restore();
- });
-
- it('should return error if it was thrown by request', function(done) {
- var response = { statusCode: 500 },
- fakeError = {fake: 'error'}; //not real request error, but it doesn't matter
-
- sandbox.stub(requestDefault, 'get')
- .withArgs(defaultWithArgs)
- .callsArgWith(1, fakeError, response, '');
-
- cloudscraper.get(url, function(error) {
- expect(error).to.be.eql({errorType: 0, error: fakeError}); // errorType 0, means it is some kind of system error
- done();
- }, headers);
-
- });
-
- it('should return error if captcha is served by cloudflare', function(done){
- var response = { statusCode: 503 };
-
- sandbox.stub(requestDefault, 'get')
- .withArgs(defaultWithArgs)
- .callsArgWith(1, null, response, captchaPage);
-
- cloudscraper.get(url, function(error, body, response) {
- expect(error).to.be.eql({errorType: 1}); // errorType 1, means captcha is served
- expect(response).to.be.eql(captchaPage);
- done();
- }, headers);
- });
-
- it('should return error if cloudflare returned some inner error', function(done){
- //https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008
- var response = { statusCode: 500 };
-
- sandbox.stub(requestDefault, 'get')
- .withArgs(defaultWithArgs)
- .callsArgWith(1, null, response, accessDenied);
-
- cloudscraper.get(url, function(error, body, response) {
- expect(error).to.be.eql({errorType: 2, error: 1006}); // errorType 2, means inner cloudflare error
- expect(response).to.be.eql(accessDenied);
- done();
- }, headers);
- });
-
- it('should return errior if cf presented more than 3 challenges in a row', function(done) {
- var jsChallengePage = helper.getFixture('js_challenge_09_06_2016.html');
- var response = helper.fakeResponseObject(503, headers, jsChallengePage, url);
- var stubbed;
-
- var pageWithCaptchaResponse = { statusCode: 200 };
- // Cloudflare is enabled for site. It returns a page with js challenge
- stubbed = sandbox.stub(requestDefault, 'get')
- .withArgs(helper.requestParams({url: url, headers: headers}))
- .callsArgWith(1, null, response, jsChallengePage);
-
- // Second call to request.get returns challenge
- stubbed.withArgs({
- method: 'GET',
- url: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
- qs: sinon.match.any,
- headers: sinon.match.any,
- encoding: null,
- realEncoding: 'utf8',
- followAllRedirects: true,
- challengesToSolve: 2
- })
- .callsArgWith(1, null, response, jsChallengePage);
-
- // Third call to request.get returns challenge
- stubbed.withArgs({
- method: 'GET',
- url: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
- qs: sinon.match.any,
- headers: sinon.match.any,
- encoding: null,
- realEncoding: 'utf8',
- followAllRedirects: true,
- challengesToSolve: 1
- })
- .callsArgWith(1, null, response, jsChallengePage);
-
- // Fourth call to request.get still returns a challenge
- stubbed.withArgs({
- method: 'GET',
- url: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
- qs: sinon.match.any,
- headers: sinon.match.any,
- encoding: null,
- realEncoding: 'utf8',
- followAllRedirects: true,
- challengesToSolve: 0
- })
- .callsArgWith(1, null, response, jsChallengePage);
-
- cloudscraper.get(url, function(error, body, response) {
- expect(error).to.be.eql({errorType: 4}); // errorType 1, means captcha is served
- expect(response).to.be.eql(jsChallengePage);
- done();
- }, headers);
-
- this.clock.tick(200000); // tick the timeout
- });
- it('should return error if body is undefined', function(done){
- //https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008
- var response = { statusCode: 500 };
-
- sandbox.stub(requestDefault, 'get')
- .withArgs(defaultWithArgs)
- .callsArgWith(1, null, response, undefined);
-
- cloudscraper.get(url, function(error, body, response) {
- expect(error).to.be.eql({errorType: 0, error: null}); // errorType 2, means inner cloudflare error
- expect(response).to.be.eql(undefined);
- done();
- }, headers);
- });
-
- it('should return error if challenge page failed to be parsed', function(done) {
- var response = helper.fakeResponseObject(200, headers, invalidChallenge, url);
- sandbox.stub(requestDefault, 'get')
- .withArgs(defaultWithArgs)
- .callsArgWith(1, null, response, invalidChallenge);
-
- cloudscraper.get(url, function(error, body, response) {
- expect(error.errorType).to.be.eql(3); // errorType 3, means parsing failed
- expect(response).to.be.eql(invalidChallenge);
- done();
- }, headers);
-
- this.clock.tick(7000); // tick the timeout
- });
-
- it('should return error if it was thrown by request when solving challenge', function(done) {
- var jsChallengePage = helper.getFixture('js_challenge_21_05_2015.html'),
- response = helper.fakeResponseObject(503, headers, jsChallengePage, url),
- connectionError = {error: 'ECONNRESET'},
- stubbed;
-
- // Cloudflare is enabled for site. It returns a page with js challenge
- stubbed = sandbox.stub(requestDefault, 'get')
- .onCall(0)
- .callsArgWith(1, null, response, jsChallengePage);
-
- stubbed
- .onCall(1)
- .callsArgWith(1, connectionError);
-
- cloudscraper.get(url, function(error) {
- expect(error).to.be.eql({errorType: 0, error: connectionError}); // errorType 0, connection eror for example
- done();
- }, headers);
-
- this.clock.tick(7000); // tick the timeout
- });
-
- it('should properly handle a case when after a challenge another one is returned', function(done) {
- var jsChallengePage = helper.getFixture('js_challenge_09_06_2016.html');
- var response = helper.fakeResponseObject(503, headers, jsChallengePage, url);
- var stubbed;
-
- var pageWithCaptchaResponse = { statusCode: 200 };
- // Cloudflare is enabled for site. It returns a page with js challenge
- stubbed = sandbox.stub(requestDefault, 'get')
- .withArgs(helper.requestParams({url: url, headers: headers}))
- .callsArgWith(1, null, response, jsChallengePage);
-
- // Second call to request.get returns recaptcha
- stubbed.withArgs({
- method: 'GET',
- url: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
- qs: sinon.match.any,
- headers: sinon.match.any,
- encoding: null,
- realEncoding: 'utf8',
- followAllRedirects: true,
- challengesToSolve: 2
- })
- .callsArgWith(1, null, pageWithCaptchaResponse, captchaPage);
-
- cloudscraper.get(url, function(error, body, response) {
- expect(error).to.be.eql({errorType: 1}); // errorType 1, means captcha is served
- expect(response).to.be.eql(captchaPage);
- done();
- }, headers);
-
- this.clock.tick(7000); // tick the timeout
- });
-});
diff --git a/test/common.js b/test/common.js
new file mode 100644
index 0000000..1c96a18
--- /dev/null
+++ b/test/common.js
@@ -0,0 +1,8 @@
+'use strict';
+
+var chai = require('chai');
+
+chai.use(require('sinon-chai'));
+chai.use(require('chai-as-promised'));
+
+chai.config.includeStack = true;
\ No newline at end of file
diff --git a/specs/fixtures/access_denied.html b/test/fixtures/access_denied.html
similarity index 100%
rename from specs/fixtures/access_denied.html
rename to test/fixtures/access_denied.html
diff --git a/specs/fixtures/captcha.html b/test/fixtures/captcha.html
similarity index 100%
rename from specs/fixtures/captcha.html
rename to test/fixtures/captcha.html
diff --git a/specs/fixtures/invalid_js_challenge.html b/test/fixtures/invalid_js_challenge.html
similarity index 100%
rename from specs/fixtures/invalid_js_challenge.html
rename to test/fixtures/invalid_js_challenge.html
diff --git a/specs/fixtures/js_challenge_03_12_2018_1.html b/test/fixtures/js_challenge_03_12_2018_1.html
similarity index 100%
rename from specs/fixtures/js_challenge_03_12_2018_1.html
rename to test/fixtures/js_challenge_03_12_2018_1.html
diff --git a/specs/fixtures/js_challenge_03_12_2018_2.html b/test/fixtures/js_challenge_03_12_2018_2.html
similarity index 100%
rename from specs/fixtures/js_challenge_03_12_2018_2.html
rename to test/fixtures/js_challenge_03_12_2018_2.html
diff --git a/specs/fixtures/js_challenge_09_06_2016.html b/test/fixtures/js_challenge_09_06_2016.html
similarity index 100%
rename from specs/fixtures/js_challenge_09_06_2016.html
rename to test/fixtures/js_challenge_09_06_2016.html
diff --git a/specs/fixtures/js_challenge_21_05_2015.html b/test/fixtures/js_challenge_21_05_2015.html
similarity index 100%
rename from specs/fixtures/js_challenge_21_05_2015.html
rename to test/fixtures/js_challenge_21_05_2015.html
diff --git a/specs/fixtures/js_challenge_cookie.html b/test/fixtures/js_challenge_cookie.html
similarity index 100%
rename from specs/fixtures/js_challenge_cookie.html
rename to test/fixtures/js_challenge_cookie.html
diff --git a/specs/fixtures/page_with_recaptcha.html b/test/fixtures/page_with_recaptcha.html
similarity index 100%
rename from specs/fixtures/page_with_recaptcha.html
rename to test/fixtures/page_with_recaptcha.html
diff --git a/specs/fixtures/requested_page.html b/test/fixtures/requested_page.html
similarity index 100%
rename from specs/fixtures/requested_page.html
rename to test/fixtures/requested_page.html
diff --git a/test/helper.js b/test/helper.js
new file mode 100644
index 0000000..70c602d
--- /dev/null
+++ b/test/helper.js
@@ -0,0 +1,105 @@
+var request = require('./rp');
+var sinon = require('sinon');
+var fs = require('fs');
+var path = require('path');
+
+var defaultParams = {
+ // Since cloudscraper wraps the callback, just ensure callback is a function
+ callback: sinon.match.func,
+ requester: sinon.match.func,
+ jar: request.jar(),
+ uri: 'http://example-site.dev/path/',
+ headers: {
+ "User-Agent": "Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36",
+ "Cache-Control": "private",
+ "Accept": "application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5"
+ },
+ method: 'GET',
+ encoding: null,
+ realEncoding: 'utf8',
+ followAllRedirects: true,
+ cloudflareTimeout: 6000,
+ challengesToSolve: 3
+};
+
+// Cache fixtures so each one is read from the fs only once
+var cache = {};
+
+module.exports = {
+ getFixture: function(fileName) {
+ if (cache[fileName] === undefined) {
+ cache[fileName] = fs.readFileSync(path.join(__dirname, 'fixtures', fileName), 'utf8');
+ }
+ return cache[fileName];
+ },
+ defaultParams: defaultParams,
+ fakeResponse: function(template) {
+ return Object.assign({
+ statusCode: 200,
+ headers: defaultParams.headers,
+ body: ''
+ }, template);
+ },
+ extendParams: function(params) {
+ // Extend target with the default params and provided params
+ var target = Object.assign({}, defaultParams, params);
+ // Extend target.headers with defaults headers and provided headers
+ target.headers = Object.assign({}, defaultParams.headers, params.headers);
+ return target;
+ },
+ fakeRequest: function(template) {
+ // In this context, fake is the request result
+ var fake = Object.assign({
+ error: null,
+ // Set the default fake statusCode to 500 if an error is provided
+ response: { statusCode: template.error ? 500 : 200 }
+ }, template);
+
+ // Use the body from fake response if the template doesn't provide it
+ if (!('body' in fake)) {
+ fake.body = fake.response.body;
+ }
+
+ // Freeze the fake result and its properties for more reliable tests.
+ Object.freeze(fake);
+ Object.keys(fake).forEach(function (key) {
+ if (!Object.isFrozen(fake[key]) && !Buffer.isBuffer(fake[key])) {
+ // Mark all existing properties as non-configurable and non-writable.
+ var target = fake[key];
+ Object.keys(target).forEach(function (key) {
+ var desc = Object.getOwnPropertyDescriptor(target, key);
+ if (desc.configurable) {
+ desc.configurable = false;
+ if (desc.writable !== undefined) {
+ desc.writable = false;
+ }
+ Object.defineProperty(target, key, desc);
+ }
+ });
+ }
+ });
+
+ return function Request(params) {
+ var instance = request(params);
+
+ // This is a hack to prevent sending events too early. See #104
+ Object.defineProperty(instance, 'cloudscraper', {
+ set: function() {
+ // Add the required convenience property to fake the response.
+ fake.response.request = this;
+
+ if (fake.error !== null) {
+ this.emit('error', fake.error);
+ } else {
+ this.emit('complete', fake.response, fake.body);
+ }
+ },
+ get: function() {
+ return true;
+ }
+ });
+
+ return instance;
+ };
+ }
+};
diff --git a/test/rp.js b/test/rp.js
new file mode 100644
index 0000000..bd9462d
--- /dev/null
+++ b/test/rp.js
@@ -0,0 +1,26 @@
+'use strict';
+
+// Reproduces: https://github.com/request/request-promise/blob/6d11ddc63dde2462a8e39cd8d0b6956556b977f1/lib/rp.js
+// It must be done this way because request-promise bypasses require.cache.
+
+var Bluebird = require('bluebird').getNewLibraryCopy();
+var configure = require('request-promise-core/configure/request2');
+var request = require('request');
+
+// Replacing start with a noop prevents real requests from being made.
+// Request -> Request.prototype.init -> Request.prototype.start
+// The test/helper is responsible for calling back with a fake response.
+request.Request.prototype.start = function(){};
+
+configure({
+ request: request,
+ PromiseImpl: Bluebird,
+ expose: [
+ 'then',
+ 'catch',
+ 'finally',
+ 'promise'
+ ]
+});
+
+module.exports = request;
diff --git a/test/test-errors.js b/test/test-errors.js
new file mode 100644
index 0000000..75e3066
--- /dev/null
+++ b/test/test-errors.js
@@ -0,0 +1,441 @@
+'use strict';
+
+var cloudscraper = require('../index');
+var request = require('request-promise');
+var errors = require('../errors');
+var helper = require('./helper');
+
+var sinon = require('sinon');
+var expect = require('chai').expect;
+var assert = require('chai').assert;
+
+describe('Cloudscraper', function() {
+ var uri = helper.defaultParams.uri;
+ var sandbox;
+ var Request;
+
+ beforeEach(function () {
+ sandbox = sinon.createSandbox();
+ // Prepare stubbed Request for each test
+ Request = sandbox.stub(request, 'Request');
+ // setTimeout should be properly stubbed to prevent the unit test from running too long.
+ this.clock = sinon.useFakeTimers();
+ });
+
+ afterEach(function () {
+ sandbox.restore();
+ this.clock.restore();
+ });
+
+ it('should return error if it was thrown by request', function(done) {
+ var fakeError = new Error('fake');
+
+ Request.callsFake(helper.fakeRequest({ error: fakeError }));
+
+ var promise = cloudscraper.get(uri, function (error) {
+ expect(error).to.be.instanceOf(errors.RequestError);
+ expect(error).to.have.property('error', fakeError);
+ expect(error).to.have.property('errorType', 0);
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done);
+ });
+
+ it('should return error if captcha is served by cloudflare', function(done) {
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('captcha.html')
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ // errorType 1, means captcha is served
+ expect(error).to.be.instanceOf(errors.CaptchaError);
+ expect(error).to.have.property('error', 'captcha');
+ expect(error).to.have.property('errorType', 1);
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done);
+ });
+
+ it('should return error if cloudflare returned some inner error', function(done) {
+ // https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages
+ // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008
+
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 500,
+ body: helper.getFixture('access_denied.html')
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ // errorType 2, means inner cloudflare error
+ expect(error).to.be.instanceOf(errors.CloudflareError);
+ expect(error).to.have.property('error', 1006);
+ expect(error).to.have.property('errorType', 2);
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done);
+ });
+
+ it('should return error if cf presented more than 3 challenges in a row', function(done) {
+ // The expected params for all subsequent calls to Request
+ var expectedParams = helper.extendParams({
+ uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl'
+ });
+
+ // Perform less strict matching on headers and qs to simplify this test
+ Object.assign(expectedParams, {
+ headers: sinon.match.object,
+ qs: sinon.match.object
+ });
+
+ // Cloudflare is enabled for site. It returns a page with js challenge
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_09_06_2016.html')
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.instanceOf(errors.CloudflareError);
+ expect(error).to.have.property('error', 'Cloudflare challenge loop');
+ expect(error).to.have.property('errorType', 4);
+
+ assert.equal(Request.callCount, 4, 'Request call count');
+ expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams);
+
+ var total = helper.defaultParams.challengesToSolve + 1;
+ for (var i = 1; i < total; i++) {
+ // Decrement the number of challengesToSolve to match actual params
+ expectedParams.challengesToSolve -= 1;
+ expect(Request.getCall(i)).to.be.calledWithExactly(expectedParams);
+ }
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done);
+
+ // Tick the timeout
+ this.clock.tick(200000);
+ });
+
+ it('should return error if body is undefined', function(done) {
+ // The fake response carries only a status code and no body at all, so the
+ // expected outcome is a RequestError (errorType 0) whose `error` is null.
+
+ Request.callsFake(helper.fakeRequest({
+ response: {statusCode: 500}
+ }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.instanceOf(errors.RequestError);
+ expect(error).to.have.property('error', null);
+ expect(error).to.have.property('errorType', 0);
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(body).to.be.equal(undefined);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done);
+ });
+
+ it('should return error if challenge page failed to be parsed', function(done) {
+ var expectedResponse = helper.fakeResponse({
+ body: helper.getFixture('invalid_js_challenge.html')
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.instanceOf(errors.ParserError);
+ expect(error).to.have.property('error').that.is.ok;
+ expect(error).to.have.property('errorType', 3);
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done);
+
+ this.clock.tick(7000); // tick the timeout
+ });
+
+ it('should return error if js challenge has error during evaluation', function(done) {
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_03_12_2018_1.html')
+ });
+
+ // Adds a syntax error near the end of line 37
+ expectedResponse.body = expectedResponse.body.replace(/\.toFixed/gm, '..toFixed');
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.instanceOf(errors.ParserError);
+ expect(error).to.have.property('error').that.is.an('error');
+ expect(error).to.have.property('errorType', 3);
+ expect(error.message).to.include('Challenge evaluation failed');
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done);
+
+ this.clock.tick(7000); // tick the timeout
+ });
+
+ it('should return error if challengeId extraction fails', function(done) {
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_03_12_2018_1.html')
+ });
+
+ expectedResponse.body = expectedResponse.body.replace(/name="jschl_vc"/gm, '');
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.instanceOf(errors.ParserError);
+ expect(error).to.have.property('error', 'challengeId (jschl_vc) extraction failed');
+ expect(error).to.have.property('errorType', 3);
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done);
+
+ this.clock.tick(7000); // tick the timeout
+ });
+
+
+ it('should return error if it was thrown by request when solving challenge', function(done) {
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_21_05_2015.html')
+ });
+
+ var fakeError = Object.assign(new Error('read ECONNRESET'), {
+ code: 'ECONNRESET', errno: 'ECONNRESET', syscall: 'read'
+ });
+
+ // Cloudflare is enabled for site. It returns a page with js challenge
+ Request.onFirstCall()
+ .callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ Request.onSecondCall()
+ .callsFake(helper.fakeRequest({ error: fakeError }));
+
+ var promise = cloudscraper.get(uri, function (error) {
+ // errorType 0, a connection error for example
+ expect(error).to.be.instanceOf(errors.RequestError);
+ expect(error).to.have.property('error', fakeError);
+ expect(error).to.have.property('errorType', 0);
+
+ expect(Request).to.be.calledTwice;
+ expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done);
+
+ // tick the timeout
+ this.clock.tick(7000);
+ });
+
+ it('should properly handle a case when after a challenge another one is returned', function(done) {
+ // Cloudflare is enabled for site. It returns a page with js challenge
+ var firstResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_09_06_2016.html')
+ });
+
+ Request.onFirstCall()
+ .callsFake(helper.fakeRequest({ response: firstResponse }));
+
+ // Second call to request.get returns recaptcha
+ var secondParams = helper.extendParams({
+ uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
+ challengesToSolve: 2
+ });
+
+ // Perform less strict matching on headers and qs to simplify this test
+ Object.assign(secondParams, {
+ headers: sinon.match.object,
+ qs: sinon.match.object
+ });
+
+ var secondResponse = helper.fakeResponse({
+ body: helper.getFixture('captcha.html')
+ });
+
+ Request.onSecondCall()
+ .callsFake(helper.fakeRequest({ response: secondResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ // errorType 1, means captcha is served
+ expect(error).to.be.instanceOf(errors.CaptchaError);
+ expect(error).to.have.property('error', 'captcha');
+ expect(error).to.have.property('errorType', 1);
+
+ expect(Request).to.be.calledTwice;
+ expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams);
+ expect(Request.secondCall).to.be.calledWithExactly(secondParams);
+
+ expect(response).to.be.equal(secondResponse);
+ expect(body).to.be.equal(secondResponse.body);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done);
+
+ this.clock.tick(7000); // tick the timeout
+ });
+
+ it('should return error if challenge page cookie extraction fails', function(done) {
+ // Cloudflare is enabled for site.
+ // It returns a redirecting page if a (session) cookie is unset.
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 503,
+ // The cookie extraction code looks for the `S` variable assignment; rename it so extraction fails
+ body: helper.getFixture('js_challenge_cookie.html').replace(/S=/gm, 'Z=')
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.instanceOf(errors.ParserError);
+ expect(error).to.have.property('error', 'Cookie code extraction failed');
+ expect(error).to.have.property('errorType', 3);
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done);
+ });
+
+ it('should throw a TypeError if callback is not a function', function(done) {
+ var spy = sinon.spy(function() {
+ cloudscraper.get(uri);
+ });
+
+ expect(spy).to.throw(TypeError, /Expected a callback function/);
+ done();
+ });
+
+ it('should throw a TypeError if requester is not a function', function (done) {
+ var spy = sinon.spy(function () {
+ cloudscraper.get({ requester: null });
+ });
+
+ expect(spy).to.throw(TypeError, /`requester` option .*function/);
+ done();
+ });
+
+ it('should throw a TypeError if challengesToSolve is not a number', function(done) {
+ var spy = sinon.spy(function() {
+ var options = { uri: uri, challengesToSolve: 'abc' };
+
+ cloudscraper.get(options, function(){});
+ });
+
+ expect(spy).to.throw(TypeError, /`challengesToSolve` option .*number/);
+ done();
+ });
+
+ it('should detect captcha in response body\'s real encoding', function (done) {
+ var firstParams = helper.extendParams({
+ realEncoding: 'fake-encoding'
+ });
+
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: {
+ toString: function(encoding) {
+ if (encoding === 'fake-encoding') {
+ return helper.getFixture('captcha.html');
+ }
+
+ return 'fake response body';
+ }
+ }
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var options = { uri: uri, encoding: 'fake-encoding' };
+
+ var promise = cloudscraper.get(options, function (error, response, body) {
+ // errorType 1, means captcha is served
+ expect(error).to.be.instanceOf(errors.CaptchaError);
+ expect(error).to.have.property('error', 'captcha');
+ expect(error).to.have.property('errorType', 1);
+
+ expect(Request).to.be.calledOnceWithExactly(firstParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body.toString('fake-encoding'));
+ });
+
+ expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done);
+
+ this.clock.tick(7000); // tick the timeout
+ });
+
+ it('should return error if cookie setting code evaluation fails', function(done) {
+ // Swap the base64-encoded cookie setting code (the `S` value) for a script that throws, so the vm will throw an error
+ var html = helper.getFixture('js_challenge_cookie.html');
+ var b64 = Buffer.from('throw new Error(\'vm eval failed\');').toString('base64'); // Buffer.from replaces the deprecated `new Buffer()`
+
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: html.replace(/S='([^']+)'/, 'S=\'' + b64 + '\'')
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.instanceOf(errors.ParserError);
+ expect(error).to.have.property('error').that.is.an('error');
+ expect(error).to.have.property('errorType', 3);
+ expect(error.message).to.include('vm eval failed');
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done);
+
+ this.clock.tick(7000); // tick the timeout
+ });
+});
diff --git a/test/test-index.js b/test/test-index.js
new file mode 100644
index 0000000..02cffa8
--- /dev/null
+++ b/test/test-index.js
@@ -0,0 +1,470 @@
+'use strict';
+
+var cloudscraper = require('../index');
+var request = require('request-promise');
+var helper = require('./helper');
+
+var sinon = require('sinon');
+var expect = require('chai').expect;
+
+describe('Cloudscraper', function () {
+ var requestedPage = helper.getFixture('requested_page.html');
+ var uri = helper.defaultParams.uri;
+ var sandbox;
+ var Request;
+
+ beforeEach(function () {
+ helper.defaultParams.jar = request.jar();
+ sandbox = sinon.createSandbox();
+ // Prepare stubbed Request for each test
+ Request = sandbox.stub(request, 'Request');
+ // setTimeout should be properly stubbed to prevent the unit test from running too long.
+ this.clock = sinon.useFakeTimers();
+ });
+
+ afterEach(function () {
+ sandbox.restore();
+ this.clock.restore();
+ });
+
+ it('should return requested page, if cloudflare is disabled for page', function (done) {
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 200,
+ body: requestedPage
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done);
+ });
+
+ it('should not trigger any error if recaptcha is present in page not protected by CF', function (done) {
+ var expectedResponse = helper.fakeResponse({
+ statusCode: 200,
+ body: helper.getFixture('page_with_recaptcha.html')
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledOnceWithExactly(helper.defaultParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done);
+ });
+
+ it('should resolve challenge (version as on 21.05.2015) and then return page', function (done) {
+ // Cloudflare is enabled for site. It returns a page with js challenge
+ var firstResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_21_05_2015.html')
+ });
+
+ Request.onFirstCall()
+ .callsFake(helper.fakeRequest({ response: firstResponse }));
+
+ var secondParams = helper.extendParams({
+ uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
+ qs: {
+ 'jschl_vc': '89cdff5eaa25923e0f26e29e5195dce9',
+ // 633 is the answer to Cloudflare's js challenge in this particular case
+ 'jschl_answer': 633 + 'example-site.dev'.length,
+ 'pass': '1432194174.495-8TSfc235EQ'
+ },
+ headers: {
+ 'Referer': 'http://example-site.dev/path/'
+ },
+ challengesToSolve: 2
+ });
+
+ // The second call to Request carries the challenge solution (uri, answer,
+ // headers with Referer; see secondParams) and is answered with the requested page
+ var secondResponse = helper.fakeResponse({ body: requestedPage });
+
+ Request.onSecondCall() // The submitted solution is answered with the requested page
+ .callsFake(helper.fakeRequest({ response: secondResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledTwice;
+ expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams);
+ expect(Request.secondCall).to.be.calledWithExactly(secondParams);
+
+ expect(response).to.be.equal(secondResponse);
+ expect(body).to.be.equal(secondResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(secondResponse.body).and.notify(done);
+
+ // tick the timeout
+ this.clock.tick(7000);
+ });
+
+ it('should resolve challenge (version as on 09.06.2016) and then return page', function (done) {
+ // Cloudflare is enabled for site. It returns a page with js challenge
+ var firstResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_09_06_2016.html')
+ });
+
+ Request.onFirstCall()
+ .callsFake(helper.fakeRequest({ response: firstResponse }));
+
+ var secondParams = helper.extendParams({
+ uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
+ qs: {
+ 'jschl_vc': '346b959db0cfa38f9938acc11d6e1e6e',
+ // 6632 is the answer to Cloudflare's js challenge in this particular case
+ 'jschl_answer': 6632 + 'example-site.dev'.length,
+ 'pass': '1465488330.6-N/NbGTg+IM'
+ },
+ headers: {
+ 'Referer': 'http://example-site.dev/path/'
+ },
+ challengesToSolve: 2
+ });
+
+ // The second call to Request carries the challenge solution (uri, answer,
+ // headers with Referer; see secondParams) and is answered with the requested page
+ var secondResponse = helper.fakeResponse({ body: requestedPage });
+
+ Request.onSecondCall()
+ .callsFake(helper.fakeRequest({ response: secondResponse }));
+
+ var promise = cloudscraper.get(uri, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledTwice;
+ expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams);
+ expect(Request.secondCall).to.be.calledWithExactly(secondParams);
+
+ expect(response).to.be.equal(secondResponse);
+ expect(body).to.be.equal(secondResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(secondResponse.body).and.notify(done);
+
+ this.clock.tick(7000); // tick the timeout
+ });
+
+ it('should resolve 2 consequent challenges', function (done) {
+ var firstParams = helper.extendParams({ resolveWithFullResponse: true });
+ // First call and CF returns a challenge
+ var firstResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_03_12_2018_1.html')
+ });
+
+ Request.onFirstCall()
+ .callsFake(helper.fakeRequest({ response: firstResponse }));
+
+ var secondParams = helper.extendParams({
+ resolveWithFullResponse: true,
+ uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
+ qs: {
+ 'jschl_vc': '427c2b1cd4fba29608ee81b200e94bfa',
+ 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 is the answer to Cloudflare's js challenge
+ // in this particular case
+ 'pass': '1543827239.915-44n9IE20mS'
+ },
+ headers: {
+ 'Referer': 'http://example-site.dev/path/'
+ },
+ challengesToSolve: 2
+ });
+
+ // We submit a solution to the first challenge, but CF decided to give us a second one
+ var secondResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_03_12_2018_2.html')
+ });
+
+ Request.onSecondCall()
+ .callsFake(helper.fakeRequest({ response: secondResponse }));
+
+ var thirdParams = helper.extendParams({
+ resolveWithFullResponse: true,
+ uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
+ qs: {
+ 'jschl_vc': 'a41fee3a9f041fea01f0cbf3e8e4d29b',
+ // -1.9145049856 is the answer to Cloudflare's js challenge in this particular case
+ 'jschl_answer': -1.9145049856 + 'example-site.dev'.length,
+ 'pass': '1543827246.024-hvxyNA3rOg'
+ },
+ headers: {
+ 'Referer': 'http://example-site.dev/cdn-cgi/l/chk_jschl?jschl_vc=427c2b1cd4fba29608ee81b200e94bfa&jschl_answer=10.66734594&pass=1543827239.915-44n9IE20mS'
+ },
+ challengesToSolve: 1
+ });
+
+ var thirdResponse = helper.fakeResponse({ body: requestedPage });
+
+ // We submit a solution to the second challenge and CF returns requested page
+ Request.onThirdCall()
+ .callsFake(helper.fakeRequest({ response: thirdResponse }));
+
+ var options = { uri: uri, resolveWithFullResponse: true };
+
+ var promise = cloudscraper.get(options, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledThrice;
+ expect(Request.firstCall).to.be.calledWithExactly(firstParams);
+ expect(Request.secondCall).to.be.calledWithExactly(secondParams);
+ expect(Request.thirdCall).to.be.calledWithExactly(thirdParams);
+
+ expect(response).to.be.equal(thirdResponse);
+ expect(body).to.be.equal(thirdResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(thirdResponse).and.notify(done);
+
+ this.clock.tick(14000); // tick the timeout
+ });
+
+ it('should make post request with formData', function (done) {
+ var formData = { some: 'data' };
+
+ var expectedParams = helper.extendParams({
+ method: 'POST',
+ formData: formData
+ });
+ // Stub first call, which request makes to page. It should return requested page
+ var expectedResponse = helper.fakeResponse({ body: requestedPage });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var options = { uri: uri, formData: formData };
+
+ var promise = cloudscraper.post(options, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledOnceWithExactly(expectedParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done);
+ });
+
+ it('should make delete request', function (done) {
+ var expectedParams = helper.extendParams({ method: 'DELETE' });
+ // Stub first call, which request makes to page. It should return requested page
+ var expectedResponse = helper.fakeResponse({ body: requestedPage });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var promise = cloudscraper.delete(uri, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledOnceWithExactly(expectedParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done);
+ });
+
+ it('should return raw data when encoding is null', function (done) {
+ var expectedParams = helper.extendParams({ realEncoding: null });
+
+ var expectedResponse = helper.fakeResponse({
+ body: Buffer.from('R0lGODlhDwAPAKECAAAAzMzM/////wAAACwAAAAADwAPAAACIISPeQHsrZ5ModrLlN48CXF8m2iQ3YmmKqVlRtW4MLwWACH+H09wdGltaXplZCBieSBVbGVhZCBTbWFydFNhdmVyIQAAOw==', 'base64') // Buffer.from replaces the deprecated `new Buffer()`
+ });
+
+ Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+ var options = { uri: uri, encoding: null };
+
+ var promise = cloudscraper.get(options, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledOnceWithExactly(expectedParams);
+
+ expect(response).to.be.equal(expectedResponse);
+ expect(body).to.be.equal(expectedResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done);
+ });
+
+ it('should set the given cookie and then return page', function (done) {
+ var firstResponse = helper.fakeResponse({
+ body: helper.getFixture('js_challenge_cookie.html')
+ });
+
+ // Cloudflare is enabled for site.
+ // It returns a redirecting page if a (session) cookie is unset.
+ Request.onFirstCall()
+ .callsFake(helper.fakeRequest({ response: firstResponse }));
+
+ var secondParams = helper.extendParams({ challengesToSolve: 2 });
+ var secondResponse = helper.fakeResponse({ body: requestedPage });
+
+ // Only callback with the second response if the cookie string matches
+ var matchCookie = sinon.match(function (params) {
+ return params.jar.getCookieString(uri) === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a';
+ });
+
+ // Prevent a matching error if for some reason params.jar is missing or invalid.
+ var matchParams = sinon.match.has('jar', sinon.match.object).and(matchCookie);
+
+ Request.withArgs(matchParams)
+ .callsFake(helper.fakeRequest({ response: secondResponse }));
+
+ // We need to override cloudscraper's default jar for this test
+ var options = { uri: uri, jar: helper.defaultParams.jar };
+
+ var promise = cloudscraper.get(options, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledTwice;
+ expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams);
+ expect(Request.secondCall).to.be.calledWithExactly(secondParams);
+
+ expect(response).to.be.equal(secondResponse);
+ expect(body).to.be.equal(secondResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(secondResponse.body).and.notify(done);
+ });
+
+ it('should not use proxy\'s uri', function (done) {
+
+ var firstParams = helper.extendParams({
+ proxy: 'https://example-proxy-site.dev/path/'
+ });
+
+ var firstResponse = helper.fakeResponse({
+ statusCode: 503,
+ body: helper.getFixture('js_challenge_03_12_2018_1.html')
+ });
+
+ Request.onFirstCall()
+ .callsFake(helper.fakeRequest({ response: firstResponse }));
+
+ var secondParams = helper.extendParams({
+ proxy: 'https://example-proxy-site.dev/path/',
+ uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl',
+ qs: {
+ 'jschl_vc': '427c2b1cd4fba29608ee81b200e94bfa',
+ 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 is the answer to Cloudflare's js challenge
+ // in this particular case
+ 'pass': '1543827239.915-44n9IE20mS'
+ },
+ headers: {
+ 'Referer': 'http://example-site.dev/path/'
+ },
+ challengesToSolve: 2
+ });
+
+ var secondResponse = helper.fakeResponse({ body: requestedPage });
+
+ Request.onSecondCall()
+ .callsFake(helper.fakeRequest({ response: secondResponse }));
+
+ var options = { uri: uri, proxy: 'https://example-proxy-site.dev/path/' };
+
+ var promise = cloudscraper.get(options, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledTwice;
+ expect(Request.firstCall).to.be.calledWithExactly(firstParams);
+ expect(Request.secondCall).to.be.calledWithExactly(secondParams);
+
+ expect(response).to.be.equal(secondResponse);
+ expect(body).to.be.equal(secondResponse.body);
+ });
+
+ expect(promise).to.eventually.equal(secondResponse.body).and.notify(done);
+
+ this.clock.tick(14000); // tick the timeout
+ });
+
+ it('should reuse the provided cookie jar', function(done) {
+ var customJar = request.jar();
+
+ var firstParams = helper.extendParams({ jar: customJar });
+
+ var firstResponse = helper.fakeResponse({
+ body: helper.getFixture('js_challenge_cookie.html')
+ });
+
+ // Cloudflare is enabled for site.
+ // It returns a redirecting page if a (session) cookie is unset.
+ Request.onFirstCall()
+ .callsFake(helper.fakeRequest({ response: firstResponse }));
+
+ var secondParams = helper.extendParams({
+ jar: customJar,
+ challengesToSolve: 2
+ });
+
+ var secondResponse = helper.fakeResponse({ body: requestedPage });
+
+ // Only callback with the second response if the cookie string matches
+ var matchCookie = sinon.match(function (params) {
+ return params.jar.getCookieString(uri) === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a';
+ });
+
+ // Prevent a matching error if for some reason params.jar is missing or invalid.
+ var matchParams = sinon.match.has('jar', sinon.match.object).and(matchCookie);
+
+ Request.withArgs(matchParams)
+ .callsFake(helper.fakeRequest({ response: secondResponse }));
+
+ // We need to override cloudscraper's default jar for this test
+ var options = { uri: uri, jar: customJar };
+
+ customJar.setCookie('custom cookie', 'http://custom-site.dev/');
+
+ cloudscraper.get(options, function (error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request).to.be.calledTwice;
+ expect(Request.firstCall).to.be.calledWithExactly(firstParams);
+ expect(Request.secondCall).to.be.calledWithExactly(secondParams);
+
+ expect(response).to.be.equal(secondResponse);
+ expect(body).to.be.equal(secondResponse.body);
+
+ var customCookie = customJar.getCookieString('http://custom-site.dev/');
+ expect(customCookie).to.equal('custom cookie');
+
+ cloudscraper.get(options, function(error, response, body) {
+ expect(error).to.be.null;
+
+ expect(Request.thirdCall.args[0].jar).to.equal(customJar);
+ customCookie = customJar.getCookieString('http://custom-site.dev/');
+ expect(customCookie).to.equal('custom cookie');
+
+ done();
+ });
+ });
+ });
+
+ it('should define custom defaults function', function (done) {
+ expect(cloudscraper.defaults).to.not.equal(request.defaults);
+
+ var custom = cloudscraper.defaults({ challengesToSolve: 5 });
+ expect(custom.defaults).to.equal(cloudscraper.defaults);
+ done();
+ });
+});
diff --git a/test/test-rp.js b/test/test-rp.js
new file mode 100644
index 0000000..e43e40b
--- /dev/null
+++ b/test/test-rp.js
@@ -0,0 +1,105 @@
+'use strict';
+
+var cloudscraper = require('../index');
+var request = require('request-promise');
+var helper = require('./helper');
+
+var sinon = require('sinon');
+var expect = require('chai').expect;
+
+describe('Cloudscraper promise', function () {
+  // Fixture body that the stubbed request answers with
+  var requestedPage = helper.getFixture('requested_page.html');
+  var uri = helper.defaultParams.uri;
+  var sandbox;
+  var Request;
+
+  beforeEach(function () {
+    // Start every test with a new cookie jar in the shared default params
+    helper.defaultParams.jar = request.jar();
+    sandbox = sinon.createSandbox();
+    // Prepare stubbed Request for each test
+    Request = sandbox.stub(request, 'Request');
+    // setTimeout should be properly stubbed to prevent the unit test from running too long.
+    this.clock = sinon.useFakeTimers();
+  });
+
+  afterEach(function () {
+    sandbox.restore();
+    this.clock.restore();
+  });
+
+  it('should resolve with response body', function () {
+    var expectedResponse = helper.fakeResponse({ body: requestedPage });
+    var expectedParams = helper.extendParams({ callback: undefined });
+
+    Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+    return cloudscraper.get(uri).then(function (body) {
+      expect(Request).to.be.calledOnceWithExactly(expectedParams);
+      expect(body).to.be.equal(requestedPage);
+    });
+  });
+
+  it('should resolve with full response', function () {
+    var expectedResponse = helper.fakeResponse({
+      statusCode: 200,
+      body: requestedPage
+    });
+
+    var expectedParams = helper.extendParams({
+      callback: undefined,
+      resolveWithFullResponse: true
+    });
+
+    // The method is implicitly GET
+    delete expectedParams.method;
+
+    Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+    var promise = cloudscraper({
+      uri: uri,
+      resolveWithFullResponse: true
+    });
+
+    return promise.then(function (response) {
+      expect(Request).to.be.calledOnceWithExactly(expectedParams);
+
+      expect(response).to.be.equal(expectedResponse);
+      expect(response.body).to.be.equal(requestedPage);
+    });
+  });
+
+  // The helper calls the fake request callback synchronously. This results
+  // in the promise being rejected before we catch it in the test.
+  // This can be noticeable if we return the promise instead of calling done.
+  it('should define catch', function (done) {
+    var expectedResponse = helper.fakeResponse({ error: new Error('fake') });
+
+    Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+    var caught = false;
+    cloudscraper(uri).catch(function () {
+      caught = true;
+    }).then(function () {
+      // Fail right away instead of timing out when the catch handler was skipped
+      done(caught ? undefined : new Error('Expected the catch handler to be called'));
+    });
+  });
+
+  it('should define finally', function (done) {
+    var expectedResponse = helper.fakeResponse({ error: new Error('fake') });
+
+    Request.callsFake(helper.fakeRequest({ response: expectedResponse }));
+
+    var resolved = false;
+    cloudscraper(uri).then(function () {
+      resolved = true;
+    }).finally(function () {
+      // The fake request fails, so the fulfilment handler must have been skipped
+      done(resolved ? new Error('Expected the promise to be rejected') : undefined);
+    }).catch(function () {
+      // The rejection is expected; handle it so it is not reported as unhandled
+    });
+  });
+});