From cdae0930d1fc9fba2574163150fdad6342d7054e Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 02:56:58 -0600 Subject: [PATCH 01/33] Use requester argument of request.defaults --- index.js | 165 +++++++++++++++++++------------------------------------ 1 file changed, 58 insertions(+), 107 deletions(-) diff --git a/index.js b/index.js index a2c51b4..7e810ea 100644 --- a/index.js +++ b/index.js @@ -1,94 +1,62 @@ var vm = require('vm'); var requestModule = require('request'); -var jar = requestModule.jar(); - -var request = requestModule.defaults({jar: jar}); // Cookies should be enabled -var UserAgent = 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36'; -var Timeout = 6000; // Cloudflare requires a delay of 5 seconds, so wait for at least 6. -var cloudscraper = {}; -var MaxChallengesToSolve = 3; // Support only this max challenges in row. If CF returns more, throw an error - -/** - * Performs get request to url with headers. - * @param {String} url - * @param {Function} callback function(error, response, body) {} - * @param {Object} headers Hash with headers, e.g. {'Referer': 'http://google.com', 'User-Agent': '...'} - */ -cloudscraper.get = function(url, callback, headers) { - performRequest({ - method: 'GET', - url: url, - headers: headers - }, callback); -}; - -/** - * Performs post request to url with headers. - * @param {String} url - * @param {String|Object} body Will be passed as form data - * @param {Function} callback function(error, response, body) {} - * @param {Object} headers Hash with headers, e.g. 
{'Referer': 'http://google.com', 'User-Agent': '...'} - */ -cloudscraper.post = function(url, body, callback, headers) { - var data = ''; - var bodyType = Object.prototype.toString.call(body); - - if(bodyType === '[object String]') { - data = body; - } else if (bodyType === '[object Object]') { - data = Object.keys(body).map(function(key) { - return key + '=' + body[key]; - }).join('&'); - } - headers = headers || {}; - headers['Content-Type'] = headers['Content-Type'] || 'application/x-www-form-urlencoded; charset=UTF-8'; - headers['Content-Length'] = headers['Content-Length'] || data.length; - - performRequest({ - method: 'POST', - body: data, - url: url, - headers: headers - }, callback); -}; - -/** - * Performs get or post request with generic request options - * @param {Object} options Object to be passed to request's options argument - * @param {Function} callback function(error, response, body) {} - */ -cloudscraper.request = function(options, callback) { - performRequest(options, callback); -}; - -function performRequest(options, callback) { - options = options || {}; - options.headers = options.headers || {}; - - options.headers['Cache-Control'] = options.headers['Cache-Control'] || 'private'; - options.headers['Accept'] = options.headers['Accept'] || 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5'; - - makeRequest = requestMethod(options.method); - - //Can't just do the normal options.encoding || 'utf8' - //because null is a valid encoding. 
- if('encoding' in options) { - options.realEncoding = options.encoding; - } else { - options.realEncoding = 'utf8'; +var originalDefaults = requestModule.defaults; + +module.exports = defaults.call(requestModule, { + // Cookies should be enabled + jar: requestModule.jar(), + headers: { + 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36', + 'Cache-Control': 'private', + 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' + }, + // Cloudflare requires a delay of 5 seconds, so wait for at least 6. + cloudflareTimeout: 6000, + // followAllRedirects - follow non-GET HTTP 3xx responses as redirects + followAllRedirects: true, + // Support only this max challenges in row. If CF returns more, throw an error + challengesToSolve: 3 +}); + +function defaults(options) { + var cloudscraper = originalDefaults.call(this, options, requester); + + if (requestModule === this) { + cloudscraper.defaults = defaults; + } + // Expose the debug option + Object.defineProperty(cloudscraper, 'debug', + Object.getOwnPropertyDescriptor(this, 'debug')); + + return cloudscraper; +} + +function requester(options, callback) { + // Prevent overwriting realEncoding in subsequent calls + if (!('realEncoding' in options)) { + // Can't just do the normal options.encoding || 'utf8' + // because null is a valid encoding. 
+ if ('encoding' in options) { + options.realEncoding = options.encoding; + } else { + options.realEncoding = 'utf8'; + } } + options.encoding = null; - if (!options.url || !callback) { - throw new Error('To perform request, define both url and callback'); + if (typeof callback !== 'function') { + throw new TypeError('Expected a callback function, got ' + + typeof(callback) + ' instead.'); } - options.headers['User-Agent'] = options.headers['User-Agent'] || UserAgent; - options.challengesToSolve = options.challengesToSolve || MaxChallengesToSolve; // Might not be the best way how to pass this variable - options.followAllRedirects = options.followAllRedirects === undefined ? true : options.followAllRedirects; + if (isNaN(options.challengesToSolve)) { + throw new TypeError('Expected `challengesToSolve` option to be a number, ' + + 'got ' + typeof(options.challengesToSolve) + ' instead.'); + } - makeRequest(options, function(error, response, body) { + requestModule(options, function(error, response, body) { processRequestResponse(options, {error: error, response: response, body: body}, callback); }); } @@ -101,7 +69,6 @@ function processRequestResponse(options, requestResult, callback) { var stringBody; var isChallengePresent; var isRedirectChallengePresent; - var isTargetPage; // Meaning we have finally reached the target page if (error || !body || !body.toString) { return callback({ errorType: 0, error: error }, response, body); @@ -115,9 +82,9 @@ function processRequestResponse(options, requestResult, callback) { isChallengePresent = stringBody.indexOf('a = document.getElementById(\'jschl-answer\');') !== -1; isRedirectChallengePresent = stringBody.indexOf('You are being redirected') !== -1 || stringBody.indexOf('sucuri_cloudproxy_js') !== -1; - isTargetPage = !isChallengePresent && !isRedirectChallengePresent; + // isTargetPage = !isChallengePresent && !isRedirectChallengePresent; - if(isChallengePresent && options.challengesToSolve == 0) { + if 
(isChallengePresent && options.challengesToSolve === 0) { return callback({ errorType: 4 }, response, body); } @@ -125,7 +92,7 @@ function processRequestResponse(options, requestResult, callback) { if (isChallengePresent) { setTimeout(function() { solveChallenge(response, stringBody, options, callback); - }, Timeout); + }, options.cloudflareTimeout); } else if (isRedirectChallengePresent) { setCookieAndReload(response, stringBody, options, callback); } else { @@ -160,7 +127,6 @@ function checkForErrors(error, body) { function solveChallenge(response, body, options, callback) { var challenge = body.match(/name="jschl_vc" value="(\w+)"/); var host = response.request.host; - var makeRequest = requestMethod(options.method); var jsChlVc; var answerResponse; var answerUrl; @@ -177,7 +143,7 @@ function solveChallenge(response, body, options, callback) { return callback({errorType: 3, error: 'I cant extract method from setTimeOut wrapper'}, response, body); } - challenge_pass = body.match(/name="pass" value="(.+?)"/)[1]; + var challenge_pass = body.match(/name="pass" value="(.+?)"/)[1]; challenge = challenge[1]; @@ -204,14 +170,11 @@ function solveChallenge(response, body, options, callback) { options.challengesToSolve = options.challengesToSolve - 1; // Make request with answer - makeRequest(options, function(error, response, body) { - processRequestResponse(options, {error: error, response: response, body: body}, callback); - }); + requester(options, callback); } function setCookieAndReload(response, body, options, callback) { var challenge = body.match(/S='([^']+)'/); - var makeRequest = requestMethod(options.method); if (!challenge) { return callback({errorType: 3, error: 'I cant extract cookie generation code from page'}, response, body); @@ -230,24 +193,14 @@ function setCookieAndReload(response, body, options, callback) { vm.runInNewContext(cookieSettingCode, sandbox); try { - jar.setCookie(sandbox.document.cookie, response.request.uri.href, {ignoreError: true}); + 
options.jar.setCookie(sandbox.document.cookie, response.request.uri.href, {ignoreError: true}); } catch (err) { return callback({errorType: 3, error: 'Error occurred during evaluation: ' + err.message}, response, body); } options.challengesToSolve = options.challengesToSolve - 1; - makeRequest(options, function(error, response, body) { - processRequestResponse(options, {error: error, response: response, body: body}, callback); - }); -} - -// Workaround for better testing. Request has pretty poor API -function requestMethod(method) { - // For now only GET and POST are supported - method = method.toUpperCase(); - - return method === 'POST' ? request.post : request.get; + requester(options, callback); } function processResponseBody(options, error, response, body, callback) { @@ -263,6 +216,4 @@ function processResponseBody(options, error, response, body, callback) { callback(error, response, body); -} - -module.exports = cloudscraper; +} \ No newline at end of file From 20890dc61ad5741ff6d3e443a50eb1e45c1d0666 Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 03:03:24 -0600 Subject: [PATCH 02/33] Create new headers object before assigning referer --- index.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index 7e810ea..3f65f44 100644 --- a/index.js +++ b/index.js @@ -164,6 +164,8 @@ function solveChallenge(response, body, options, callback) { answerUrl = response.request.uri.protocol + '//' + host + '/cdn-cgi/l/chk_jschl'; + // Prevent reusing the headers object in subsequent calls as this affects tests + options.headers = Object.assign({}, options.headers); options.headers['Referer'] = response.request.uri.href; // Original url should be placed as referer options.url = answerUrl; options.qs = answerResponse; @@ -216,4 +218,4 @@ function processResponseBody(options, error, response, body, callback) { callback(error, response, body); -} \ No newline at end of file +} From b4dac7831917cdd9ca6f70a5a86980caf25947fd 
Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 03:04:09 -0600 Subject: [PATCH 03/33] Prefer using uri over url to prevent option conflict --- index.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 3f65f44..a714698 100644 --- a/index.js +++ b/index.js @@ -166,8 +166,9 @@ function solveChallenge(response, body, options, callback) { // Prevent reusing the headers object in subsequent calls as this affects tests options.headers = Object.assign({}, options.headers); - options.headers['Referer'] = response.request.uri.href; // Original url should be placed as referer - options.url = answerUrl; + // Original uri should be placed as referer + options.headers['Referer'] = response.request.uri.href; + options.uri = answerUrl; options.qs = answerResponse; options.challengesToSolve = options.challengesToSolve - 1; From 39e714e7664ff3f69bf98688ce911a8d1a375deb Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 03:07:36 -0600 Subject: [PATCH 04/33] Major update to unit tests Rename/move specs to test so Mocha can find them Remove Gruntfile, it was only being used to start mocha and has outdated dependencies Remove grunt related dev-deps Rename/move test files Update test dependencies, add sinon-chai Change test script from grunt to mocha Add mocha.opts --- Gruntfile.js | 20 ------------------- mocha.opts | 2 ++ package.json | 12 +++++------ specs/chai.js | 12 ----------- test/common.js | 6 ++++++ {specs => test}/fixtures/access_denied.html | 0 {specs => test}/fixtures/captcha.html | 0 .../fixtures/invalid_js_challenge.html | 0 .../fixtures/js_challenge_03_12_2018_1.html | 0 .../fixtures/js_challenge_03_12_2018_2.html | 0 .../fixtures/js_challenge_09_06_2016.html | 0 .../fixtures/js_challenge_21_05_2015.html | 0 .../fixtures/js_challenge_cookie.html | 0 .../fixtures/page_with_recaptcha.html | 0 {specs => test}/fixtures/requested_page.html | 0 specs/spec_helper.js => test/helper.js | 0 
specs/tests/errors.js => test/test-errors.js | 0 .../cloudscraper.js => test/test-index.js | 0 18 files changed, 13 insertions(+), 39 deletions(-) delete mode 100644 Gruntfile.js create mode 100644 mocha.opts delete mode 100644 specs/chai.js create mode 100644 test/common.js rename {specs => test}/fixtures/access_denied.html (100%) rename {specs => test}/fixtures/captcha.html (100%) rename {specs => test}/fixtures/invalid_js_challenge.html (100%) rename {specs => test}/fixtures/js_challenge_03_12_2018_1.html (100%) rename {specs => test}/fixtures/js_challenge_03_12_2018_2.html (100%) rename {specs => test}/fixtures/js_challenge_09_06_2016.html (100%) rename {specs => test}/fixtures/js_challenge_21_05_2015.html (100%) rename {specs => test}/fixtures/js_challenge_cookie.html (100%) rename {specs => test}/fixtures/page_with_recaptcha.html (100%) rename {specs => test}/fixtures/requested_page.html (100%) rename specs/spec_helper.js => test/helper.js (100%) rename specs/tests/errors.js => test/test-errors.js (100%) rename specs/tests/cloudscraper.js => test/test-index.js (100%) diff --git a/Gruntfile.js b/Gruntfile.js deleted file mode 100644 index 4617377..0000000 --- a/Gruntfile.js +++ /dev/null @@ -1,20 +0,0 @@ -module.exports = function(grunt) { - - grunt.loadNpmTasks('grunt-mocha-test'); - - grunt.initConfig({ - mochaTest: { - test: { - options: { - globals: ['expect', 'sinon'], - reporter: 'spec', - quiet: false, - require: './specs/chai' - }, - src: ['specs/**/*.js'] - } - } - }); - - grunt.registerTask('default', ['mochaTest']); -}; diff --git a/mocha.opts b/mocha.opts new file mode 100644 index 0000000..e32c7e6 --- /dev/null +++ b/mocha.opts @@ -0,0 +1,2 @@ +--reporter spec +--require tests/common \ No newline at end of file diff --git a/package.json b/package.json index 83f4d1b..4e658ed 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "description": "Bypasses cloudflare's anti-ddos page", "main": "index.js", "scripts": { - "test": "grunt", + 
"test": "mocha", "lint": "eslint ." }, "repository": { @@ -27,17 +27,15 @@ "request": "^2.88.0" }, "devDependencies": { + "chai": "^4.2.0", "eslint": "^5.14.1", "eslint-config-standard": "^12.0.0", "eslint-plugin-import": "^2.16.0", "eslint-plugin-node": "^8.0.1", "eslint-plugin-promise": "^4.0.1", "eslint-plugin-standard": "^4.0.0", - "chai": "^1.10.0", - "grunt": "^0.4.5", - "grunt-cli": "^1.2.0", - "grunt-mocha-test": "^0.12.4", - "mocha": "^2.0.1", - "sinon": "^1.12.1" + "mocha": "^6.0.2", + "sinon": "^7.2.4", + "sinon-chai": "^3.3.0" } } diff --git a/specs/chai.js b/specs/chai.js deleted file mode 100644 index 43994da..0000000 --- a/specs/chai.js +++ /dev/null @@ -1,12 +0,0 @@ -var chai = require('chai'), - sinon = require('sinon'); - -chai.expect(); - -chai.config.includeStack = true; - -global.expect = chai.expect; -global.AssertionError = chai.AssertionError; -global.Assertion = chai.Assertion; -global.assert = chai.assert; -global.sinon = sinon; diff --git a/test/common.js b/test/common.js new file mode 100644 index 0000000..4094503 --- /dev/null +++ b/test/common.js @@ -0,0 +1,6 @@ +'use strict'; + +var chai = require('chai'); + +chai.use(require('sinon-chai')); +chai.config.includeStack = true; \ No newline at end of file diff --git a/specs/fixtures/access_denied.html b/test/fixtures/access_denied.html similarity index 100% rename from specs/fixtures/access_denied.html rename to test/fixtures/access_denied.html diff --git a/specs/fixtures/captcha.html b/test/fixtures/captcha.html similarity index 100% rename from specs/fixtures/captcha.html rename to test/fixtures/captcha.html diff --git a/specs/fixtures/invalid_js_challenge.html b/test/fixtures/invalid_js_challenge.html similarity index 100% rename from specs/fixtures/invalid_js_challenge.html rename to test/fixtures/invalid_js_challenge.html diff --git a/specs/fixtures/js_challenge_03_12_2018_1.html b/test/fixtures/js_challenge_03_12_2018_1.html similarity index 100% rename from 
specs/fixtures/js_challenge_03_12_2018_1.html rename to test/fixtures/js_challenge_03_12_2018_1.html diff --git a/specs/fixtures/js_challenge_03_12_2018_2.html b/test/fixtures/js_challenge_03_12_2018_2.html similarity index 100% rename from specs/fixtures/js_challenge_03_12_2018_2.html rename to test/fixtures/js_challenge_03_12_2018_2.html diff --git a/specs/fixtures/js_challenge_09_06_2016.html b/test/fixtures/js_challenge_09_06_2016.html similarity index 100% rename from specs/fixtures/js_challenge_09_06_2016.html rename to test/fixtures/js_challenge_09_06_2016.html diff --git a/specs/fixtures/js_challenge_21_05_2015.html b/test/fixtures/js_challenge_21_05_2015.html similarity index 100% rename from specs/fixtures/js_challenge_21_05_2015.html rename to test/fixtures/js_challenge_21_05_2015.html diff --git a/specs/fixtures/js_challenge_cookie.html b/test/fixtures/js_challenge_cookie.html similarity index 100% rename from specs/fixtures/js_challenge_cookie.html rename to test/fixtures/js_challenge_cookie.html diff --git a/specs/fixtures/page_with_recaptcha.html b/test/fixtures/page_with_recaptcha.html similarity index 100% rename from specs/fixtures/page_with_recaptcha.html rename to test/fixtures/page_with_recaptcha.html diff --git a/specs/fixtures/requested_page.html b/test/fixtures/requested_page.html similarity index 100% rename from specs/fixtures/requested_page.html rename to test/fixtures/requested_page.html diff --git a/specs/spec_helper.js b/test/helper.js similarity index 100% rename from specs/spec_helper.js rename to test/helper.js diff --git a/specs/tests/errors.js b/test/test-errors.js similarity index 100% rename from specs/tests/errors.js rename to test/test-errors.js diff --git a/specs/tests/cloudscraper.js b/test/test-index.js similarity index 100% rename from specs/tests/cloudscraper.js rename to test/test-index.js From d574123940a997ce71c9d42186917b82435e8419 Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 03:08:26 -0600 Subject: 
[PATCH 05/33] Rewrite all tests --- test/helper.js | 98 ++++++---- test/test-errors.js | 340 ++++++++++++++++++--------------- test/test-index.js | 444 +++++++++++++++++++++++--------------------- 3 files changed, 488 insertions(+), 394 deletions(-) diff --git a/test/helper.js b/test/helper.js index c07e710..23461ce 100644 --- a/test/helper.js +++ b/test/helper.js @@ -1,45 +1,79 @@ -var fs = require('fs'); -var urlLib = require('url'); -var path = require('path'); +var request = require('request'); +var sinon = require('sinon'); +var fs = require('fs'); +var url = require('url'); +var path = require('path'); -var testDefaults = { - url: 'http://example-site.dev/path/', - headers: {'User-Agent': 'Chrome'} +var defaultParams = { + // Since cloudscraper wraps the callback, just ensure callback is a function + callback: sinon.match.func, + jar: request.jar(), + uri: 'http://example-site.dev/path/', + headers: { + "User-Agent": "Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36", + "Cache-Control": "private", + "Accept": "application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5" + }, + method: 'GET', + encoding: null, + realEncoding: 'utf8', + followAllRedirects: true, + cloudflareTimeout: 6000, + challengesToSolve: 3 }; +// Cache fixtures so they're not read from the fs but once +var cache = {}; + module.exports = { getFixture: function(fileName) { - return fs.readFileSync('./specs/fixtures/' + fileName, 'utf8'); + if (cache[fileName] === undefined) { + // noinspection JSUnresolvedVariable + cache[fileName] = fs.readFileSync(path.join(__dirname, 'fixtures', fileName), 'utf8'); + } + return cache[fileName]; }, - testDefaults: testDefaults, + defaultParams: defaultParams, // This method returns properly faked response object for request lib, which is used inside cloudscraper library - fakeResponseObject: function(statusCode, headers, body, url) { - var parsedUri = urlLib.parse(url); - parsedUri.uri = parsedUri; + 
fakeResponse: function(template) { + var fake = Object.assign({ + statusCode: 200, + headers: defaultParams.headers, + body: '', + }, template); - return { - statusCode: statusCode, - headers: headers, - body: body, - request: parsedUri //actually this is more compilcated object, but library uses only uri parts. + // The uri property of the fake response is only for tests to simplify fake request creation. + var uri = url.parse(fake.uri || defaultParams.uri); + // The actual request object is more complicated but this library only uses the uri parts. + fake.request = { + host: uri.host, + uri: uri }; + + return fake; }, - // Terrible hack. But because of request library API, it is impossible to normally stub it. That is why cloudscraper's index.js is removed from cache each time - dropCache: function() { - var pathToLib = path.normalize(__dirname + '/../index.js'); - if (require.cache[pathToLib]) { - delete require.cache[pathToLib]; - } + extendParams: function(params) { + // Extend target with the default params and provided params + var target = Object.assign({}, defaultParams, params); + // Extend target.headers with defaults headers and provided headers + target.headers = Object.assign({}, defaultParams.headers, params.headers); + return target; }, - requestParams: function(params) { - return Object.assign({ - method: 'GET', - url: testDefaults.url, - headers: testDefaults.headers, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 3 - }, params); + fakeRequest: function(template) { + // In this context, fake is the request result + var fake = Object.assign({ + error: null, + // Set the default fake statusCode to 500 if an error is provided + response: { statusCode: template.error ? 
500 : 200 }, + }, template); + + // Use the body from fake response if the template doesn't provide it + if (!('body' in fake)) { + fake.body = fake.response.body; + } + + return function Request(params) { + params.callback(fake.error, fake.response, fake.body); + }; } }; diff --git a/test/test-errors.js b/test/test-errors.js index c14ef17..c1d18be 100644 --- a/test/test-errors.js +++ b/test/test-errors.js @@ -1,28 +1,22 @@ -var helper = require('../spec_helper'); +'use strict'; + +var cloudscraper = require('../index'); var request = require('request'); +var helper = require('./helper'); + +var sinon = require('sinon'); +var expect = require('chai').expect; describe('Cloudscraper', function() { + var uri = helper.defaultParams.uri; var sandbox; - var captchaPage = helper.getFixture('captcha.html'); - var accessDenied = helper.getFixture('access_denied.html'); - var invalidChallenge = helper.getFixture('invalid_js_challenge.html'); - var url = helper.testDefaults.url; - var headers = helper.testDefaults.headers; - - // Since request.defaults returns new wrapper, create one global instance and then stub it in beforeEach - var requestDefault = request.defaults({jar: true}); - var defaultWithArgs = helper.requestParams({}); - - var cloudscraper; - before(function() { - helper.dropCache(); - }); + var Request; beforeEach(function () { - sandbox = sinon.sandbox.create(); - sandbox.stub(request, 'defaults').returns(requestDefault); - cloudscraper = require('../../index'); - // since cloudflare requires timeout, the module relies on setTimeout. It should be proprely stubbed to avoid ut running for too long + sandbox = sinon.createSandbox(); + // Prepare stubbed Request for each test + Request = sandbox.stub(request, 'Request'); + // setTimeout should be properly stubbed to prevent the unit test from running too long. 
this.clock = sinon.useFakeTimers(); }); @@ -32,189 +26,229 @@ describe('Cloudscraper', function() { }); it('should return error if it was thrown by request', function(done) { - var response = { statusCode: 500 }, - fakeError = {fake: 'error'}; //not real request error, but it doesn't matter + var fakeError = new Error('fake error'); + + Request.callsFake(helper.fakeRequest({ error: fakeError })); - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, fakeError, response, ''); + cloudscraper.get(uri, function(error) { + // errorType 0, means it is some kind of system error + expect(error).to.be.an('object'); + expect(error).to.be.eql({ errorType: 0, error: fakeError }); + expect(error.error).to.be.an('error'); - cloudscraper.get(url, function(error) { - expect(error).to.be.eql({errorType: 0, error: fakeError}); // errorType 0, means it is some kind of system error + expect(Request).to.be.calledOnce; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); done(); - }, headers); + }); }); - it('should return error if captcha is served by cloudflare', function(done){ - var response = { statusCode: 503 }; + it('should return error if captcha is served by cloudflare', function(done) { + var onlyResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('captcha.html') + }); - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, captchaPage); + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 1}); // errorType 1, means captcha is served - expect(response).to.be.eql(captchaPage); + cloudscraper.get(uri, function(error, response, body) { + // errorType 1, means captcha is served + expect(error).to.be.an('object'); + expect(error).to.be.eql({ errorType: 1 }); + + expect(Request).to.be.calledOnce; + 
expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); done(); - }, headers); + }); }); - it('should return error if cloudflare returned some inner error', function(done){ - //https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 - var response = { statusCode: 500 }; + it('should return error if cloudflare returned some inner error', function(done) { + // https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages + // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 + + var onlyResponse = helper.fakeResponse({ + statusCode: 500, + body: helper.getFixture('access_denied.html') + }); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, accessDenied); + cloudscraper.get(uri, function(error, response, body) { + // errorType 2, means inner cloudflare error + expect(error).to.be.an('object'); + expect(error).to.be.eql({ errorType: 2, error: 1006 }); - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 2, error: 1006}); // errorType 2, means inner cloudflare error - expect(response).to.be.eql(accessDenied); + expect(Request).to.be.calledOnce; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); done(); - }, headers); + }); }); - it('should return errior if cf presented more than 3 challenges in a row', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_09_06_2016.html'); - var response = helper.fakeResponseObject(503, headers, jsChallengePage, url); - var stubbed; + it('should return error if cf presented more than 3 challenges 
in a row', function(done) { + // The expected params for all subsequent calls to Request + var expectedParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + }); + + // Perform less strict matching on headers and qs to simplify this test + Object.assign(expectedParams, { + headers: sinon.match.object, + qs: sinon.match.object + }); - var pageWithCaptchaResponse = { statusCode: 200 }; // Cloudflare is enabled for site. It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(helper.requestParams({url: url, headers: headers})) - .callsArgWith(1, null, response, jsChallengePage); - - // Second call to request.get returns challenge - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: sinon.match.any, - headers: sinon.match.any, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 2 - }) - .callsArgWith(1, null, response, jsChallengePage); - - // Third call to request.get returns challenge - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: sinon.match.any, - headers: sinon.match.any, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 1 - }) - .callsArgWith(1, null, response, jsChallengePage); - - // Fourth call to request.get still returns a challenge - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: sinon.match.any, - headers: sinon.match.any, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, - challengesToSolve: 0 - }) - .callsArgWith(1, null, response, jsChallengePage); - - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 4}); // errorType 1, means captcha is served - expect(response).to.be.eql(jsChallengePage); + var expectedResponse = helper.fakeResponse({ + statusCode: 503, + body: 
helper.getFixture('js_challenge_09_06_2016.html') + }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + cloudscraper.get(uri, function(error, response, body) { + expect(error).to.be.an('object'); + expect(error).to.be.eql({ errorType: 4 }); + + expect(Request.callCount).to.be.equal(4); + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + + var total = helper.defaultParams.challengesToSolve + 1; + for (var i = 1; i < total; i++) { + // Decrement the number of challengesToSolve to match actual params + expectedParams.challengesToSolve -= 1; + expect(Request.getCall(i)).to.be.calledWithExactly(expectedParams); + } + + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); done(); - }, headers); + }); this.clock.tick(200000); // tick the timeout }); - it('should return error if body is undefined', function(done){ - //https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 - var response = { statusCode: 500 }; + it('should return error if body is undefined', function(done) { + // https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages + // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 + + Request.callsFake(helper.fakeRequest({ + response: { statusCode: 500} + })); - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, undefined); + cloudscraper.get(uri, function(error, response, body) { + // errorType 2, means inner cloudflare error + expect(error).to.be.an('object'); + expect(error).to.be.eql({ errorType: 0, error: null }); - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 0, error: null}); // errorType 2, means inner cloudflare error - expect(response).to.be.eql(undefined); + expect(Request).to.be.calledOnce; + 
expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + + expect(body).to.be.equal(undefined); done(); - }, headers); + }); }); it('should return error if challenge page failed to be parsed', function(done) { - var response = helper.fakeResponseObject(200, headers, invalidChallenge, url); - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, invalidChallenge); - - cloudscraper.get(url, function(error, body, response) { - expect(error.errorType).to.be.eql(3); // errorType 3, means parsing failed - expect(response).to.be.eql(invalidChallenge); + var onlyResponse = helper.fakeResponse({ + body: helper.getFixture('invalid_js_challenge.html') + }); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + + cloudscraper.get(uri, function(error, response, body) { + // errorType 3, means parsing failed + expect(error).to.be.an('object'); + expect(error).to.own.include({ errorType: 3 }); + + expect(Request).to.be.calledOnce; + expect(Request).to.be.calledWithExactly(helper.defaultParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); done(); - }, headers); + }); this.clock.tick(7000); // tick the timeout }); it('should return error if it was thrown by request when solving challenge', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_21_05_2015.html'), - response = helper.fakeResponseObject(503, headers, jsChallengePage, url), - connectionError = {error: 'ECONNRESET'}, - stubbed; + var onlyResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_21_05_2015.html') + }); + + var fakeError = Object.assign(new Error('read ECONNRESET'), { + code: 'ECONNRESET', errno: 'ECONNRESET', syscall: 'read' + }); // Cloudflare is enabled for site. 
It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .onCall(0) - .callsArgWith(1, null, response, jsChallengePage); + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: onlyResponse })); - stubbed - .onCall(1) - .callsArgWith(1, connectionError); + Request.onSecondCall() + .callsFake(helper.fakeRequest({ error: fakeError })); - cloudscraper.get(url, function(error) { - expect(error).to.be.eql({errorType: 0, error: connectionError}); // errorType 0, connection eror for example + cloudscraper.get(uri, function(error) { + // errorType 0, a connection error for example + expect(error).to.be.an('object'); + expect(error).to.be.eql({ errorType: 0, error: fakeError }); + expect(error.error).to.be.an('error'); + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); done(); - }, headers); + }); - this.clock.tick(7000); // tick the timeout + // tick the timeout + this.clock.tick(7000); }); it('should properly handle a case when after a challenge another one is returned', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_09_06_2016.html'); - var response = helper.fakeResponseObject(503, headers, jsChallengePage, url); - var stubbed; - - var pageWithCaptchaResponse = { statusCode: 200 }; // Cloudflare is enabled for site. 
It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(helper.requestParams({url: url, headers: headers})) - .callsArgWith(1, null, response, jsChallengePage); + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_09_06_2016.html') + }); + + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); // Second call to request.get returns recaptcha - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', - qs: sinon.match.any, - headers: sinon.match.any, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, + var secondParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', challengesToSolve: 2 - }) - .callsArgWith(1, null, pageWithCaptchaResponse, captchaPage); + }); + + // Perform less strict matching on headers and qs to simplify this test + Object.assign(secondParams, { + headers: sinon.match.object, + qs: sinon.match.object + }); + + var secondResponse = helper.fakeResponse({ + body: helper.getFixture('captcha.html') + }); + + Request.onSecondCall() + .callsFake(helper.fakeRequest({ response: secondResponse })); + + cloudscraper.get(uri, function(error, response, body) { + // errorType 1, means captcha is served + expect(error).to.be.an('object'); + expect(error).to.be.eql({ errorType: 1 }); + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); - cloudscraper.get(url, function(error, body, response) { - expect(error).to.be.eql({errorType: 1}); // errorType 1, means captcha is served - expect(response).to.be.eql(captchaPage); + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); done(); - }, headers); + }); this.clock.tick(7000); // tick the timeout }); diff --git a/test/test-index.js 
b/test/test-index.js index b0f8d24..fda9c8a 100644 --- a/test/test-index.js +++ b/test/test-index.js @@ -1,29 +1,23 @@ -var helper = require('../spec_helper'); +'use strict'; + +var cloudscraper = require('../index'); var request = require('request'); +var helper = require('./helper'); + +var sinon = require('sinon'); +var expect = require('chai').expect; describe('Cloudscraper', function() { var requestedPage = helper.getFixture('requested_page.html'); - var url = helper.testDefaults.url; - var headers = helper.testDefaults.headers; - - // Since request.jar returns new cookie jar instance, create one global instance and then stub it in beforeEach - var jar = request.jar(); - // Since request.defaults returns new wrapper, create one global instance and then stub it in beforeEach - var requestDefault = request.defaults({jar: jar}); - var defaultWithArgs = helper.requestParams({}); - - var cloudscraper; + var uri = helper.defaultParams.uri; var sandbox; - before(function() { - helper.dropCache(); - }); + var Request; beforeEach(function () { - sandbox = sinon.sandbox.create(); - sandbox.stub(request, 'jar').returns(jar); - sandbox.stub(request, 'defaults').returns(requestDefault); - cloudscraper = require('../../index'); - // since cloudflare requires timeout, the module relies on setTimeout. It should be proprely stubbed to avoid ut running for too long + sandbox = sinon.createSandbox(); + // Prepare stubbed Request for each test + Request = sandbox.stub(request, 'Request'); + // setTimeout should be properly stubbed to prevent the unit test from running too long. 
this.clock = sinon.useFakeTimers(); }); @@ -33,282 +27,314 @@ describe('Cloudscraper', function() { }); it('should return requested page, if cloudflare is disabled for page', function(done) { - var expectedResponse = { statusCode: 200 }; + var onlyResponse = helper.fakeResponse({ + statusCode: 200, + body: requestedPage + }); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); - // Stub first call, which request makes to page. It should return requested page - sandbox.stub(requestDefault, 'get') - .withArgs(helper.requestParams({})) - .callsArgWith(1, null, expectedResponse, requestedPage); - - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(expectedResponse); + cloudscraper.get(uri, function(error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledOnce; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); done(); - }, headers); + }); }); - it('should not trigged any error if recaptcha is present in page not protected by CF', function(done) { - var expectedResponse = { statusCode: 200 }; - var pageWithCaptcha = helper.getFixture('page_with_recaptcha.html'); + it('should not trigger any error if recaptcha is present in page not protected by CF', function(done) { + var onlyResponse = helper.fakeResponse({ + statusCode: 200, + body: helper.getFixture('page_with_recaptcha.html') + }); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + + cloudscraper.get(uri, function(error, response, body) { + expect(error).to.be.null; - sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, expectedResponse, pageWithCaptcha); + expect(Request).to.be.calledOnce; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); - cloudscraper.get(url, 
function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(pageWithCaptcha); - expect(response).to.be.equal(expectedResponse); + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); done(); - }, headers); + }); }); it('should resolve challenge (version as on 21.05.2015) and then return page', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_21_05_2015.html'); - var response = helper.fakeResponseObject(503, headers, jsChallengePage, url); - var stubbed; - // Cloudflare is enabled for site. It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, jsChallengePage); - - // Second call to request.get will have challenge solution - // It should contain url, answer, headers with Referer - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_21_05_2015.html') + }); + + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', qs: { 'jschl_vc': '89cdff5eaa25923e0f26e29e5195dce9', - 'jschl_answer': 633 + 'example-site.dev'.length, // 633 is a answer to cloudflares js challenge in this particular case + // 633 is a answer to cloudflare's js challenge in this particular case + 'jschl_answer': 633 + 'example-site.dev'.length, 'pass': '1432194174.495-8TSfc235EQ' }, headers: { - 'User-Agent': 'Chrome', - 'Referer': 'http://example-site.dev/path/', - 'Cache-Control': 'private', - 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' + 'Referer': 'http://example-site.dev/path/' }, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, challengesToSolve: 2 - }) - 
.callsArgWith(1, null, response, requestedPage); + }); + + // Second call to Request will have challenge solution + // It should contain uri, answer, headers with Referer + var secondResponse = helper.fakeResponse({ body: requestedPage }); - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(response); + Request.onSecondCall()// Cloudflare is enabled for site. It returns a page with js challenge + .callsFake(helper.fakeRequest({ response: secondResponse})); + + cloudscraper.get(uri, function(error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); done(); - }, headers); + }); - this.clock.tick(7000); // tick the timeout + // tick the timeout + this.clock.tick(7000); }); it('should resolve challenge (version as on 09.06.2016) and then return page', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_09_06_2016.html'); - var response = helper.fakeResponseObject(503, headers, jsChallengePage, url); - var stubbed; - // Cloudflare is enabled for site. 
It returns a page with js challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, response, jsChallengePage); - - // Second call to request.get will have challenge solution - // It should contain url, answer, headers with Referer - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_09_06_2016.html') + }); + + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', qs: { 'jschl_vc': '346b959db0cfa38f9938acc11d6e1e6e', - 'jschl_answer': 6632 + 'example-site.dev'.length, // 6632 is a answer to cloudflares js challenge in this particular case + // 6632 is a answer to cloudflares js challenge in this particular case + 'jschl_answer': 6632 + 'example-site.dev'.length, 'pass': '1465488330.6-N/NbGTg+IM' }, headers: { - 'User-Agent': 'Chrome', - 'Referer': 'http://example-site.dev/path/', - 'Cache-Control': 'private', - 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' + 'Referer': 'http://example-site.dev/path/' }, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, challengesToSolve: 2 - }) - .callsArgWith(1, null, response, requestedPage); + }); - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(response); - done(); - }, headers); + // Second call to Request will have challenge solution + // It should contain uri, answer, headers with Referer + var secondResponse = helper.fakeResponse({ body: requestedPage }); + + Request.onSecondCall() + .callsFake(helper.fakeRequest({ response: secondResponse })); + + cloudscraper.get(uri, function(error, response, body) { + 
expect(error).to.be.null; + + expect(Request).to.be.called; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request).to.be.calledTwice; + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); + done(); + }); this.clock.tick(7000); // tick the timeout }); it('should resolve 2 consequent challenges', function(done) { - var jsChallengePage1 = helper.getFixture('js_challenge_03_12_2018_1.html'); - var jsChallengePage2 = helper.getFixture('js_challenge_03_12_2018_2.html'); - var responseJsChallengePage1 = helper.fakeResponseObject(503, headers, jsChallengePage1, url); - var responseJsChallengePage2 = helper.fakeResponseObject(503, headers, jsChallengePage2, url); - var stubbed; - // First call and CF returns a challenge - stubbed = sandbox.stub(requestDefault, 'get') - .withArgs(defaultWithArgs) - .callsArgWith(1, null, responseJsChallengePage1, jsChallengePage1); + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_1.html') + }); - // We submit a solution to the first challenge, but CF decided to give us a second one - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', qs: { 'jschl_vc': '427c2b1cd4fba29608ee81b200e94bfa', 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 is a answer to cloudflares js challenge in this particular case 'pass': '1543827239.915-44n9IE20mS' }, headers: { - 'User-Agent': 'Chrome', - 'Referer': 'http://example-site.dev/path/', - 'Cache-Control': 'private', - 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' + 'Referer': 
'http://example-site.dev/path/' }, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, challengesToSolve: 2 - }) - .callsArgWith(1, null, responseJsChallengePage2, jsChallengePage2); + }); - // We submit a solution to the second challenge and CF returns requested page - stubbed.withArgs({ - method: 'GET', - url: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + // We submit a solution to the first challenge, but CF decided to give us a second one + var secondResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_2.html') + }); + + Request.onSecondCall() + .callsFake(helper.fakeRequest({ response: secondResponse })); + + var thirdParams = helper.extendParams({ + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', qs: { 'jschl_vc': 'a41fee3a9f041fea01f0cbf3e8e4d29b', - 'jschl_answer': -1.9145049856 + 'example-site.dev'.length, // 1.9145049856 is a answer to cloudflares js challenge in this particular case + // 1.9145049856 is a answer to cloudflares js challenge in this particular case + 'jschl_answer': -1.9145049856 + 'example-site.dev'.length, 'pass': '1543827246.024-hvxyNA3rOg' }, headers: { - 'User-Agent': 'Chrome', - 'Referer': 'http://example-site.dev/path/', - 'Cache-Control': 'private', - 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' + 'Referer': 'http://example-site.dev/path/' }, - encoding: null, - realEncoding: 'utf8', - followAllRedirects: true, challengesToSolve: 1 - }) - .callsArgWith(1, null, responseJsChallengePage2, requestedPage); + }); + + var thirdResponse = helper.fakeResponse({ body: requestedPage }); + + // We submit a solution to the second challenge and CF returns requested page + Request.onThirdCall() + .callsFake(helper.fakeRequest({ response: thirdResponse })); + + cloudscraper.get(uri, function(error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledThrice; + 
expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + expect(Request.thirdCall).to.be.calledWithExactly(thirdParams); - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(response); + expect(response).to.be.equal(thirdResponse); + expect(body).to.be.equal(thirdResponse.body); done(); - }, headers); + }); this.clock.tick(14000); // tick the timeout }); - it('should make post request with body as string', function(done) { - var expectedResponse = { statusCode: 200 }; - var body = 'form-data-body'; - var postHeaders = headers; + it('should make post request with formData', function(done) { + var formData = { some: 'data' }; - postHeaders['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'; - postHeaders['Content-Length'] = body.length; + var firstParams = helper.extendParams({ + method: 'POST', + formData: formData + }); + // Stub first call, which request makes to page. It should return requested page + var onlyResponse = helper.fakeResponse({ body: requestedPage }); + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); - // Stub first call, which request makes to page. 
It should return requested page - sandbox.stub(requestDefault, 'post') - .withArgs(helper.requestParams({url: url, method: 'POST', headers: postHeaders, body: body})) - .callsArgWith(1, null, expectedResponse, requestedPage); - - cloudscraper.post(url, body, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(expectedResponse); + var options = { uri: uri, formData: formData }; + + cloudscraper.post(options, function(error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledOnce; + expect(Request.firstCall).to.be.calledWithExactly(firstParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); done(); - }, headers); + }); }); - it('should make post request with body as object', function(done) { - var expectedResponse = { statusCode: 200 }; - var rawBody = {a: '1', b: 2}; - var encodedBody = 'a=1&b=2'; - var postHeaders = headers; + it('should make delete request', function(done) { + var firstParams = helper.extendParams({ method: 'DELETE' }); + // Stub first call, which request makes to page. It should return requested page + var onlyResponse = helper.fakeResponse({ body: requestedPage }); - postHeaders['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'; - postHeaders['Content-Length'] = encodedBody.length; + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); - // Stub first call, which request makes to page. 
It should return requested page - sandbox.stub(requestDefault, 'post') - .withArgs(helper.requestParams({url: url, method: 'POST', headers: postHeaders, body: encodedBody})) - .callsArgWith(1, null, expectedResponse, requestedPage); - - cloudscraper.post(url, rawBody, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); - expect(response).to.be.equal(expectedResponse); + cloudscraper.delete(uri, function(error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledOnce; + expect(Request.firstCall).to.be.calledWithExactly(firstParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); done(); - }, headers); + }); }); it('should return raw data when encoding is null', function(done) { - var expectedResponse = { statusCode: 200 }; - var requestedData = new Buffer('R0lGODlhDwAPAKECAAAAzMzM/////wAAACwAAAAADwAPAAACIISPeQHsrZ5ModrLlN48CXF8m2iQ3YmmKqVlRtW4MLwWACH+H09wdGltaXplZCBieSBVbGVhZCBTbWFydFNhdmVyIQAAOw==', 'base64'); - - sandbox.stub(requestDefault, 'get') - .withArgs(helper.requestParams({url: url, headers: headers, encoding: null, realEncoding: null})) - .callsArgWith(1, null, expectedResponse, requestedData); - - var options = { - method: 'GET', - url: url, - encoding: null, - headers: headers, - followAllRedirects: true - }; - - cloudscraper.request(options, function(error, response, body) { - expect(error).to.be.null(); - expect(response).to.be.equal(expectedResponse); - expect(body).to.be.equal(requestedData); + var firstParams = helper.extendParams({ realEncoding: null }); + // Stub first call, which request makes to page. 
It should return requested page + var onlyResponse = helper.fakeResponse({ + body: new Buffer('R0lGODlhDwAPAKECAAAAzMzM/////wAAACwAAAAADwAPAAACIISPeQHsrZ5ModrLlN48CXF8m2iQ3YmmKqVlRtW4MLwWACH+H09wdGltaXplZCBieSBVbGVhZCBTbWFydFNhdmVyIQAAOw==', 'base64') + }); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + + var options = { uri: uri, encoding: null }; + + cloudscraper.get(options, function(error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledOnce; + expect(Request.firstCall).to.be.calledWithExactly(firstParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); done(); }); }); it('should set the given cookie and then return page', function(done) { - var jsChallengePage = helper.getFixture('js_challenge_cookie.html'); - var response = helper.fakeResponseObject(200, headers, jsChallengePage, url); + var firstResponse = helper.fakeResponse({ + body: helper.getFixture('js_challenge_cookie.html') + }); // Cloudflare is enabled for site. // It returns a redirecting page if a (session) cookie is unset. 
- sandbox.stub(requestDefault, 'get', function fakeGet(options, cb) { - if (options.url === url) { - var cookieString = jar.getCookieString(url); - if (cookieString === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a') { - cb(null, response, requestedPage); - } else { - cb(null, response, jsChallengePage); - } - } else { - cb(new Error("Unexpected request")); - } + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ challengesToSolve: 2 }); + var secondResponse = helper.fakeResponse({ body: requestedPage }); + + // Only callback with the second response if the cookie string matches + var matchCookie = sinon.match(function(params) { + return params.jar.getCookieString(uri) === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a'; }); - cloudscraper.get(url, function(error, response, body) { - expect(error).to.be.null(); - expect(body).to.be.equal(requestedPage); + // Prevent a matching error if for some reason params.jar is missing or invalid. 
+ var matchParams = sinon.match.has('jar', sinon.match.object).and(matchCookie); + + Request.withArgs(matchParams) + .callsFake(helper.fakeRequest({ response: secondResponse })); + + // We need to override cloudscraper's default jar for this test + var options = { uri: uri, jar: helper.defaultParams.jar }; + + cloudscraper.get(options, function(error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); done(); - }, headers); + }); }); }); From 7564bef88f07cd4a2355bd481adbeda4e73c7fe9 Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 03:47:48 -0600 Subject: [PATCH 06/33] Run lint before test --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 4e658ed..0bc4b71 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "description": "Bypasses cloudflare's anti-ddos page", "main": "index.js", "scripts": { - "test": "mocha", + "test": "npm run lint && mocha", "lint": "eslint ." 
}, "repository": { From 562cc175ad503f1bc13a021cc37e54b2966f44f4 Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 03:49:48 -0600 Subject: [PATCH 07/33] Add coverage script and deps --- .gitignore | 3 ++- .travis.yml | 4 +++- package.json | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 8b19616..edf6beb 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,5 @@ node_modules # Users Environment Variables .lock-wscript -test.js \ No newline at end of file +test.js +.nyc_output/ \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index b154529..ca14ddd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,4 +7,6 @@ node_js: - 8 - 6 -sudo: false \ No newline at end of file +sudo: false + +after_success: npm run coverage \ No newline at end of file diff --git a/package.json b/package.json index 0bc4b71..f7ee871 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,8 @@ "description": "Bypasses cloudflare's anti-ddos page", "main": "index.js", "scripts": { - "test": "npm run lint && mocha", + "test": "npm run lint && nyc --reporter=html --reporter=text mocha", + "coverage": "nyc report --reporter=text-lcov | coveralls", "lint": "eslint ." }, "repository": { @@ -28,6 +29,7 @@ }, "devDependencies": { "chai": "^4.2.0", + "coveralls": "^3.0.3", "eslint": "^5.14.1", "eslint-config-standard": "^12.0.0", "eslint-plugin-import": "^2.16.0", @@ -35,6 +37,7 @@ "eslint-plugin-promise": "^4.0.1", "eslint-plugin-standard": "^4.0.0", "mocha": "^6.0.2", + "nyc": "^13.3.0", "sinon": "^7.2.4", "sinon-chai": "^3.3.0" } From 1e425e1f71844ac69aa19e769f82d9b6a13b8e73 Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 03:59:26 -0600 Subject: [PATCH 08/33] Add build and coverage badges --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 3d848de..c1dc2f5 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,9 @@ Node.js library to bypass cloudflare's anti-ddos page. 
[![js-semistandard-style](https://cdn.rawgit.com/flet/semistandard/master/badge.svg)](https://github.com/Flet/semistandard) +[![Build status](https://img.shields.io/travis/codemanki/cloudscraper/master.svg?style=flat-square)](https://travis-ci.org/codemanki/cloudscraper) +[![Coverage](https://img.shields.io/coveralls/codemanki/cloudscraper.svg?style=flat-square)](https://coveralls.io/r/codemanki/cloudscraper) + This library is a port of python module [cloudflare-scrape](https://github.com/Anorov/cloudflare-scrape) with couple enhancements and test cases ;) . All grats to its author \m/ From 0631031ad2fb3de384db17c0b364f43266c6e4a0 Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 20:46:06 -0600 Subject: [PATCH 09/33] Remove always false if statement --- index.js | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/index.js b/index.js index a714698..0bd8c5f 100644 --- a/index.js +++ b/index.js @@ -71,6 +71,7 @@ function processRequestResponse(options, requestResult, callback) { var isRedirectChallengePresent; if (error || !body || !body.toString) { + // Pure request error (bad connection, wrong url, etc) return callback({ errorType: 0, error: error }, response, body); } @@ -104,11 +105,6 @@ function processRequestResponse(options, requestResult, callback) { function checkForErrors(error, body) { var match; - // Pure request error (bad connection, wrong url, etc) - if(error) { - return { errorType: 0, error: error }; - } - // Finding captcha if (body.indexOf('why_captcha') !== -1 || /cdn-cgi\/l\/chk_captcha/i.test(body)) { return { errorType: 1 }; From 5314b9ebd79c282487100f6a3dbd57726d1f0be7 Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 20:46:32 -0600 Subject: [PATCH 10/33] Throw errorType 3 for vm error --- index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 0bd8c5f..757a40f 100644 --- a/index.js +++ b/index.js @@ -189,9 +189,9 @@ function setCookieAndReload(response, 
body, options, callback) { document: {} }; - vm.runInNewContext(cookieSettingCode, sandbox); - try { + vm.runInNewContext(cookieSettingCode, sandbox); + options.jar.setCookie(sandbox.document.cookie, response.request.uri.href, {ignoreError: true}); } catch (err) { return callback({errorType: 3, error: 'Error occurred during evaluation: ' + err.message}, response, body); From c0f5a392b77d5aef9c5ac420452aafc7d6a8a906 Mon Sep 17 00:00:00 2001 From: pro-src Date: Wed, 27 Feb 2019 21:00:09 -0600 Subject: [PATCH 11/33] Increase code coverage to 100% --- test/test-errors.js | 174 ++++++++++++++++++++++++++++++++++++++++++++ test/test-index.js | 8 ++ 2 files changed, 182 insertions(+) diff --git a/test/test-errors.js b/test/test-errors.js index c1d18be..75d3e25 100644 --- a/test/test-errors.js +++ b/test/test-errors.js @@ -131,6 +131,7 @@ describe('Cloudscraper', function() { this.clock.tick(200000); // tick the timeout }); + it('should return error if body is undefined', function(done) { // https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 @@ -175,6 +176,64 @@ describe('Cloudscraper', function() { this.clock.tick(7000); // tick the timeout }); + it('should return error if js challenge has error during evaluation', function(done) { + var onlyResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_1.html') + }); + + // Adds a syntax error near the end of line 37 + onlyResponse.body = onlyResponse.body.replace(/\.toFixed/gm, '..toFixed'); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + + cloudscraper.get(uri, function(error, response, body) { + // errorType 3, means parsing failed + expect(error).to.be.an('object'); + expect(error).to.own.include({ errorType: 3 }); + expect(error.error).to.be.a('string'); + expect(error.error).to.include('Error occurred during evaluation: '); + + 
expect(Request).to.be.calledOnce; + expect(Request).to.be.calledWithExactly(helper.defaultParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); + done(); + }); + + this.clock.tick(7000); // tick the timeout + }); + + it('should return error if challengeId extraction fails', function(done) { + var onlyResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_1.html') + }); + + onlyResponse.body = onlyResponse.body.replace(/name="jschl_vc"/gm, ''); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + + cloudscraper.get(uri, function(error, response, body) { + // errorType 3, means parsing failed + expect(error).to.be.an('object'); + expect(error).to.own.include({ errorType: 3 }); + expect(error.error).to.be.a('string'); + expect(error.error).to.include('I cant extract challengeId'); + + expect(Request).to.be.calledOnce; + expect(Request).to.be.calledWithExactly(helper.defaultParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); + done(); + }); + + this.clock.tick(7000); // tick the timeout + }); + + it('should return error if it was thrown by request when solving challenge', function(done) { var onlyResponse = helper.fakeResponse({ statusCode: 503, @@ -252,4 +311,119 @@ describe('Cloudscraper', function() { this.clock.tick(7000); // tick the timeout }); + + it('should return error if challenge page cookie extraction fails', function(done) { + // Cloudflare is enabled for site. + // It returns a redirecting page if a (session) cookie is unset. 
+ var onlyResponse = helper.fakeResponse({ + statusCode: 503, + // The cookie extraction codes looks for the `S` variable assignment + body: helper.getFixture('js_challenge_cookie.html').replace(/S=/gm, 'Z=') + }); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + + cloudscraper.get(uri, function(error, response, body) { + expect(error).to.be.an('object'); + expect(error).to.be.eql({ + errorType: 3, + error: 'I cant extract cookie generation code from page' + }); + + expect(Request).to.be.calledOnce; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); + done(); + }); + }); + + it('should throw a TypeError if callback is not a function', function(done) { + var spy = sinon.spy(function() { + cloudscraper.get(uri); + }); + + expect(spy).to.throw(TypeError, /Expected a callback function/); + done(); + }); + + it('should throw a TypeError if challengesToSolve is not a number', function(done) { + var spy = sinon.spy(function() { + var options = { uri: uri, challengesToSolve: 'abc' }; + + cloudscraper.get(options, function(){}); + }); + + expect(spy).to.throw(TypeError, /`challengesToSolve` option .*number/); + done(); + }); + + it('should detect captcha in the response body\'s real encoding', function(done) { + var firstParams = helper.extendParams({ + realEncoding: 'fake-encoding' + }); + + var onlyResponse = helper.fakeResponse({ + statusCode: 503, + body: { + toString: function(encoding) { + if (encoding === 'fake-encoding') { + return helper.getFixture('captcha.html'); + } + + return 'fake response body'; + } + } + }); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + + var options = { uri: uri, encoding: 'fake-encoding' }; + + cloudscraper.get(options, function(error, response, body) { + // errorType 1, means captcha is served + expect(error).to.be.an('object'); + expect(error).to.be.eql({ errorType: 1 
}); + + expect(Request).to.be.calledOnce; + expect(Request.firstCall).to.be.calledWithExactly(firstParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body.toString('fake-encoding')); + done(); + }); + + this.clock.tick(7000); // tick the timeout + }); + + it('should return error if cookie setting code evaluation fails', function(done) { + // Change the cookie setting code so the vm will throw an error + var html = helper.getFixture('js_challenge_cookie.html'); + var b64 = (new Buffer('throw new Error(\'vm eval failed\');')).toString('base64'); + + var onlyResponse = helper.fakeResponse({ + statusCode: 503, + body: html.replace(/S='([^']+)'/, 'S=\'' + b64 + '\'') + }); + + Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + + cloudscraper.get(uri, function(error, response, body) { + // errorType 3, means parsing failed + expect(error).to.be.an('object'); + expect(error).to.own.include({ errorType: 3 }); + expect(error.error).to.be.a('string'); + expect(error.error).to.include('Error occurred during evaluation: vm eval failed'); + + expect(Request).to.be.calledOnce; + expect(Request).to.be.calledWithExactly(helper.defaultParams); + + expect(response).to.be.equal(onlyResponse); + expect(body).to.be.equal(onlyResponse.body); + done(); + }); + + this.clock.tick(7000); // tick the timeout + }); }); diff --git a/test/test-index.js b/test/test-index.js index fda9c8a..e3a3368 100644 --- a/test/test-index.js +++ b/test/test-index.js @@ -337,4 +337,12 @@ describe('Cloudscraper', function() { done(); }); }); + + it('should define custom defaults function', function (done) { + expect(cloudscraper.defaults).to.not.equal(request.defaults); + + var custom = cloudscraper.defaults({ challengesToSolve: 5 }); + expect(custom.defaults).to.equal(cloudscraper.defaults); + done(); + }); }); From b2a7ab47d9f1b3b75e0d2c530914f33a234b9df8 Mon Sep 17 00:00:00 2001 From: pro-src Date: Thu, 28 Feb 2019 03:27:31 -0600 Subject: [PATCH 
12/33] Add promise support --- index.js | 99 ++++++++++++++++++++++++++++++--------------- package.json | 3 +- test/helper.js | 14 ++++++- test/test-errors.js | 2 +- test/test-index.js | 2 +- 5 files changed, 82 insertions(+), 38 deletions(-) diff --git a/index.js b/index.js index 757a40f..cd340db 100644 --- a/index.js +++ b/index.js @@ -1,28 +1,41 @@ var vm = require('vm'); -var requestModule = require('request'); - -var originalDefaults = requestModule.defaults; - -module.exports = defaults.call(requestModule, { - // Cookies should be enabled - jar: requestModule.jar(), - headers: { - 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36', - 'Cache-Control': 'private', - 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' - }, - // Cloudflare requires a delay of 5 seconds, so wait for at least 6. - cloudflareTimeout: 6000, - // followAllRedirects - follow non-GET HTTP 3xx responses as redirects - followAllRedirects: true, - // Support only this max challenges in row. If CF returns more, throw an error - challengesToSolve: 3 -}); - -function defaults(options) { - var cloudscraper = originalDefaults.call(this, options, requester); - - if (requestModule === this) { +var requestModule = require('request-promise'); + +module.exports = defaults.call(requestModule); + +function defaults(params) { + // isCloudScraper === !isRequestModule + var isRequestModule = this === requestModule; + + var defaultParams = (!isRequestModule && this.defaultParams) || { + requester: requestModule, + // Cookies should be enabled + jar: requestModule.jar(), + headers: { + 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36', + 'Cache-Control': 'private', + 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' + }, + // Cloudflare requires a delay of 5 seconds, so wait for at least 6. 
+ cloudflareTimeout: 6000, + // followAllRedirects - follow non-GET HTTP 3xx responses as redirects + followAllRedirects: true, + // Support only this max challenges in row. If CF returns more, throw an error + challengesToSolve: 3 + }; + + // Object.assign requires at least nodejs v4, request only test/supports v6+ + defaultParams = Object.assign({}, defaultParams, params); + + var cloudscraper = requestModule.defaults.call(this, defaultParams, requester); + + // There's no safety net here, any changes apply to all future requests + // that are made with this instance and derived instances + cloudscraper.defaultParams = defaultParams; + + // Ensure this instance gets a copy of our custom defaults function + // It's not necessary on subsequent calls + if (isRequestModule) { cloudscraper.defaults = defaults; } // Expose the debug option @@ -32,7 +45,7 @@ function defaults(options) { return cloudscraper; } -function requester(options, callback) { +function requester(options) { // Prevent overwriting realEncoding in subsequent calls if (!('realEncoding' in options)) { // Can't just do the normal options.encoding || 'utf8' @@ -44,21 +57,41 @@ function requester(options, callback) { } } + // Requester is wrapped by request to ensure that we get new options on first call options.encoding = null; - if (typeof callback !== 'function') { - throw new TypeError('Expected a callback function, got ' - + typeof(callback) + ' instead.'); - } - if (isNaN(options.challengesToSolve)) { throw new TypeError('Expected `challengesToSolve` option to be a number, ' + 'got ' + typeof(options.challengesToSolve) + ' instead.'); } - requestModule(options, function(error, response, body) { - processRequestResponse(options, {error: error, response: response, body: body}, callback); - }); + var createRequest = options.requester; + + if (typeof createRequest !== 'function') { + throw new TypeError('Expected `requester` option to be a function, got ' + + typeof(createRequest) + ' instead.'); + } + 
+ var request = createRequest(options); + // This should be a user supplied callback or request-promise's default callback + var callback = request.callback; + + if (typeof callback !== 'function') { + throw new TypeError('Expected a callback function, got ' + + typeof(callback) + ' instead.'); + } + + var called = false; + request.callback = function(error, response, body) { + if (called) return; + + called = true; + var result = { error: error, response: response, body: body }; + + processRequestResponse(options, result, callback); + }; + + return request; } function processRequestResponse(options, requestResult, callback) { diff --git a/package.json b/package.json index f7ee871..730e427 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,8 @@ "license": "MIT", "homepage": "https://github.com/codemanki/cloudscraper", "dependencies": { - "request": "^2.88.0" + "request": "^2.88.0", + "request-promise": "^4.2.4" }, "devDependencies": { "chai": "^4.2.0", diff --git a/test/helper.js b/test/helper.js index 23461ce..13039d5 100644 --- a/test/helper.js +++ b/test/helper.js @@ -1,4 +1,4 @@ -var request = require('request'); +var request = require('request-promise'); var sinon = require('sinon'); var fs = require('fs'); var url = require('url'); @@ -7,6 +7,7 @@ var path = require('path'); var defaultParams = { // Since cloudscraper wraps the callback, just ensure callback is a function callback: sinon.match.func, + requester: request, jar: request.jar(), uri: 'http://example-site.dev/path/', headers: { @@ -73,7 +74,16 @@ module.exports = { } return function Request(params) { - params.callback(fake.error, fake.response, fake.body); + return Object.defineProperty({}, 'callback', { + get: function() { + // Return the callback function that is to be replaced. + return params.callback; + }, + set: function(callback) { + // Don't callback until after cloudscraper replaces the callback function. 
+ callback(fake.error, fake.response, fake.body); + } + }); }; } }; diff --git a/test/test-errors.js b/test/test-errors.js index 75d3e25..842245c 100644 --- a/test/test-errors.js +++ b/test/test-errors.js @@ -1,7 +1,7 @@ 'use strict'; var cloudscraper = require('../index'); -var request = require('request'); +var request = require('request-promise'); var helper = require('./helper'); var sinon = require('sinon'); diff --git a/test/test-index.js b/test/test-index.js index e3a3368..9431dcd 100644 --- a/test/test-index.js +++ b/test/test-index.js @@ -1,7 +1,7 @@ 'use strict'; var cloudscraper = require('../index'); -var request = require('request'); +var request = require('request-promise'); var helper = require('./helper'); var sinon = require('sinon'); From 4233d08d0c401f0e86b6eb150b8e4594de19a559 Mon Sep 17 00:00:00 2001 From: pro-src Date: Sat, 2 Mar 2019 01:25:33 -0600 Subject: [PATCH 13/33] Fix promise support --- index.js | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/index.js b/index.js index cd340db..a484e81 100644 --- a/index.js +++ b/index.js @@ -27,7 +27,10 @@ function defaults(params) { // Object.assign requires at least nodejs v4, request only test/supports v6+ defaultParams = Object.assign({}, defaultParams, params); - var cloudscraper = requestModule.defaults.call(this, defaultParams, requester); + var cloudscraper = requestModule.defaults + .call(this, defaultParams, function(options) { + return performRequest(options, true); + }); // There's no safety net here, any changes apply to all future requests // that are made with this instance and derived instances @@ -45,7 +48,7 @@ function defaults(params) { return cloudscraper; } -function requester(options) { +function performRequest(options, isFirstRequest) { // Prevent overwriting realEncoding in subsequent calls if (!('realEncoding' in options)) { // Can't just do the normal options.encoding || 'utf8' @@ -65,17 +68,19 @@ function 
requester(options) { + 'got ' + typeof(options.challengesToSolve) + ' instead.'); } - var createRequest = options.requester; + // This should be the default export of either request or request-promise. + var requester = options.requester; - if (typeof createRequest !== 'function') { + if (typeof requester !== 'function') { throw new TypeError('Expected `requester` option to be a function, got ' - + typeof(createRequest) + ' instead.'); + + typeof(requester) + ' instead.'); } - var request = createRequest(options); - // This should be a user supplied callback or request-promise's default callback + var request = requester(options); + // This should be a user supplied callback or request-promise's callback. var callback = request.callback; + // If the requester is not request-promise, ensure we get a callback. if (typeof callback !== 'function') { throw new TypeError('Expected a callback function, got ' + typeof(callback) + ' instead.'); @@ -88,7 +93,13 @@ function requester(options) { called = true; var result = { error: error, response: response, body: body }; - processRequestResponse(options, result, callback); + if (isFirstRequest) { + // We only need the callback from the first request. + // The other callbacks can be safely ignored. 
+ options.callback = callback; + } + + processRequestResponse(options, result, options.callback); }; return request; @@ -202,7 +213,7 @@ function solveChallenge(response, body, options, callback) { options.challengesToSolve = options.challengesToSolve - 1; // Make request with answer - requester(options, callback); + performRequest(options, false); } function setCookieAndReload(response, body, options, callback) { @@ -232,12 +243,16 @@ function setCookieAndReload(response, body, options, callback) { options.challengesToSolve = options.challengesToSolve - 1; - requester(options, callback); + performRequest(options, false); } function processResponseBody(options, error, response, body, callback) { if(typeof options.realEncoding === 'string') { body = body.toString(options.realEncoding); + // The resolveWithFullResponse option will resolve with the response object. + // This changes the response.body so it is as expected. + response.body = body; + // In case of real encoding, try to validate the response // and find potential errors there. 
// If encoding is not provided, return response as it is @@ -246,6 +261,5 @@ function processResponseBody(options, error, response, body, callback) { } } - callback(error, response, body); } From 76bb41473ef5029de69d112ff039ac251d176a38 Mon Sep 17 00:00:00 2001 From: pro-src Date: Sat, 2 Mar 2019 01:31:03 -0600 Subject: [PATCH 14/33] Add tests for promise-support 100% code coverage --- package.json | 1 + test/common.js | 2 + test/helper.js | 38 ++++++--- test/rp.js | 35 +++++++++ test/test-errors.js | 188 ++++++++++++++++++++++++-------------------- test/test-index.js | 159 +++++++++++++++++++------------------ test/test-rp.js | 105 +++++++++++++++++++++++++ 7 files changed, 356 insertions(+), 172 deletions(-) create mode 100644 test/rp.js create mode 100644 test/test-rp.js diff --git a/package.json b/package.json index 730e427..aa249e0 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ }, "devDependencies": { "chai": "^4.2.0", + "chai-as-promised": "^7.1.1", "coveralls": "^3.0.3", "eslint": "^5.14.1", "eslint-config-standard": "^12.0.0", diff --git a/test/common.js b/test/common.js index 4094503..1c96a18 100644 --- a/test/common.js +++ b/test/common.js @@ -3,4 +3,6 @@ var chai = require('chai'); chai.use(require('sinon-chai')); +chai.use(require('chai-as-promised')); + chai.config.includeStack = true; \ No newline at end of file diff --git a/test/helper.js b/test/helper.js index 13039d5..a1d5571 100644 --- a/test/helper.js +++ b/test/helper.js @@ -1,4 +1,4 @@ -var request = require('request-promise'); +var request = require('./rp'); var sinon = require('sinon'); var fs = require('fs'); var url = require('url'); @@ -7,7 +7,7 @@ var path = require('path'); var defaultParams = { // Since cloudscraper wraps the callback, just ensure callback is a function callback: sinon.match.func, - requester: request, + requester: sinon.match.func, jar: request.jar(), uri: 'http://example-site.dev/path/', headers: { @@ -29,7 +29,6 @@ var cache = {}; module.exports = { 
getFixture: function(fileName) { if (cache[fileName] === undefined) { - // noinspection JSUnresolvedVariable cache[fileName] = fs.readFileSync(path.join(__dirname, 'fixtures', fileName), 'utf8'); } return cache[fileName]; @@ -40,7 +39,7 @@ module.exports = { var fake = Object.assign({ statusCode: 200, headers: defaultParams.headers, - body: '', + body: '' }, template); // The uri property of the fake response is only for tests to simplify fake request creation. @@ -65,7 +64,7 @@ module.exports = { var fake = Object.assign({ error: null, // Set the default fake statusCode to 500 if an error is provided - response: { statusCode: template.error ? 500 : 200 }, + response: { statusCode: template.error ? 500 : 200 } }, template); // Use the body from fake response if the template doesn't provide it @@ -73,17 +72,38 @@ module.exports = { fake.body = fake.response.body; } + // Freeze the fake result and it's properties for more reliable tests. + Object.freeze(fake); + Object.keys(fake).forEach(function (key) { + if (!Object.isFrozen(fake[key]) && !Buffer.isBuffer(fake[key])) { + Object.freeze(fake[key]); + } + }); + return function Request(params) { - return Object.defineProperty({}, 'callback', { + // The promise returned by request-promise won't resolve until + // it's callback is called. The problem is that we need to callback + // after the constructor returns to simulate a real request/response. + var instance = request(params); + + // This is the callback that cloudscraper should replace. + var callback = instance.callback; + + // We don't want to callback with the fake result until + // after the constructor returns thus define a property getter/setter + // and wait for cloudscraper to set it's own callback. + Object.defineProperty(instance, 'callback', { get: function() { - // Return the callback function that is to be replaced. - return params.callback; + // Returns request-promise's callback. 
+ return callback; }, set: function(callback) { - // Don't callback until after cloudscraper replaces the callback function. + // This won't callback unless cloudscraper replaces the callback. callback(fake.error, fake.response, fake.body); } }); + + return instance; }; } }; diff --git a/test/rp.js b/test/rp.js new file mode 100644 index 0000000..096189a --- /dev/null +++ b/test/rp.js @@ -0,0 +1,35 @@ +'use strict'; + +// Reproduces: https://github.com/request/request-promise/blob/6d11ddc63dde2462a8e39cd8d0b6956556b977f1/lib/rp.js + +// This library almost exactly reproduces the real rp library. +// The primary difference being that we're pre-patching the init method. +// It must be done this way because request-promise bypasses require.cache. + +var Bluebird = require('bluebird').getNewLibraryCopy(); +var configure = require('request-promise-core/configure/request2'); +var request = require('request'); + +// The real rp library works by replacing this init function. +// It will store the original callback from options, +// apply our init function, and wrap the request instance's callback. +function init(options) { + // Request -> Request.prototype.init -> Request.prototype.start + // The test/helper is responsible for calling back with a fake response. +} + +// Replacing init with a noop prevents real requests from being made. 
+request.Request.prototype.init = init; + +configure({ + request: request, + PromiseImpl: Bluebird, + expose: [ + 'then', + 'catch', + 'finally', + 'promise' + ] +}); + +module.exports = request; diff --git a/test/test-errors.js b/test/test-errors.js index 842245c..1d02148 100644 --- a/test/test-errors.js +++ b/test/test-errors.js @@ -2,10 +2,12 @@ var cloudscraper = require('../index'); var request = require('request-promise'); +var RequestError = require('request-promise/errors').RequestError; var helper = require('./helper'); var sinon = require('sinon'); var expect = require('chai').expect; +var assert = require('chai').assert; describe('Cloudscraper', function() { var uri = helper.defaultParams.uri; @@ -30,70 +32,69 @@ describe('Cloudscraper', function() { Request.callsFake(helper.fakeRequest({ error: fakeError })); - cloudscraper.get(uri, function(error) { + var promise = cloudscraper.get(uri, function (error) { // errorType 0, means it is some kind of system error expect(error).to.be.an('object'); expect(error).to.be.eql({ errorType: 0, error: fakeError }); expect(error.error).to.be.an('error'); - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); - done(); + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); }); + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); }); it('should return error if captcha is served by cloudflare', function(done) { - var onlyResponse = helper.fakeResponse({ + var expectedResponse = helper.fakeResponse({ statusCode: 503, body: helper.getFixture('captcha.html') }); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 1, means captcha is served expect(error).to.be.an('object'); expect(error).to.be.eql({ errorType: 
1 }); - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); }); it('should return error if cloudflare returned some inner error', function(done) { // https://support.cloudflare.com/hc/en-us/sections/200038216-CloudFlare-Error-Messages // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 - var onlyResponse = helper.fakeResponse({ + var expectedResponse = helper.fakeResponse({ statusCode: 500, body: helper.getFixture('access_denied.html') }); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 2, means inner cloudflare error expect(error).to.be.an('object'); expect(error).to.be.eql({ errorType: 2, error: 1006 }); - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); }); - + it('should return error if cf presented more than 3 challenges in a row', function(done) { // The expected params for all subsequent calls to Request var expectedParams = helper.extendParams({ - uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + uri: 
'http://example-site.dev/cdn-cgi/l/chk_jschl' }); // Perform less strict matching on headers and qs to simplify this test @@ -110,11 +111,11 @@ describe('Cloudscraper', function() { Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { expect(error).to.be.an('object'); expect(error).to.be.eql({ errorType: 4 }); - expect(Request.callCount).to.be.equal(4); + assert.equal(Request.callCount, 4, 'Request call count'); expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); var total = helper.defaultParams.challengesToSolve + 1; @@ -126,10 +127,12 @@ describe('Cloudscraper', function() { expect(response).to.be.equal(expectedResponse); expect(body).to.be.equal(expectedResponse.body); - done(); }); - this.clock.tick(200000); // tick the timeout + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + + // Tick the timeout + this.clock.tick(200000); }); it('should return error if body is undefined', function(done) { @@ -140,102 +143,102 @@ describe('Cloudscraper', function() { response: { statusCode: 500} })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 2, means inner cloudflare error expect(error).to.be.an('object'); expect(error).to.be.eql({ errorType: 0, error: null }); - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); expect(body).to.be.equal(undefined); - done(); }); + + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); }); it('should return error if challenge page failed to be parsed', function(done) { - var onlyResponse = helper.fakeResponse({ + var expectedResponse = helper.fakeResponse({ body: helper.getFixture('invalid_js_challenge.html') }); - 
Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 3, means parsing failed expect(error).to.be.an('object'); expect(error).to.own.include({ errorType: 3 }); - expect(Request).to.be.calledOnce; - expect(Request).to.be.calledWithExactly(helper.defaultParams); + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + this.clock.tick(7000); // tick the timeout }); it('should return error if js challenge has error during evaluation', function(done) { - var onlyResponse = helper.fakeResponse({ + var expectedResponse = helper.fakeResponse({ statusCode: 503, body: helper.getFixture('js_challenge_03_12_2018_1.html') }); // Adds a syntax error near the end of line 37 - onlyResponse.body = onlyResponse.body.replace(/\.toFixed/gm, '..toFixed'); + expectedResponse.body = expectedResponse.body.replace(/\.toFixed/gm, '..toFixed'); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 3, means parsing failed expect(error).to.be.an('object'); expect(error).to.own.include({ errorType: 3 }); expect(error.error).to.be.a('string'); expect(error.error).to.include('Error occurred during evaluation: '); - expect(Request).to.be.calledOnce; - expect(Request).to.be.calledWithExactly(helper.defaultParams); + 
expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + this.clock.tick(7000); // tick the timeout }); it('should return error if challengeId extraction fails', function(done) { - var onlyResponse = helper.fakeResponse({ + var expectedResponse = helper.fakeResponse({ statusCode: 503, body: helper.getFixture('js_challenge_03_12_2018_1.html') }); - onlyResponse.body = onlyResponse.body.replace(/name="jschl_vc"/gm, ''); + expectedResponse.body = expectedResponse.body.replace(/name="jschl_vc"/gm, ''); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 3, means parsing failed expect(error).to.be.an('object'); expect(error).to.own.include({ errorType: 3 }); expect(error.error).to.be.a('string'); expect(error.error).to.include('I cant extract challengeId'); - expect(Request).to.be.calledOnce; - expect(Request).to.be.calledWithExactly(helper.defaultParams); + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + this.clock.tick(7000); // tick the timeout }); it('should return error if it was thrown by request when solving challenge', function(done) { - var onlyResponse = helper.fakeResponse({ + var expectedResponse = helper.fakeResponse({ statusCode: 503, body: 
helper.getFixture('js_challenge_21_05_2015.html') }); @@ -246,12 +249,12 @@ describe('Cloudscraper', function() { // Cloudflare is enabled for site. It returns a page with js challenge Request.onFirstCall() - .callsFake(helper.fakeRequest({ response: onlyResponse })); + .callsFake(helper.fakeRequest({ response: expectedResponse })); Request.onSecondCall() .callsFake(helper.fakeRequest({ error: fakeError })); - cloudscraper.get(uri, function(error) { + var promise = cloudscraper.get(uri, function (error) { // errorType 0, a connection error for example expect(error).to.be.an('object'); expect(error).to.be.eql({ errorType: 0, error: fakeError }); @@ -259,9 +262,10 @@ describe('Cloudscraper', function() { expect(Request).to.be.calledTwice; expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); - done(); }); + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + // tick the timeout this.clock.tick(7000); }); @@ -295,7 +299,7 @@ describe('Cloudscraper', function() { Request.onSecondCall() .callsFake(helper.fakeRequest({ response: secondResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 1, means captcha is served expect(error).to.be.an('object'); expect(error).to.be.eql({ errorType: 1 }); @@ -306,37 +310,38 @@ describe('Cloudscraper', function() { expect(response).to.be.equal(secondResponse); expect(body).to.be.equal(secondResponse.body); - done(); }); + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + this.clock.tick(7000); // tick the timeout }); it('should return error if challenge page cookie extraction fails', function(done) { // Cloudflare is enabled for site. // It returns a redirecting page if a (session) cookie is unset. 
- var onlyResponse = helper.fakeResponse({ + var expectedResponse = helper.fakeResponse({ statusCode: 503, // The cookie extraction codes looks for the `S` variable assignment body: helper.getFixture('js_challenge_cookie.html').replace(/S=/gm, 'Z=') }); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { expect(error).to.be.an('object'); expect(error).to.be.eql({ errorType: 3, error: 'I cant extract cookie generation code from page' }); - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); }); it('should throw a TypeError if callback is not a function', function(done) { @@ -348,6 +353,15 @@ describe('Cloudscraper', function() { done(); }); + it('should throw a TypeError if requester is not a function', function (done) { + var spy = sinon.spy(function () { + cloudscraper.get({ requester: null }); + }); + + expect(spy).to.throw(TypeError, /`requester` option .*function/); + done(); + }); + it('should throw a TypeError if challengesToSolve is not a number', function(done) { var spy = sinon.spy(function() { var options = { uri: uri, challengesToSolve: 'abc' }; @@ -359,12 +373,12 @@ describe('Cloudscraper', function() { done(); }); - it('should detect captcha in the response body\'s real encoding', function(done) { + it('should detect captcha in response body\'s real encoding', function (done) { var firstParams = helper.extendParams({ realEncoding: 
'fake-encoding' }); - var onlyResponse = helper.fakeResponse({ + var expectedResponse = helper.fakeResponse({ statusCode: 503, body: { toString: function(encoding) { @@ -377,23 +391,23 @@ describe('Cloudscraper', function() { } }); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); var options = { uri: uri, encoding: 'fake-encoding' }; - cloudscraper.get(options, function(error, response, body) { + var promise = cloudscraper.get(options, function (error, response, body) { // errorType 1, means captcha is served expect(error).to.be.an('object'); expect(error).to.be.eql({ errorType: 1 }); - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(firstParams); + expect(Request).to.be.calledOnceWithExactly(firstParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body.toString('fake-encoding')); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body.toString('fake-encoding')); }); + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + this.clock.tick(7000); // tick the timeout }); @@ -402,28 +416,28 @@ describe('Cloudscraper', function() { var html = helper.getFixture('js_challenge_cookie.html'); var b64 = (new Buffer('throw new Error(\'vm eval failed\');')).toString('base64'); - var onlyResponse = helper.fakeResponse({ + var expectedResponse = helper.fakeResponse({ statusCode: 503, body: html.replace(/S='([^']+)'/, 'S=\'' + b64 + '\'') }); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 3, means parsing failed expect(error).to.be.an('object'); expect(error).to.own.include({ errorType: 3 }); 
expect(error.error).to.be.a('string'); expect(error.error).to.include('Error occurred during evaluation: vm eval failed'); - expect(Request).to.be.calledOnce; - expect(Request).to.be.calledWithExactly(helper.defaultParams); + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + this.clock.tick(7000); // tick the timeout }); }); diff --git a/test/test-index.js b/test/test-index.js index 9431dcd..bfee2b2 100644 --- a/test/test-index.js +++ b/test/test-index.js @@ -7,9 +7,9 @@ var helper = require('./helper'); var sinon = require('sinon'); var expect = require('chai').expect; -describe('Cloudscraper', function() { - var requestedPage = helper.getFixture('requested_page.html'); - var uri = helper.defaultParams.uri; +describe('Cloudscraper', function () { + var requestedPage = helper.getFixture('requested_page.html'); + var uri = helper.defaultParams.uri; var sandbox; var Request; @@ -26,49 +26,47 @@ describe('Cloudscraper', function() { this.clock.restore(); }); - it('should return requested page, if cloudflare is disabled for page', function(done) { - var onlyResponse = helper.fakeResponse({ + it('should return requested page, if cloudflare is disabled for page', function (done) { + var expectedResponse = helper.fakeResponse({ statusCode: 200, body: requestedPage }); - - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); - cloudscraper.get(uri, function(error, response, body) { + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri, function (error, response, body) { expect(error).to.be.null; - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + 
expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); }); - it('should not trigger any error if recaptcha is present in page not protected by CF', function(done) { - var onlyResponse = helper.fakeResponse({ + it('should not trigger any error if recaptcha is present in page not protected by CF', function (done) { + var expectedResponse = helper.fakeResponse({ statusCode: 200, body: helper.getFixture('page_with_recaptcha.html') }); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { expect(error).to.be.null; - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); }); - it('should resolve challenge (version as on 21.05.2015) and then return page', function(done) { + it('should resolve challenge (version as on 21.05.2015) and then return page', function (done) { // Cloudflare is enabled for site. 
It returns a page with js challenge var firstResponse = helper.fakeResponse({ statusCode: 503, @@ -97,9 +95,9 @@ describe('Cloudscraper', function() { var secondResponse = helper.fakeResponse({ body: requestedPage }); Request.onSecondCall()// Cloudflare is enabled for site. It returns a page with js challenge - .callsFake(helper.fakeRequest({ response: secondResponse})); + .callsFake(helper.fakeRequest({ response: secondResponse })); - cloudscraper.get(uri, function(error, response, body) { + var promise = cloudscraper.get(uri, function (error, response, body) { expect(error).to.be.null; expect(Request).to.be.calledTwice; @@ -108,14 +106,15 @@ describe('Cloudscraper', function() { expect(response).to.be.equal(secondResponse); expect(body).to.be.equal(secondResponse.body); - done(); }); + expect(promise).to.eventually.equal(secondResponse.body).and.notify(done); + // tick the timeout this.clock.tick(7000); }); - it('should resolve challenge (version as on 09.06.2016) and then return page', function(done) { + it('should resolve challenge (version as on 09.06.2016) and then return page', function (done) { // Cloudflare is enabled for site. 
It returns a page with js challenge var firstResponse = helper.fakeResponse({ statusCode: 503, @@ -146,23 +145,24 @@ describe('Cloudscraper', function() { Request.onSecondCall() .callsFake(helper.fakeRequest({ response: secondResponse })); - cloudscraper.get(uri, function(error, response, body) { - expect(error).to.be.null; + var promise = cloudscraper.get(uri, function (error, response, body) { + expect(error).to.be.null; - expect(Request).to.be.called; - expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); - expect(Request).to.be.calledTwice; - expect(Request.secondCall).to.be.calledWithExactly(secondParams); + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); - expect(response).to.be.equal(secondResponse); - expect(body).to.be.equal(secondResponse.body); - done(); + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); }); + expect(promise).to.eventually.equal(secondResponse.body).and.notify(done); + this.clock.tick(7000); // tick the timeout }); - it('should resolve 2 consequent challenges', function(done) { + it('should resolve 2 consequent challenges', function (done) { + var firstParams = helper.extendParams({ resolveWithFullResponse: true }); // First call and CF returns a challenge var firstResponse = helper.fakeResponse({ statusCode: 503, @@ -173,10 +173,12 @@ describe('Cloudscraper', function() { .callsFake(helper.fakeRequest({ response: firstResponse })); var secondParams = helper.extendParams({ + resolveWithFullResponse: true, uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', qs: { 'jschl_vc': '427c2b1cd4fba29608ee81b200e94bfa', - 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 is a answer to cloudflares js challenge in this particular case + 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 is a answer to cloudflares 
js challenge + // in this particular case 'pass': '1543827239.915-44n9IE20mS' }, headers: { @@ -195,6 +197,7 @@ describe('Cloudscraper', function() { .callsFake(helper.fakeRequest({ response: secondResponse })); var thirdParams = helper.extendParams({ + resolveWithFullResponse: true, uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', qs: { 'jschl_vc': 'a41fee3a9f041fea01f0cbf3e8e4d29b', @@ -214,91 +217,94 @@ describe('Cloudscraper', function() { Request.onThirdCall() .callsFake(helper.fakeRequest({ response: thirdResponse })); - cloudscraper.get(uri, function(error, response, body) { + var options = { uri: uri, resolveWithFullResponse: true }; + + var promise = cloudscraper.get(options, function (error, response, body) { expect(error).to.be.null; expect(Request).to.be.calledThrice; - expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); + expect(Request.firstCall).to.be.calledWithExactly(firstParams); expect(Request.secondCall).to.be.calledWithExactly(secondParams); expect(Request.thirdCall).to.be.calledWithExactly(thirdParams); expect(response).to.be.equal(thirdResponse); expect(body).to.be.equal(thirdResponse.body); - done(); }); + expect(promise).to.eventually.equal(thirdResponse).and.notify(done); + this.clock.tick(14000); // tick the timeout }); - it('should make post request with formData', function(done) { + it('should make post request with formData', function (done) { var formData = { some: 'data' }; - var firstParams = helper.extendParams({ + var expectedParams = helper.extendParams({ method: 'POST', formData: formData }); // Stub first call, which request makes to page. 
It should return requested page - var onlyResponse = helper.fakeResponse({ body: requestedPage }); + var expectedResponse = helper.fakeResponse({ body: requestedPage }); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); var options = { uri: uri, formData: formData }; - cloudscraper.post(options, function(error, response, body) { + var promise = cloudscraper.post(options, function (error, response, body) { expect(error).to.be.null; - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(firstParams); + expect(Request).to.be.calledOnceWithExactly(expectedParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); }); - it('should make delete request', function(done) { - var firstParams = helper.extendParams({ method: 'DELETE' }); + it('should make delete request', function (done) { + var expectedParams = helper.extendParams({ method: 'DELETE' }); // Stub first call, which request makes to page. 
It should return requested page - var onlyResponse = helper.fakeResponse({ body: requestedPage }); + var expectedResponse = helper.fakeResponse({ body: requestedPage }); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); - cloudscraper.delete(uri, function(error, response, body) { + var promise = cloudscraper.delete(uri, function (error, response, body) { expect(error).to.be.null; - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(firstParams); + expect(Request).to.be.calledOnceWithExactly(expectedParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); }); - it('should return raw data when encoding is null', function(done) { - var firstParams = helper.extendParams({ realEncoding: null }); - // Stub first call, which request makes to page. 
It should return requested page - var onlyResponse = helper.fakeResponse({ + it('should return raw data when encoding is null', function (done) { + var expectedParams = helper.extendParams({ realEncoding: null }); + + var expectedResponse = helper.fakeResponse({ body: new Buffer('R0lGODlhDwAPAKECAAAAzMzM/////wAAACwAAAAADwAPAAACIISPeQHsrZ5ModrLlN48CXF8m2iQ3YmmKqVlRtW4MLwWACH+H09wdGltaXplZCBieSBVbGVhZCBTbWFydFNhdmVyIQAAOw==', 'base64') }); - Request.callsFake(helper.fakeRequest({ response: onlyResponse })); + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); var options = { uri: uri, encoding: null }; - cloudscraper.get(options, function(error, response, body) { + var promise = cloudscraper.get(options, function (error, response, body) { expect(error).to.be.null; - expect(Request).to.be.calledOnce; - expect(Request.firstCall).to.be.calledWithExactly(firstParams); + expect(Request).to.be.calledOnceWithExactly(expectedParams); - expect(response).to.be.equal(onlyResponse); - expect(body).to.be.equal(onlyResponse.body); - done(); + expect(response).to.be.equal(expectedResponse); + expect(body).to.be.equal(expectedResponse.body); }); + + expect(promise).to.eventually.equal(expectedResponse.body).and.notify(done); }); - it('should set the given cookie and then return page', function(done) { + it('should set the given cookie and then return page', function (done) { var firstResponse = helper.fakeResponse({ body: helper.getFixture('js_challenge_cookie.html') }); @@ -312,7 +318,7 @@ describe('Cloudscraper', function() { var secondResponse = helper.fakeResponse({ body: requestedPage }); // Only callback with the second response if the cookie string matches - var matchCookie = sinon.match(function(params) { + var matchCookie = sinon.match(function (params) { return params.jar.getCookieString(uri) === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a'; }); @@ -325,7 +331,7 @@ describe('Cloudscraper', function() { // We need to override 
cloudscraper's default jar for this test var options = { uri: uri, jar: helper.defaultParams.jar }; - cloudscraper.get(options, function(error, response, body) { + var promise = cloudscraper.get(options, function (error, response, body) { expect(error).to.be.null; expect(Request).to.be.calledTwice; @@ -334,8 +340,9 @@ describe('Cloudscraper', function() { expect(response).to.be.equal(secondResponse); expect(body).to.be.equal(secondResponse.body); - done(); }); + + expect(promise).to.eventually.equal(secondResponse.body).and.notify(done); }); it('should define custom defaults function', function (done) { diff --git a/test/test-rp.js b/test/test-rp.js new file mode 100644 index 0000000..e43e40b --- /dev/null +++ b/test/test-rp.js @@ -0,0 +1,105 @@ +'use strict'; + +var cloudscraper = require('../index'); +var request = require('request-promise'); +var helper = require('./helper'); + +var sinon = require('sinon'); +var expect = require('chai').expect; + +describe('Cloudscraper promise', function () { + var requestedPage = helper.getFixture('requested_page.html'); + var uri = helper.defaultParams.uri; + var sandbox; + var Request; + + beforeEach(function () { + helper.defaultParams.jar = request.jar(); + sandbox = sinon.createSandbox(); + // Prepare stubbed Request for each test + Request = sandbox.stub(request, 'Request'); + // setTimeout should be properly stubbed to prevent the unit test from running too long. 
+ this.clock = sinon.useFakeTimers(); + }); + + afterEach(function () { + sandbox.restore(); + this.clock.restore(); + }); + + it('should resolve with response body', function () { + var expectedResponse = helper.fakeResponse({ body: requestedPage }); + var expectedParams = helper.extendParams({ callback: undefined }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper.get(uri); + + return promise.then(function (body) { + expect(Request).to.be.calledOnceWithExactly(expectedParams); + expect(body).to.be.equal(requestedPage); + }); + }); + + it('should resolve with full response', function () { + var expectedResponse = helper.fakeResponse({ + statusCode: 200, + body: requestedPage + }); + + var expectedParams = helper.extendParams({ + callback: undefined, + resolveWithFullResponse: true + }); + + // The method is implicitly GET + delete expectedParams.method; + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var promise = cloudscraper({ + uri: uri, + resolveWithFullResponse: true + }); + + return promise.then(function (response) { + expect(Request).to.be.calledOnceWithExactly(expectedParams); + + expect(response).to.be.equal(expectedResponse); + expect(response.body).to.be.equal(requestedPage); + }); + }); + + // The helper calls the fake request callback synchronously. This results + // in the promise being rejected before we catch it in the test. + // This can be noticeable if we return the promise instead of calling done. 
+ it('should define catch', function (done) { + var expectedResponse = helper.fakeResponse({ error: new Error('fake') }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var caught = false; + var promise = cloudscraper(uri); + + promise.catch(function () { + caught = true; + }).then(function () { + if (caught) done(); + }); + }); + + it('should define finally', function (done) { + var expectedResponse = helper.fakeResponse({ error: new Error('fake') }); + + Request.callsFake(helper.fakeRequest({ response: expectedResponse })); + + var caught = false; + var promise = cloudscraper(uri); + + promise.then(function () { + caught = true; + }).finally(function () { + if (!caught) done(); + }); + }); +}); From d1c0b7813a066ebb065af8fe6f4d0435b1dcf6d3 Mon Sep 17 00:00:00 2001 From: pro-src Date: Sat, 2 Mar 2019 09:44:57 -0600 Subject: [PATCH 15/33] Make errors consistent with request-promise/errors --- errors.js | 75 ++++++++++++++++++++++++++++ index.js | 76 ++++++++++++++++++++--------- test/test-errors.js | 116 ++++++++++++++++++++++---------------------- 3 files changed, 185 insertions(+), 82 deletions(-) create mode 100644 errors.js diff --git a/errors.js b/errors.js new file mode 100644 index 0000000..1edf3e8 --- /dev/null +++ b/errors.js @@ -0,0 +1,75 @@ +'use strict'; + +var original = require('request-promise-core/errors'); +var OriginalError = original.RequestError; + +// The purpose of this library is two-fold. +// 1. Have errors consistent with request/promise-core +// 2. Prevent request/promise core from wrapping our errors + +// There are two differences between these errors and the originals. +// 1. There is a non-enumerable errorType attribute. +// 2. The error constructor is hidden from the stacktrace. + +function create(name, errorType) { + function CustomError(cause, options, response) { + + // This prevents nasty things e.g. `error.cause.error` and + // is why replacing the original RequestError is necessary. 
+ if (cause instanceof OriginalError) { + return cause; + } + + OriginalError.apply(this, arguments); + + // Change the name to match this constructor + this.name = name; + + if (Error.captureStackTrace) { // required for non-V8 environments + // Provide a proper stack trace that hides this constructor + Error.captureStackTrace(this, CustomError); + } + } + + CustomError.prototype = Object.create(OriginalError.prototype); + CustomError.prototype.constructor = CustomError; + // Keeps things stealthy by defining errorType on the prototype. + // This makes it a non-own (inherited) property and safer to add. + CustomError.prototype.errorType = errorType; + + Object.setPrototypeOf(CustomError, Object.getPrototypeOf(OriginalError)); + Object.defineProperty(CustomError, 'name', { + configurable: true, + value: name + }); + + return CustomError; +} + +var RequestError = create('RequestError', 0); +var CaptchaError = create('CaptchaError', 1); +var CloudflareError = create('CloudflareError', 2); +var ParserError = create('ParserError', 3); +// errorType 4 is a CloudflareError so that constructor is reused. + +// The following errors originate from promise-core and its dependents. +// Give them an errorType for consistency. +original.StatusCodeError.prototype.errorType = 5; +original.TransformError.prototype.errorType = 6; + +// This replaces the RequestError for all libraries using request/promise-core +// and prevents silent failure. +Object.defineProperty(original, 'RequestError', { + configurable: true, + enumerable: true, + writable: true, + value: RequestError +}); + +// Export our custom errors along with StatusCodeError, etc. 
+Object.assign(module.exports, original, { + RequestError: RequestError, + CaptchaError: CaptchaError, + ParserError: ParserError, + CloudflareError: CloudflareError +}); diff --git a/index.js b/index.js index a484e81..ad3fd36 100644 --- a/index.js +++ b/index.js @@ -1,5 +1,6 @@ var vm = require('vm'); var requestModule = require('request-promise'); +var errors = require('./errors'); module.exports = defaults.call(requestModule); @@ -105,24 +106,27 @@ function performRequest(options, isFirstRequest) { return request; } -function processRequestResponse(options, requestResult, callback) { - var error = requestResult.error; - var response = requestResult.response; - var body = requestResult.body; - var validationError; +function processRequestResponse(options, result, callback) { + var error = result.error; + var response = result.response; + var body = result.body; var stringBody; var isChallengePresent; var isRedirectChallengePresent; if (error || !body || !body.toString) { // Pure request error (bad connection, wrong url, etc) - return callback({ errorType: 0, error: error }, response, body); + error = new errors.RequestError(error, options, response); + + return callback(error, response, body); } stringBody = body.toString('utf8'); - if (validationError = checkForErrors(error, stringBody)) { - return callback(validationError, response, body); + try { + validate(response, stringBody, options); + } catch (error) { + return callback(error, response, body); } isChallengePresent = stringBody.indexOf('a = document.getElementById(\'jschl-answer\');') !== -1; @@ -130,7 +134,11 @@ function processRequestResponse(options, requestResult, callback) { // isTargetPage = !isChallengePresent && !isRedirectChallengePresent; if (isChallengePresent && options.challengesToSolve === 0) { - return callback({ errorType: 4 }, response, body); + var cause = 'Cloudflare challenge loop'; + error = new errors.CloudflareError(cause, options, response); + error.errorType = 4; + + return 
callback(error, response, body); } // If body contains specified string, solve challenge @@ -142,23 +150,24 @@ function processRequestResponse(options, requestResult, callback) { setCookieAndReload(response, stringBody, options, callback); } else { // All is good - processResponseBody(options, error, response, body, callback); + processResponseBody(response, body, options, callback); } } -function checkForErrors(error, body) { +function validate(response, body, options) { var match; // Finding captcha if (body.indexOf('why_captcha') !== -1 || /cdn-cgi\/l\/chk_captcha/i.test(body)) { - return { errorType: 1 }; + throw new errors.CaptchaError('captcha', options, response); } // trying to find '1006' match = body.match(/<\w+\s+class="cf-error-code">(.*)<\/\w+>/i); if (match) { - return { errorType: 2, error: parseInt(match[1]) }; + var code = parseInt(match[1]); + throw new errors.CloudflareError(code, options, response); } return false; @@ -170,9 +179,14 @@ function solveChallenge(response, body, options, callback) { var jsChlVc; var answerResponse; var answerUrl; + var error; + var cause; if (!challenge) { - return callback({errorType: 3, error: 'I cant extract challengeId (jschl_vc) from page'}, response, body); + cause = 'challengeId (jschl_vc) extraction failed'; + error = new errors.ParserError(cause, options, response); + + return callback(error, response, body); } jsChlVc = challenge[1]; @@ -180,7 +194,10 @@ function solveChallenge(response, body, options, callback) { challenge = body.match(/getElementById\('cf-content'\)[\s\S]+?setTimeout.+?\r?\n([\s\S]+?a\.value =.+?)\r?\n/i); if (!challenge) { - return callback({errorType: 3, error: 'I cant extract method from setTimeOut wrapper'}, response, body); + cause = 'setTimeout callback extraction failed'; + error = new errors.ParserError(cause, options, response); + + return callback(error, response, body); } var challenge_pass = body.match(/name="pass" value="(.+?)"/)[1]; @@ -198,8 +215,11 @@ function 
solveChallenge(response, body, options, callback) { 'jschl_answer': (eval(challenge) + response.request.host.length), 'pass': challenge_pass }; - } catch (err) { - return callback({errorType: 3, error: 'Error occurred during evaluation: ' + err.message}, response, body); + } catch (error) { + error.message = 'Challenge evaluation failed: ' + error.message; + error = new errors.ParserError(error, options, response); + + return callback(error, response, body); } answerUrl = response.request.uri.protocol + '//' + host + '/cdn-cgi/l/chk_jschl'; @@ -220,7 +240,10 @@ function setCookieAndReload(response, body, options, callback) { var challenge = body.match(/S='([^']+)'/); if (!challenge) { - return callback({errorType: 3, error: 'I cant extract cookie generation code from page'}, response, body); + var cause = 'Cookie code extraction failed'; + var error = new errors.ParserError(cause, options, response); + + return callback(error, response, body); } var base64EncodedCode = challenge[1]; @@ -237,8 +260,11 @@ function setCookieAndReload(response, body, options, callback) { vm.runInNewContext(cookieSettingCode, sandbox); options.jar.setCookie(sandbox.document.cookie, response.request.uri.href, {ignoreError: true}); - } catch (err) { - return callback({errorType: 3, error: 'Error occurred during evaluation: ' + err.message}, response, body); + } catch (error) { + error.message = 'Cookie code evaluation failed: ' + error.message; + error = new errors.ParserError(error, options, response); + + return callback(error, response, body); } options.challengesToSolve = options.challengesToSolve - 1; @@ -246,7 +272,9 @@ function setCookieAndReload(response, body, options, callback) { performRequest(options, false); } -function processResponseBody(options, error, response, body, callback) { +function processResponseBody(response, body, options, callback) { + var error = null; + if(typeof options.realEncoding === 'string') { body = body.toString(options.realEncoding); // The 
resolveWithFullResponse option will resolve with the response object. @@ -256,8 +284,10 @@ function processResponseBody(options, error, response, body, callback) { // In case of real encoding, try to validate the response // and find potential errors there. // If encoding is not provided, return response as it is - if (validationError = checkForErrors(error, body)) { - return callback(validationError, response, body); + try { + validate(response, body, options); + } catch (e) { + error = e; } } diff --git a/test/test-errors.js b/test/test-errors.js index 1d02148..75e3066 100644 --- a/test/test-errors.js +++ b/test/test-errors.js @@ -2,7 +2,7 @@ var cloudscraper = require('../index'); var request = require('request-promise'); -var RequestError = require('request-promise/errors').RequestError; +var errors = require('../errors'); var helper = require('./helper'); var sinon = require('sinon'); @@ -28,20 +28,19 @@ describe('Cloudscraper', function() { }); it('should return error if it was thrown by request', function(done) { - var fakeError = new Error('fake error'); + var fakeError = new Error('fake'); Request.callsFake(helper.fakeRequest({ error: fakeError })); var promise = cloudscraper.get(uri, function (error) { - // errorType 0, means it is some kind of system error - expect(error).to.be.an('object'); - expect(error).to.be.eql({ errorType: 0, error: fakeError }); - expect(error.error).to.be.an('error'); + expect(error).to.be.instanceOf(errors.RequestError); + expect(error).to.have.property('error', fakeError); + expect(error).to.have.property('errorType', 0); expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); }); it('should return error if captcha is served by cloudflare', function(done) { @@ -54,8 +53,9 @@ describe('Cloudscraper', function() { var promise = cloudscraper.get(uri, function (error, 
response, body) { // errorType 1, means captcha is served - expect(error).to.be.an('object'); - expect(error).to.be.eql({ errorType: 1 }); + expect(error).to.be.instanceOf(errors.CaptchaError); + expect(error).to.have.property('error', 'captcha'); + expect(error).to.have.property('errorType', 1); expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); @@ -63,7 +63,7 @@ describe('Cloudscraper', function() { expect(body).to.be.equal(expectedResponse.body); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); }); it('should return error if cloudflare returned some inner error', function(done) { @@ -79,8 +79,9 @@ describe('Cloudscraper', function() { var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 2, means inner cloudflare error - expect(error).to.be.an('object'); - expect(error).to.be.eql({ errorType: 2, error: 1006 }); + expect(error).to.be.instanceOf(errors.CloudflareError); + expect(error).to.have.property('error', 1006); + expect(error).to.have.property('errorType', 2); expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); @@ -88,7 +89,7 @@ describe('Cloudscraper', function() { expect(body).to.be.equal(expectedResponse.body); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); }); it('should return error if cf presented more than 3 challenges in a row', function(done) { @@ -112,8 +113,9 @@ describe('Cloudscraper', function() { Request.callsFake(helper.fakeRequest({ response: expectedResponse })); var promise = cloudscraper.get(uri, function (error, response, body) { - expect(error).to.be.an('object'); - expect(error).to.be.eql({ errorType: 4 }); + expect(error).to.be.instanceOf(errors.CloudflareError); + expect(error).to.have.property('error', 'Cloudflare challenge loop'); + 
expect(error).to.have.property('errorType', 4); assert.equal(Request.callCount, 4, 'Request call count'); expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); @@ -129,7 +131,7 @@ describe('Cloudscraper', function() { expect(body).to.be.equal(expectedResponse.body); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.CloudflareError).and.notify(done); // Tick the timeout this.clock.tick(200000); @@ -140,20 +142,20 @@ describe('Cloudscraper', function() { // Error codes: 1012, 1011, 1002, 1000, 1004, 1010, 1006, 1007, 1008 Request.callsFake(helper.fakeRequest({ - response: { statusCode: 500} + response: {statusCode: 500} })); var promise = cloudscraper.get(uri, function (error, response, body) { - // errorType 2, means inner cloudflare error - expect(error).to.be.an('object'); - expect(error).to.be.eql({ errorType: 0, error: null }); + expect(error).to.be.instanceOf(errors.RequestError); + expect(error).to.have.property('error', null); + expect(error).to.have.property('errorType', 0); expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); expect(body).to.be.equal(undefined); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); }); it('should return error if challenge page failed to be parsed', function(done) { @@ -164,9 +166,9 @@ describe('Cloudscraper', function() { Request.callsFake(helper.fakeRequest({ response: expectedResponse })); var promise = cloudscraper.get(uri, function (error, response, body) { - // errorType 3, means parsing failed - expect(error).to.be.an('object'); - expect(error).to.own.include({ errorType: 3 }); + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error').that.is.ok; + expect(error).to.have.property('errorType', 3); expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); @@ -174,7 +176,7 @@ 
describe('Cloudscraper', function() { expect(body).to.be.equal(expectedResponse.body); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); this.clock.tick(7000); // tick the timeout }); @@ -191,11 +193,10 @@ describe('Cloudscraper', function() { Request.callsFake(helper.fakeRequest({ response: expectedResponse })); var promise = cloudscraper.get(uri, function (error, response, body) { - // errorType 3, means parsing failed - expect(error).to.be.an('object'); - expect(error).to.own.include({ errorType: 3 }); - expect(error.error).to.be.a('string'); - expect(error.error).to.include('Error occurred during evaluation: '); + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error').that.is.an('error'); + expect(error).to.have.property('errorType', 3); + expect(error.message).to.include('Challenge evaluation failed'); expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); @@ -203,7 +204,7 @@ describe('Cloudscraper', function() { expect(body).to.be.equal(expectedResponse.body); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); this.clock.tick(7000); // tick the timeout }); @@ -219,11 +220,9 @@ describe('Cloudscraper', function() { Request.callsFake(helper.fakeRequest({ response: expectedResponse })); var promise = cloudscraper.get(uri, function (error, response, body) { - // errorType 3, means parsing failed - expect(error).to.be.an('object'); - expect(error).to.own.include({ errorType: 3 }); - expect(error.error).to.be.a('string'); - expect(error.error).to.include('I cant extract challengeId'); + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error', 'challengeId (jschl_vc) extraction failed'); + expect(error).to.have.property('errorType', 3); 
expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); @@ -231,7 +230,7 @@ describe('Cloudscraper', function() { expect(body).to.be.equal(expectedResponse.body); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); this.clock.tick(7000); // tick the timeout }); @@ -256,15 +255,15 @@ describe('Cloudscraper', function() { var promise = cloudscraper.get(uri, function (error) { // errorType 0, a connection error for example - expect(error).to.be.an('object'); - expect(error).to.be.eql({ errorType: 0, error: fakeError }); - expect(error.error).to.be.an('error'); + expect(error).to.be.instanceOf(errors.RequestError); + expect(error).to.have.property('error', fakeError); + expect(error).to.have.property('errorType', 0); expect(Request).to.be.calledTwice; expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.RequestError).and.notify(done); // tick the timeout this.clock.tick(7000); @@ -301,8 +300,9 @@ describe('Cloudscraper', function() { var promise = cloudscraper.get(uri, function (error, response, body) { // errorType 1, means captcha is served - expect(error).to.be.an('object'); - expect(error).to.be.eql({ errorType: 1 }); + expect(error).to.be.instanceOf(errors.CaptchaError); + expect(error).to.have.property('error', 'captcha'); + expect(error).to.have.property('errorType', 1); expect(Request).to.be.calledTwice; expect(Request.firstCall).to.be.calledWithExactly(helper.defaultParams); @@ -312,7 +312,7 @@ describe('Cloudscraper', function() { expect(body).to.be.equal(secondResponse.body); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); this.clock.tick(7000); // tick the timeout }); @@ -329,11 +329,9 @@ describe('Cloudscraper', 
function() { Request.callsFake(helper.fakeRequest({ response: expectedResponse })); var promise = cloudscraper.get(uri, function (error, response, body) { - expect(error).to.be.an('object'); - expect(error).to.be.eql({ - errorType: 3, - error: 'I cant extract cookie generation code from page' - }); + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error', 'Cookie code extraction failed'); + expect(error).to.have.property('errorType', 3); expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); @@ -341,7 +339,7 @@ describe('Cloudscraper', function() { expect(body).to.be.equal(expectedResponse.body); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); }); it('should throw a TypeError if callback is not a function', function(done) { @@ -397,8 +395,9 @@ describe('Cloudscraper', function() { var promise = cloudscraper.get(options, function (error, response, body) { // errorType 1, means captcha is served - expect(error).to.be.an('object'); - expect(error).to.be.eql({ errorType: 1 }); + expect(error).to.be.instanceOf(errors.CaptchaError); + expect(error).to.have.property('error', 'captcha'); + expect(error).to.have.property('errorType', 1); expect(Request).to.be.calledOnceWithExactly(firstParams); @@ -406,7 +405,7 @@ describe('Cloudscraper', function() { expect(body).to.be.equal(expectedResponse.body.toString('fake-encoding')); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.CaptchaError).and.notify(done); this.clock.tick(7000); // tick the timeout }); @@ -424,11 +423,10 @@ describe('Cloudscraper', function() { Request.callsFake(helper.fakeRequest({ response: expectedResponse })); var promise = cloudscraper.get(uri, function (error, response, body) { - // errorType 3, means parsing failed - expect(error).to.be.an('object'); - expect(error).to.own.include({ 
errorType: 3 }); - expect(error.error).to.be.a('string'); - expect(error.error).to.include('Error occurred during evaluation: vm eval failed'); + expect(error).to.be.instanceOf(errors.ParserError); + expect(error).to.have.property('error').that.is.an('error'); + expect(error).to.have.property('errorType', 3); + expect(error.message).to.include('vm eval failed'); expect(Request).to.be.calledOnceWithExactly(helper.defaultParams); @@ -436,7 +434,7 @@ describe('Cloudscraper', function() { expect(body).to.be.equal(expectedResponse.body); }); - expect(promise).to.be.rejectedWith(RequestError).and.notify(done); + expect(promise).to.be.rejectedWith(errors.ParserError).and.notify(done); this.clock.tick(7000); // tick the timeout }); From bbc2eda2a7c13ae9c4fdd54df6f9b11afdc76f37 Mon Sep 17 00:00:00 2001 From: pro-src Date: Sun, 3 Mar 2019 01:29:01 -0600 Subject: [PATCH 16/33] Remove callback argument --- index.js | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/index.js b/index.js index ad3fd36..5dbf5a9 100644 --- a/index.js +++ b/index.js @@ -100,13 +100,14 @@ function performRequest(options, isFirstRequest) { options.callback = callback; } - processRequestResponse(options, result, options.callback); + processRequestResponse(options, result); }; return request; } -function processRequestResponse(options, result, callback) { +function processRequestResponse(options, result) { + var callback = options.callback; var error = result.error; var response = result.response; var body = result.body; @@ -144,13 +145,13 @@ function processRequestResponse(options, result, callback) { // If body contains specified string, solve challenge if (isChallengePresent) { setTimeout(function() { - solveChallenge(response, stringBody, options, callback); + solveChallenge(response, stringBody, options); }, options.cloudflareTimeout); } else if (isRedirectChallengePresent) { - setCookieAndReload(response, stringBody, options, callback); + 
setCookieAndReload(response, stringBody, options); } else { // All is good - processResponseBody(response, body, options, callback); + processResponseBody(response, body, options); } } @@ -173,7 +174,8 @@ function validate(response, body, options) { return false; } -function solveChallenge(response, body, options, callback) { +function solveChallenge(response, body, options) { + var callback = options.callback; var challenge = body.match(/name="jschl_vc" value="(\w+)"/); var host = response.request.host; var jsChlVc; @@ -236,9 +238,10 @@ function solveChallenge(response, body, options, callback) { performRequest(options, false); } -function setCookieAndReload(response, body, options, callback) { - var challenge = body.match(/S='([^']+)'/); +function setCookieAndReload(response, body, options) { + var callback = options.callback; + var challenge = body.match(/S='([^']+)'/); if (!challenge) { var cause = 'Cookie code extraction failed'; var error = new errors.ParserError(cause, options, response); @@ -272,7 +275,8 @@ function setCookieAndReload(response, body, options, callback) { performRequest(options, false); } -function processResponseBody(response, body, options, callback) { +function processResponseBody(response, body, options) { + var callback = options.callback; var error = null; if(typeof options.realEncoding === 'string') { From f7c9b77ae91b8ed4d4fa163c0a12261fd091d9d9 Mon Sep 17 00:00:00 2001 From: pro-src Date: Sun, 3 Mar 2019 01:33:15 -0600 Subject: [PATCH 17/33] Make options always the first argument --- index.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/index.js b/index.js index 5dbf5a9..12c4203 100644 --- a/index.js +++ b/index.js @@ -125,7 +125,7 @@ function processRequestResponse(options, result) { stringBody = body.toString('utf8'); try { - validate(response, stringBody, options); + validate(options, response, stringBody); } catch (error) { return callback(error, response, body); } @@ -145,17 +145,17 @@ 
function processRequestResponse(options, result) { // If body contains specified string, solve challenge if (isChallengePresent) { setTimeout(function() { - solveChallenge(response, stringBody, options); + solveChallenge(options, response, stringBody); }, options.cloudflareTimeout); } else if (isRedirectChallengePresent) { - setCookieAndReload(response, stringBody, options); + setCookieAndReload(options, response, stringBody); } else { // All is good - processResponseBody(response, body, options); + processResponseBody(options, response, body); } } -function validate(response, body, options) { +function validate(options, response, body) { var match; // Finding captcha @@ -174,7 +174,7 @@ function validate(response, body, options) { return false; } -function solveChallenge(response, body, options) { +function solveChallenge(options, response, body) { var callback = options.callback; var challenge = body.match(/name="jschl_vc" value="(\w+)"/); var host = response.request.host; @@ -238,7 +238,7 @@ function solveChallenge(response, body, options) { performRequest(options, false); } -function setCookieAndReload(response, body, options) { +function setCookieAndReload(options, response, body) { var callback = options.callback; var challenge = body.match(/S='([^']+)'/); @@ -275,7 +275,7 @@ function setCookieAndReload(response, body, options) { performRequest(options, false); } -function processResponseBody(response, body, options) { +function processResponseBody(options, response, body) { var callback = options.callback; var error = null; @@ -289,7 +289,7 @@ function processResponseBody(response, body, options) { // and find potential errors there. 
// If encoding is not provided, return response as it is try { - validate(response, body, options); + validate(options, response, body); } catch (e) { error = e; } From 79a6f814746bd7899fe1de1b3ecb5f67b210cfea Mon Sep 17 00:00:00 2001 From: pro-src Date: Sun, 3 Mar 2019 01:35:37 -0600 Subject: [PATCH 18/33] Remove request result destructuring --- index.js | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/index.js b/index.js index 12c4203..3bf6a09 100644 --- a/index.js +++ b/index.js @@ -92,25 +92,21 @@ function performRequest(options, isFirstRequest) { if (called) return; called = true; - var result = { error: error, response: response, body: body }; - if (isFirstRequest) { // We only need the callback from the first request. // The other callbacks can be safely ignored. options.callback = callback; } - processRequestResponse(options, result); + processRequestResponse(options, error, response, body); }; return request; } -function processRequestResponse(options, result) { +function processRequestResponse(options, error, response, body) { var callback = options.callback; - var error = result.error; - var response = result.response; - var body = result.body; + var stringBody; var isChallengePresent; var isRedirectChallengePresent; @@ -176,6 +172,7 @@ function validate(options, response, body) { function solveChallenge(options, response, body) { var callback = options.callback; + var challenge = body.match(/name="jschl_vc" value="(\w+)"/); var host = response.request.host; var jsChlVc; From 32c3b350426e327a072aaa3fa3c2055a4404c43c Mon Sep 17 00:00:00 2001 From: pro-src Date: Sun, 3 Mar 2019 02:08:29 -0600 Subject: [PATCH 19/33] Add/adjust code comments --- index.js | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/index.js b/index.js index 3bf6a09..582d870 100644 --- a/index.js +++ b/index.js @@ -34,11 +34,11 @@ function defaults(params) { }); // There's no safety net here, any changes apply to 
all future requests - // that are made with this instance and derived instances + // that are made with this instance and derived instances. cloudscraper.defaultParams = defaultParams; // Ensure this instance gets a copy of our custom defaults function - // It's not necessary on subsequent calls + // and afterwards, it will be copied over automatically. if (isRequestModule) { cloudscraper.defaults = defaults; } @@ -49,6 +49,8 @@ function defaults(params) { return cloudscraper; } +// This function is wrapped to ensure that we get new options on first call. +// The options object is reused in subsequent calls when calling it directly. function performRequest(options, isFirstRequest) { // Prevent overwriting realEncoding in subsequent calls if (!('realEncoding' in options)) { @@ -61,7 +63,6 @@ function performRequest(options, isFirstRequest) { } } - // Requester is wrapped by request to ensure that we get new options on first call options.encoding = null; if (isNaN(options.challengesToSolve)) { @@ -79,6 +80,7 @@ function performRequest(options, isFirstRequest) { var request = requester(options); // This should be a user supplied callback or request-promise's callback. + // The user supplied callback is always wrapped by requester. var callback = request.callback; // If the requester is not request-promise, ensure we get a callback. @@ -104,6 +106,8 @@ function performRequest(options, isFirstRequest) { return request; } +// The argument convention is options first where possible, options +// always before response, and body always after response. 
function processRequestResponse(options, error, response, body) { var callback = options.callback; @@ -159,7 +163,7 @@ function validate(options, response, body) { throw new errors.CaptchaError('captcha', options, response); } - // trying to find '1006' + // Trying to find '1006' match = body.match(/<\w+\s+class="cf-error-code">(.*)<\/\w+>/i); if (match) { @@ -223,15 +227,15 @@ function solveChallenge(options, response, body) { answerUrl = response.request.uri.protocol + '//' + host + '/cdn-cgi/l/chk_jschl'; - // Prevent reusing the headers object in subsequent calls as this affects tests + // Prevent reusing the headers object to simplify unit testing. options.headers = Object.assign({}, options.headers); - // Original uri should be placed as referer + // The original uri should be placed as referer. options.headers['Referer'] = response.request.uri.href; options.uri = answerUrl; options.qs = answerResponse; options.challengesToSolve = options.challengesToSolve - 1; - // Make request with answer + // Make request with answer. performRequest(options, false); } @@ -278,13 +282,12 @@ function processResponseBody(options, response, body) { if(typeof options.realEncoding === 'string') { body = body.toString(options.realEncoding); - // The resolveWithFullResponse option will resolve with the response object. - // This changes the response.body so it is as expected. + // The resolveWithFullResponse option will resolve with the response + // object. This changes the response.body so it is as expected. response.body = body; - // In case of real encoding, try to validate the response - // and find potential errors there. - // If encoding is not provided, return response as it is + // In case of real encoding, try to validate the response and find + // potential errors there, otherwise return the response as is. 
try { validate(options, response, body); } catch (e) { From 95465e980ea3acd5288d62abc32d8f6f4d6ff8d6 Mon Sep 17 00:00:00 2001 From: pro-src Date: Sun, 3 Mar 2019 06:06:08 -0600 Subject: [PATCH 20/33] Use correct hostname when using proxy --- index.js | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/index.js b/index.js index 582d870..3a56f17 100644 --- a/index.js +++ b/index.js @@ -178,10 +178,9 @@ function solveChallenge(options, response, body) { var callback = options.callback; var challenge = body.match(/name="jschl_vc" value="(\w+)"/); - var host = response.request.host; + var uri = response.request.uri; var jsChlVc; var answerResponse; - var answerUrl; var error; var cause; @@ -215,7 +214,7 @@ function solveChallenge(options, response, body) { try { answerResponse = { 'jschl_vc': jsChlVc, - 'jschl_answer': (eval(challenge) + response.request.host.length), + 'jschl_answer': (eval(challenge) + uri.hostname.length), 'pass': challenge_pass }; } catch (error) { @@ -225,13 +224,11 @@ function solveChallenge(options, response, body) { return callback(error, response, body); } - answerUrl = response.request.uri.protocol + '//' + host + '/cdn-cgi/l/chk_jschl'; - // Prevent reusing the headers object to simplify unit testing. options.headers = Object.assign({}, options.headers); - // The original uri should be placed as referer. - options.headers['Referer'] = response.request.uri.href; - options.uri = answerUrl; + // Use the original uri as the referer and to construct the answer url. 
+ options.headers['Referer'] = uri.href; + options.uri = uri.protocol + '//' + uri.hostname + '/cdn-cgi/l/chk_jschl'; options.qs = answerResponse; options.challengesToSolve = options.challengesToSolve - 1; From 13efbeed60893b54c7478f65b14d1bcb67b39574 Mon Sep 17 00:00:00 2001 From: pro-src Date: Sun, 3 Mar 2019 07:47:41 -0600 Subject: [PATCH 21/33] Make request uri testable --- test/helper.js | 29 ++++++++++++------------ test/rp.js | 17 ++++---------- test/test-index.js | 55 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 28 deletions(-) diff --git a/test/helper.js b/test/helper.js index a1d5571..e7783bf 100644 --- a/test/helper.js +++ b/test/helper.js @@ -1,7 +1,6 @@ var request = require('./rp'); var sinon = require('sinon'); var fs = require('fs'); -var url = require('url'); var path = require('path'); var defaultParams = { @@ -34,23 +33,12 @@ module.exports = { return cache[fileName]; }, defaultParams: defaultParams, - // This method returns properly faked response object for request lib, which is used inside cloudscraper library fakeResponse: function(template) { - var fake = Object.assign({ + return Object.assign({ statusCode: 200, headers: defaultParams.headers, body: '' }, template); - - // The uri property of the fake response is only for tests to simplify fake request creation. - var uri = url.parse(fake.uri || defaultParams.uri); - // The actual request object is more complicated but this library only uses the uri parts. - fake.request = { - host: uri.host, - uri: uri - }; - - return fake; }, extendParams: function(params) { // Extend target with the default params and provided params @@ -76,7 +64,18 @@ module.exports = { Object.freeze(fake); Object.keys(fake).forEach(function (key) { if (!Object.isFrozen(fake[key]) && !Buffer.isBuffer(fake[key])) { - Object.freeze(fake[key]); + // Mark all existing properties as non-configurable and non-writable. 
+ var target = fake[key]; + Object.keys(target).forEach(function (key) { + var desc = Object.getOwnPropertyDescriptor(target, key); + if (desc.configurable) { + desc.configurable = false; + if (desc.writable !== undefined) { + desc.writable = false; + } + Object.defineProperty(target, key, desc); + } + }); } }); @@ -98,6 +97,8 @@ module.exports = { return callback; }, set: function(callback) { + // Add the final property needed to fake the response. + fake.response.request = instance; // This won't callback unless cloudscraper replaces the callback. callback(fake.error, fake.response, fake.body); } diff --git a/test/rp.js b/test/rp.js index 096189a..bd9462d 100644 --- a/test/rp.js +++ b/test/rp.js @@ -1,25 +1,16 @@ 'use strict'; // Reproduces: https://github.com/request/request-promise/blob/6d11ddc63dde2462a8e39cd8d0b6956556b977f1/lib/rp.js - -// This library almost exactly reproduces the real rp library. -// The primary difference being that we're pre-patching the init method. // It must be done this way because request-promise bypasses require.cache. var Bluebird = require('bluebird').getNewLibraryCopy(); var configure = require('request-promise-core/configure/request2'); var request = require('request'); -// The real rp library works by replacing this init function. -// It will store the original callback from options, -// apply our init function, and wrap the request instance's callback. -function init(options) { - // Request -> Request.prototype.init -> Request.prototype.start - // The test/helper is responsible for calling back with a fake response. -} - -// Replacing init with a noop prevents real requests from being made. -request.Request.prototype.init = init; +// Replacing start with a noop prevents real requests from being made. +// Request -> Request.prototype.init -> Request.prototype.start +// The test/helper is responsible for calling back with a fake response. 
+request.Request.prototype.start = function(){}; configure({ request: request, diff --git a/test/test-index.js b/test/test-index.js index bfee2b2..ae3b6a4 100644 --- a/test/test-index.js +++ b/test/test-index.js @@ -14,6 +14,7 @@ describe('Cloudscraper', function () { var Request; beforeEach(function () { + helper.defaultParams.jar = request.jar(); sandbox = sinon.createSandbox(); // Prepare stubbed Request for each test Request = sandbox.stub(request, 'Request'); @@ -206,7 +207,7 @@ describe('Cloudscraper', function () { 'pass': '1543827246.024-hvxyNA3rOg' }, headers: { - 'Referer': 'http://example-site.dev/path/' + 'Referer': 'http://example-site.dev/cdn-cgi/l/chk_jschl?jschl_vc=427c2b1cd4fba29608ee81b200e94bfa&jschl_answer=10.66734594&pass=1543827239.915-44n9IE20mS' }, challengesToSolve: 1 }); @@ -345,6 +346,58 @@ describe('Cloudscraper', function () { expect(promise).to.eventually.equal(secondResponse.body).and.notify(done); }); + it('should not use proxy\'s uri', function (done) { + + var firstParams = helper.extendParams({ + proxy: 'https://example-proxy-site.dev/path/' + }); + + var firstResponse = helper.fakeResponse({ + statusCode: 503, + body: helper.getFixture('js_challenge_03_12_2018_1.html') + }); + + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + proxy: 'https://example-proxy-site.dev/path/', + uri: 'http://example-site.dev/cdn-cgi/l/chk_jschl', + qs: { + 'jschl_vc': '427c2b1cd4fba29608ee81b200e94bfa', + 'jschl_answer': -5.33265406 + 'example-site.dev'.length, // -5.33265406 is a answer to cloudflares js challenge + // in this particular case + 'pass': '1543827239.915-44n9IE20mS' + }, + headers: { + 'Referer': 'http://example-site.dev/path/' + }, + challengesToSolve: 2 + }); + + var secondResponse = helper.fakeResponse({ body: requestedPage }); + + Request.onSecondCall() + .callsFake(helper.fakeRequest({ response: secondResponse })); + + var options = { uri: uri, 
proxy: 'https://example-proxy-site.dev/path/' }; + + var promise = cloudscraper.get(options, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(firstParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); + }); + + expect(promise).to.eventually.equal(secondResponse.body).and.notify(done); + + this.clock.tick(14000); // tick the timeout + }); + it('should define custom defaults function', function (done) { expect(cloudscraper.defaults).to.not.equal(request.defaults); From 2516f2eb73a831c249bb8ef26681c56f9ec0e944 Mon Sep 17 00:00:00 2001 From: pro-src Date: Sun, 3 Mar 2019 19:10:50 -0600 Subject: [PATCH 22/33] Listen to error and complete events --- index.js | 34 +++++++++++++++++----------------- test/helper.js | 29 ++++++++++++----------------- 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/index.js b/index.js index 3a56f17..26620e3 100644 --- a/index.js +++ b/index.js @@ -79,30 +79,30 @@ function performRequest(options, isFirstRequest) { } var request = requester(options); - // This should be a user supplied callback or request-promise's callback. - // The user supplied callback is always wrapped by requester. - var callback = request.callback; // If the requester is not request-promise, ensure we get a callback. - if (typeof callback !== 'function') { + if (typeof request.callback !== 'function') { throw new TypeError('Expected a callback function, got ' - + typeof(callback) + ' instead.'); + + typeof(request.callback) + ' instead.'); } - var called = false; - request.callback = function(error, response, body) { - if (called) return; - - called = true; - if (isFirstRequest) { - // We only need the callback from the first request. - // The other callbacks can be safely ignored. 
- options.callback = callback; - } + // We only need the callback from the first request. + // The other callbacks can be safely ignored. + if (isFirstRequest) { + // This should be a user supplied callback or request-promise's callback. + // The callback is always wrapped/bound to the request instance. + options.callback = request.callback; + } - processRequestResponse(options, error, response, body); - }; + // The error event only provides an error argument. + request.removeAllListeners('error') + .once('error', processRequestResponse.bind(null, options)); + // The complete event only provides response and body arguments. + request.removeAllListeners('complete') + .once('complete', processRequestResponse.bind(null, options, null)); + // Indicate that this is a cloudscraper request, required by test/helper. + request.cloudscraper = true; return request; } diff --git a/test/helper.js b/test/helper.js index e7783bf..70c602d 100644 --- a/test/helper.js +++ b/test/helper.js @@ -80,27 +80,22 @@ module.exports = { }); return function Request(params) { - // The promise returned by request-promise won't resolve until - // it's callback is called. The problem is that we need to callback - // after the constructor returns to simulate a real request/response. var instance = request(params); - // This is the callback that cloudscraper should replace. - var callback = instance.callback; + // This is a hack to prevent sending events to early. See #104 + Object.defineProperty(instance, 'cloudscraper', { + set: function() { + // Add the required convenience property to fake the response. + fake.response.request = this; - // We don't want to callback with the fake result until - // after the constructor returns thus define a property getter/setter - // and wait for cloudscraper to set it's own callback. - Object.defineProperty(instance, 'callback', { - get: function() { - // Returns request-promise's callback. 
- return callback; + if (fake.error !== null) { + this.emit('error', fake.error); + } else { + this.emit('complete', fake.response, fake.body); + } }, - set: function(callback) { - // Add the final property needed to fake the response. - fake.response.request = instance; - // This won't callback unless cloudscraper replaces the callback. - callback(fake.error, fake.response, fake.body); + get: function() { + return true; } }); From 2d7668a8bd3957a43dbc797b7fc6f1f23bdad17e Mon Sep 17 00:00:00 2001 From: Oleksii Sribnyi Date: Tue, 5 Mar 2019 11:21:40 +0100 Subject: [PATCH 23/33] Fix #75 by running eval inside vm with a timeout --- index.js | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/index.js b/index.js index 26620e3..dd68e66 100644 --- a/index.js +++ b/index.js @@ -1,6 +1,9 @@ var vm = require('vm'); var requestModule = require('request-promise'); var errors = require('./errors'); +var VM_OPTIONS = { + timeout: 5000 +}; module.exports = defaults.call(requestModule); @@ -181,6 +184,7 @@ function solveChallenge(options, response, body) { var uri = response.request.uri; var jsChlVc; var answerResponse; + var solvedChallenge; var error; var cause; @@ -212,11 +216,7 @@ function solveChallenge(options, response, body) { challenge = challenge.replace(/'; \d+'/g, ''); try { - answerResponse = { - 'jschl_vc': jsChlVc, - 'jschl_answer': (eval(challenge) + uri.hostname.length), - 'pass': challenge_pass - }; + solvedChallenge = vm.runInNewContext(challenge, Object.create(null), VM_OPTIONS); } catch (error) { error.message = 'Challenge evaluation failed: ' + error.message; error = new errors.ParserError(error, options, response); @@ -224,6 +224,12 @@ function solveChallenge(options, response, body) { return callback(error, response, body); } + answerResponse = { + 'jschl_vc': jsChlVc, + 'jschl_answer': (solvedChallenge + uri.hostname.length), + 'pass': challenge_pass + }; + // Prevent reusing the headers object to simplify unit testing. 
options.headers = Object.assign({}, options.headers); // Use the original uri as the referer and to construct the answer url. @@ -258,7 +264,7 @@ function setCookieAndReload(options, response, body) { }; try { - vm.runInNewContext(cookieSettingCode, sandbox); + vm.runInNewContext(cookieSettingCode, sandbox, VM_OPTIONS); options.jar.setCookie(sandbox.document.cookie, response.request.uri.href, {ignoreError: true}); } catch (error) { From 680c34f7aeb538b0f17bbd3da2b96b13f91e4a50 Mon Sep 17 00:00:00 2001 From: pro-src Date: Tue, 5 Mar 2019 04:54:18 -0600 Subject: [PATCH 24/33] Fixes #102 --- test/test-index.js | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/test/test-index.js b/test/test-index.js index ae3b6a4..02cffa8 100644 --- a/test/test-index.js +++ b/test/test-index.js @@ -398,6 +398,68 @@ describe('Cloudscraper', function () { this.clock.tick(14000); // tick the timeout }); + it('should reuse the provided cookie jar', function(done) { + var customJar = request.jar(); + + var firstParams = helper.extendParams({ jar: customJar }); + + var firstResponse = helper.fakeResponse({ + body: helper.getFixture('js_challenge_cookie.html') + }); + + // Cloudflare is enabled for site. + // It returns a redirecting page if a (session) cookie is unset. + Request.onFirstCall() + .callsFake(helper.fakeRequest({ response: firstResponse })); + + var secondParams = helper.extendParams({ + jar: customJar, + challengesToSolve: 2 + }); + + var secondResponse = helper.fakeResponse({ body: requestedPage }); + + // Only callback with the second response if the cookie string matches + var matchCookie = sinon.match(function (params) { + return params.jar.getCookieString(uri) === 'sucuri_cloudproxy_uuid_575ef0f62=16cc0aa4400d9c6961cce3ce380ce11a'; + }); + + // Prevent a matching error if for some reason params.jar is missing or invalid. 
+ var matchParams = sinon.match.has('jar', sinon.match.object).and(matchCookie); + + Request.withArgs(matchParams) + .callsFake(helper.fakeRequest({ response: secondResponse })); + + // We need to override cloudscraper's default jar for this test + var options = { uri: uri, jar: customJar }; + + customJar.setCookie('custom cookie', 'http://custom-site.dev/'); + + cloudscraper.get(options, function (error, response, body) { + expect(error).to.be.null; + + expect(Request).to.be.calledTwice; + expect(Request.firstCall).to.be.calledWithExactly(firstParams); + expect(Request.secondCall).to.be.calledWithExactly(secondParams); + + expect(response).to.be.equal(secondResponse); + expect(body).to.be.equal(secondResponse.body); + + var customCookie = customJar.getCookieString('http://custom-site.dev/'); + expect(customCookie).to.equal('custom cookie'); + + cloudscraper.get(options, function(error, response, body) { + expect(error).to.be.null; + + expect(Request.thirdCall.args[0].jar).to.equal(customJar); + customCookie = customJar.getCookieString('http://custom-site.dev/'); + expect(customCookie).to.equal('custom cookie'); + + done(); + }); + }); + }); + it('should define custom defaults function', function (done) { expect(cloudscraper.defaults).to.not.equal(request.defaults); From 8240ee23e67525468d34dc9de75b6908b34f09ba Mon Sep 17 00:00:00 2001 From: pro-src Date: Tue, 5 Mar 2019 08:06:18 -0600 Subject: [PATCH 25/33] Fixes #112 --- index.js | 59 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/index.js b/index.js index dd68e66..bbcdb53 100644 --- a/index.js +++ b/index.js @@ -1,6 +1,7 @@ var vm = require('vm'); var requestModule = require('request-promise'); var errors = require('./errors'); + var VM_OPTIONS = { timeout: 5000 }; @@ -180,43 +181,47 @@ function validate(options, response, body) { function solveChallenge(options, response, body) { var callback = options.callback; - var challenge = 
body.match(/name="jschl_vc" value="(\w+)"/); var uri = response.request.uri; - var jsChlVc; - var answerResponse; - var solvedChallenge; + // The JS challenge to be evaluated for answer/response. + var challenge; + // The result of challenge being evaluated in sandbox + var answer; + // The query string to send back to Cloudflare + // var payload = { jschl_vc, jschl_answer, pass }; + var payload = {}; + + var match; var error; var cause; - if (!challenge) { + match = body.match(/name="jschl_vc" value="(\w+)"/); + + if (!match) { cause = 'challengeId (jschl_vc) extraction failed'; error = new errors.ParserError(cause, options, response); return callback(error, response, body); } - jsChlVc = challenge[1]; + payload.jschl_vc = match[1]; - challenge = body.match(/getElementById\('cf-content'\)[\s\S]+?setTimeout.+?\r?\n([\s\S]+?a\.value =.+?)\r?\n/i); + match = body.match(/getElementById\('cf-content'\)[\s\S]+?setTimeout.+?\r?\n([\s\S]+?a\.value =.+?)\r?\n/i); - if (!challenge) { + if (!match) { cause = 'setTimeout callback extraction failed'; error = new errors.ParserError(cause, options, response); return callback(error, response, body); } - var challenge_pass = body.match(/name="pass" value="(.+?)"/)[1]; - - challenge = challenge[1]; - - challenge = challenge.replace(/a\.value =(.+?) \+ .+?;/i, '$1'); - - challenge = challenge.replace(/\s{3,}[a-z](?: = |\.).+/g, ''); - challenge = challenge.replace(/'; \d+'/g, ''); + challenge = match[1] + .replace(/a\.value =(.+?) 
\+ .+?;/i, '$1') + .replace(/\s{3,}[a-z](?: = |\.).+/g, '') + .replace(/'; \d+'/g, ''); try { - solvedChallenge = vm.runInNewContext(challenge, Object.create(null), VM_OPTIONS); + answer = vm.runInNewContext(challenge, undefined, VM_OPTIONS); + payload.jschl_answer = answer + uri.hostname.length; } catch (error) { error.message = 'Challenge evaluation failed: ' + error.message; error = new errors.ParserError(error, options, response); @@ -224,18 +229,24 @@ function solveChallenge(options, response, body) { return callback(error, response, body); } - answerResponse = { - 'jschl_vc': jsChlVc, - 'jschl_answer': (solvedChallenge + uri.hostname.length), - 'pass': challenge_pass - }; - + match = body.match(/name="pass" value="(.+?)"/); + + if (!match) { + cause = 'Attribute (pass) value extraction failed'; + error = new errors.ParserError(cause, options, response); + + return callback(error, response, body); + } + + payload.pass = match[1]; + // Prevent reusing the headers object to simplify unit testing. options.headers = Object.assign({}, options.headers); // Use the original uri as the referer and to construct the answer url. options.headers['Referer'] = uri.href; options.uri = uri.protocol + '//' + uri.hostname + '/cdn-cgi/l/chk_jschl'; - options.qs = answerResponse; + // Set the query string and decrement the number of challenges to solve. + options.qs = payload; options.challengesToSolve = options.challengesToSolve - 1; // Make request with answer. 
From 6135a618da1cef5aa0482a1be0fc3c94fc7ed583 Mon Sep 17 00:00:00 2001 From: pro-src Date: Tue, 5 Mar 2019 15:01:29 -0600 Subject: [PATCH 26/33] Closes #78 Bug Reports --- errors.js | 75 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/errors.js b/errors.js index 1edf3e8..21bff5e 100644 --- a/errors.js +++ b/errors.js @@ -1,8 +1,5 @@ 'use strict'; -var original = require('request-promise-core/errors'); -var OriginalError = original.RequestError; - // The purpose of this library is two-fold. // 1. Have errors consistent with request/promise-core // 2. Prevent request/promise core from wrapping our errors @@ -11,6 +8,44 @@ var OriginalError = original.RequestError; // 1. There is a non-enumerable errorType attribute. // 2. The error constructor is hidden from the stacktrace. +var EOL = require('os').EOL; +var BUG_REPORT = format([ + '### Cloudflare may have changed their technique, or there may be a bug.', + '### Bug Reports: https://github.com/codemanki/cloudscraper/issues', + '### Check the detailed exception message that follows for the cause.' +]); + +var original = require('request-promise-core/errors'); +var OriginalError = original.RequestError; + +var RequestError = create('RequestError', 0); +var CaptchaError = create('CaptchaError', 1); +var CloudflareError = create('CloudflareError', 2); +var ParserError = create('ParserError', 3); +// errorType 4 is a CloudflareError so that constructor is reused. + +// The following errors originate from promise-core and it's dependents. +// Give them an errorType for consistency. +original.StatusCodeError.prototype.errorType = 5; +original.TransformError.prototype.errorType = 6; + +// This replaces the RequestError for all libraries using request/promise-core +// and prevents silent failure. 
+Object.defineProperty(original, 'RequestError', { + configurable: true, + enumerable: true, + writable: true, + value: RequestError +}); + +// Export our custom errors along with StatusCodeError, etc. +Object.assign(module.exports, original, { + RequestError: RequestError, + CaptchaError: CaptchaError, + ParserError: ParserError, + CloudflareError: CloudflareError +}); + function create(name, errorType) { function CustomError(cause, options, response) { @@ -25,6 +60,10 @@ function create(name, errorType) { // Change the name to match this constructor this.name = name; + if (this instanceof ParserError) { + this.message = BUG_REPORT + this.message; + } + if (Error.captureStackTrace) { // required for non-V8 environments // Provide a proper stack trace that hides this constructor Error.captureStackTrace(this, CustomError); @@ -46,30 +85,6 @@ function create(name, errorType) { return CustomError; } -var RequestError = create('RequestError', 0); -var CaptchaError = create('CaptchaError', 1); -var CloudflareError = create('CloudflareError', 2); -var ParserError = create('ParserError', 3); -// errorType 4 is a CloudflareError so that constructor is reused. - -// The following errors originate from promise-core and it's dependents. -// Give them an errorType for consistency. -original.StatusCodeError.prototype.errorType = 5; -original.TransformError.prototype.errorType = 6; - -// This replaces the RequestError for all libraries using request/promise-core -// and prevents silent failure. -Object.defineProperty(original, 'RequestError', { - configurable: true, - enumerable: true, - writable: true, - value: RequestError -}); - -// Export our custom errors along with StatusCodeError, etc. 
-Object.assign(module.exports, original, { - RequestError: RequestError, - CaptchaError: CaptchaError, - ParserError: ParserError, - CloudflareError: CloudflareError -}); +function format(lines) { + return EOL + lines.join(EOL) + EOL + EOL; +} From 35ea10f2c5558e445dea520a58b1ea02b08d7b96 Mon Sep 17 00:00:00 2001 From: Oleksii Sribnyi Date: Thu, 7 Mar 2019 15:48:20 +0100 Subject: [PATCH 27/33] Changelog --- CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c102d1..40a1e45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ ## Change Log +### v3.0.0 (07/03/2019) +- **BREAKING CHANGE**: `get/post` methods together with their signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod) +- **BREAKING CHANGE**: `cloudscraper.request` method is deprecated in favour of `cloudscraper(options)` +- Promise support has been added by using `request-promise` +- Error object are inherited from Error and have additional properties. + * `options` - The request options + * `cause` - An alias for `error` + * `response` - The request response +- Stacktraces are available in error objects +- `cloudflareTimeout` option can be defined to speed up waiting time +- Challenge evaluation is done in a sandbox to avoid potential secutiry issues +- Default [request methods](https://github.com/request/request#requestmethod) are available +- MIT license + +### v2.0.1 (02/03/2019) +- Minor documentation changes + ### v2.0.0 (09/12/2018) - [#2943](https://github.com/codemanki/cloudscraper/pull/66) Support recursive challenge solving. 
- **BREAKING CHANGE** Before this, when any error has been detected, the callback was called with an incorrect order: `callback(.., body, response);` instead of `return callback(..., response, body);` + From ee3b548c1ff4e92c664a4d16faadaa9dc584bf9a Mon Sep 17 00:00:00 2001 From: Oleksii Sribnyi Date: Thu, 7 Mar 2019 15:59:44 +0100 Subject: [PATCH 28/33] 2 more items for the changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40a1e45..75962c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ - `cloudflareTimeout` option can be defined to speed up waiting time - Challenge evaluation is done in a sandbox to avoid potential secutiry issues - Default [request methods](https://github.com/request/request#requestmethod) are available +- Custom cookie jar can now be passed [#102](https://github.com/codemanki/cloudscraper/issues/102) +- Proxies support [PR#101](https://github.com/codemanki/cloudscraper/pull/101) - MIT license ### v2.0.1 (02/03/2019) - Minor documentation changes From 89aae64b62819d5d5e8ac74bbaeacfd06ec4584a Mon Sep 17 00:00:00 2001 From: Oleksii Sribnyi Date: Thu, 7 Mar 2019 16:23:24 +0100 Subject: [PATCH 29/33] Add migration section --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index c1dc2f5..301ab64 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,22 @@ __Unfortunately, there is no support for handling a CAPTCHA, if the response con If you notice that for some reason cloudscraper stopped to work, do not hesitate and get in touch with me ( by creating an issue here, for example), so i can update it.
+Migration from v2 to v3 +============ +- Replace `cloudscraper.request(options)` with `cloudscraper(options)` +- `cloudscraper.get()` and `cloudscraper.post()` method signatures are now +- If you are using custom promise support workarounds please remove them as cloudscraper now uses [request-promise](https://github.com/request/request-promise): +``` +var cloudscraper = require('cloudscraper'); +var options = { + uri: 'https://website.com/', + method: 'GET' +}; +cloudscraper(options).then(function(body) { + console.log(body); +}); +``` + Install ============ ```javascript From 6d40881c07412e8652e1b198e3fbb9ef7cc7ccee Mon Sep 17 00:00:00 2001 From: Oleksii Sribnyi Date: Thu, 7 Mar 2019 16:30:03 +0100 Subject: [PATCH 30/33] Add more clarification to the migration section --- README.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 301ab64..c236b8d 100644 --- a/README.md +++ b/README.md @@ -32,14 +32,38 @@ If you notice that for some reason cloudscraper stopped to work, do not hesitate Migration from v2 to v3 ============ - Replace `cloudscraper.request(options)` with `cloudscraper(options)` -- `cloudscraper.get()` and `cloudscraper.post()` method signatures are now +- `cloudscraper.get()` and `cloudscraper.post()` method signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod): +``` +var options = { + uri: 'https://website.com/', + headers: {/*...*/} +}; + +cloudscraper.get(options).then(function(body) { + console.log(body); +}); +``` +or for **POST** +``` +var options = { + uri: 'https://website.com/', + headers: {/*...*/}, + formData: { field1: 'value', field2: 2 } +}; + +cloudscraper.post(options).then(function(body) { + console.log(body); +}); +``` - If you are using custom promise support workarounds please remove them as cloudscraper now uses [request-promise](https://github.com/request/request-promise): + ``` var cloudscraper = 
require('cloudscraper'); var options = { uri: 'https://website.com/', method: 'GET' }; + cloudscraper(options).then(function(body) { console.log(body); }); From 6663c9687f1c1c8011f5c21b57632dcdf0c5cceb Mon Sep 17 00:00:00 2001 From: Oleksii Sribnyi Date: Thu, 7 Mar 2019 17:03:19 +0100 Subject: [PATCH 31/33] Updated readme --- README.md | 107 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index c236b8d..f6052af 100644 --- a/README.md +++ b/README.md @@ -35,23 +35,23 @@ Migration from v2 to v3 - `cloudscraper.get()` and `cloudscraper.post()` method signatures are aligned with corresponding methods from [request](https://github.com/request/request#requestmethod): ``` var options = { - uri: 'https://website.com/', - headers: {/*...*/} + uri: 'https://website.com/', + headers: {/*...*/} }; -cloudscraper.get(options).then(function(body) { +cloudscraper.get(options, function(error, response, body) { console.log(body); }); ``` or for **POST** ``` var options = { - uri: 'https://website.com/', - headers: {/*...*/}, - formData: { field1: 'value', field2: 2 } + uri: 'https://website.com/', + headers: {/*...*/}, + formData: { field1: 'value', field2: 2 } }; -cloudscraper.post(options).then(function(body) { +cloudscraper.post(options, function(error, response, body) { console.log(body); }); ``` @@ -80,7 +80,7 @@ Usage ```javascript var cloudscraper = require('cloudscraper'); -cloudscraper.get('http://website.com/', function(error, response, body) { +cloudscraper.get('https://website.com/', function(error, response, body) { if (error) { console.log('Error occurred'); } else { @@ -92,30 +92,78 @@ cloudscraper.get('http://website.com/', function(error, response, body) { or for `POST` action: ```javascript -cloudscraper.post('http://website.com/', {field1: 'value', field2: 2}, function(error, response, body) { - ... 
+var options = { + uri: 'https://website.com/', + formData: { field1: 'value', field2: 2 } +}; + +cloudscraper.post(options, function(error, response, body) { + console.log(body); }); ``` -A generic request can be made with `cloudscraper.request(options, callback)`. The options object should follow [request's options](https://www.npmjs.com/package/request#request-options-callback). Not everything is supported however, for example http methods other than GET and POST. If you wanted to request an image in binary data you could use the encoding option: +A generic request can be made with `cloudscraper(options, callback)`. The options object should follow [request's options](https://www.npmjs.com/package/request#request-options-callback). Not everything is supported however, for example http methods other than GET and POST. If you wanted to request an image in binary data you could use the encoding option: ```javascript -cloudscraper.request({method: 'GET', - url:'http://website.com/image', - encoding: null, - challengesToSolve: 3, // optional, if CF returns challenge after challenge, how many to solve before failing - followAllRedirects: true, // mandatory for successful challenge solution - }, function(err, response, body) { - //body is now a buffer object instead of a string +var options = { + method: 'GET', + url:'http://website.com/', +}; + +cloudscraper(options, function(err, response, body) { + console.log(response) }); ``` -## Error object -Error object has following structure: -``` -var error = {errorType: 0, error:...}; +## Advanced usage +Cloudscraper wraps request and request-promise, so using cloudscraper is pretty much like using those two libraries. 
+ - Cloudscraper exposes [the same request methods as request](https://github.com/request/request#requestmethod): + `cloudscraper.get(options, callback)` + `cloudscraper.post(options, callback)` + `cloudscraper(uri)` + Please refer to request's documentation for further instructions + - Cloudscraper uses request-promise, promise chaining is done exactly the same as described in [docs](https://github.com/request/request-promise#cheat-sheet): + ``` + cloudscraper(options) + .then(function (htmlString) { + }) + .catch(function (err) { + }); + ``` + +## Default options +Cloudscraper exposes the following options that are required by default but might be changed. Please note that default options increase chances of correct work. + ``` +var options = { + uri: 'https://website', + jar: requestModule.jar(), // Custom cookie jar + headers: { + // User agent, Cache Control and Accept headers are required + 'User-Agent': 'Ubuntu Chromium/34.0.1847.116 Chrome/34.0.1847.116 Safari/537.36', + 'Cache-Control': 'private', + 'Accept': 'application/xml,application/xhtml+xml,text/html;q=0.9, text/plain;q=0.8,image/png,*/*;q=0.5' + }, + // Cloudflare requires a delay of 5 seconds, so wait for at least 6. + cloudflareTimeout: 6000, + // followAllRedirects - follow non-GET HTTP 3xx responses as redirects + followAllRedirects: true, + // Support only this max challenges in row. If CF returns more, throw an error + challengesToSolve: 3 +}; +cloudscraper(options, function(error, response, body) { + console.log(body) +}); + +``` +## Error object +Cloudscraper error object inherits from `Error` and has the following fields: + * `name` - `RequestError`/`CaptchaError`/`CloudflareError`/`ParserError` + * `options` - The request options + * `cause` - An alias for `error` + * `response` - The request response + * `errorType` - Custom error code Where `errorType` can be following: - `0` if request to page failed due to some native reason as bad url, http connection or so.
`error` in this case will be error [event](http://nodejs.org/api/http.html#http_class_http_server) - `1` cloudflare returned captcha. Nothing to do here. Bad luck @@ -123,7 +171,6 @@ Where `errorType` can be following: - `3` this error is returned when library failed to parse and solve js challenge. `error` will be `String` with some details. :warning: :warning: __Most likely it means that cloudflare have changed their js challenge.__ - `4` CF went into a loop and started to return challenge after challenge. If number of solved challenges is greater than `3` and another challenge is returned, throw an error - Running tests ============ Clone this repo, do `npm install` and then just `grunt` @@ -131,12 +178,9 @@ Clone this repo, do `npm install` and then just `grunt` ### Unknown error? Library stopped working? ### Let me know, by opening [issue](https://github.com/codemanki/cloudscraper/issues) in this repo and i will update library asap. Please, provide url and body of page where cloudscraper failed. - -CloudScraper uses [Request](https://github.com/request/request) to perform requests. - WAT =========== -Current cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So everytime you call `cloudscraper.get` you should expect it to return result after min 6 seconds. +Current cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So everytime you call `cloudscraper.get/post` you should expect it to return result after minimum 6 seconds. If you want to change this behaviour, you would need to make a generic request as desceribed in above and pass `cloudflareTimeout` options with your value. 
But be aware that cloudflare might track this timeout and use ir against you ;) ## TODO - [x] Check for recaptcha @@ -145,17 +189,20 @@ Current cloudflare implementation requires browser to respect the timeout of 5 s - [x] Add proper testing - [x] Remove manual 302 processing, replace with `followAllRedirects` param - [ ] Parse out the timeout from chalenge page - - [ ] Reoder the arguments in get/post/request methods and allow custom options to be passed in + - [x] Reoder the arguments in get/post/request methods and allow custom options to be passed in - [ ] Expose solve methods to use them independently - [ ] Support recaptcha solving - - [ ] Promisification + - [x] Promisification ## Kudos to contributors + - [Dwayne](https://github.com/pro-src) by himself rewrote the whole library, closed bunch of issues and feature requests. Praise him for 3.0.0 version <3 - [roflmuffin](https://github.com/roflmuffin) - [Colecf](https://github.com/Colecf) - [Jeongbong Seo](https://github.com/jngbng) - [Kamikadze4GAME](https://github.com/Kamikadze4GAME) ## Dependencies -* request https://github.com/request/request +* [request](https://github.com/request/request) +* [request-promise](https://github.com/request/request-promise) + From 3fdc78b049436660d0b214ab6198e8915da094d2 Mon Sep 17 00:00:00 2001 From: Oleksii Sribnyi Date: Thu, 7 Mar 2019 17:03:50 +0100 Subject: [PATCH 32/33] Bump version to 3.0.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index aa249e0..f5d8e92 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "cloudscraper", - "version": "2.0.1", + "version": "3.0.0", "description": "Bypasses cloudflare's anti-ddos page", "main": "index.js", "scripts": { From abcc968b6f05c177eb63284ae10db2247de7d427 Mon Sep 17 00:00:00 2001 From: Oleksii Sribnyi Date: Thu, 7 Mar 2019 17:04:59 +0100 Subject: [PATCH 33/33] Fix typos --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/README.md b/README.md index f6052af..6b12b05 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,7 @@ Let me know, by opening [issue](https://github.com/codemanki/cloudscraper/issues WAT =========== -Current cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So everytime you call `cloudscraper.get/post` you should expect it to return result after minimum 6 seconds. If you want to change this behaviour, you would need to make a generic request as desceribed in above and pass `cloudflareTimeout` options with your value. But be aware that cloudflare might track this timeout and use ir against you ;) +Current cloudflare implementation requires browser to respect the timeout of 5 seconds and cloudscraper mimics this behaviour. So every time you call `cloudscraper.get/post` you should expect it to return result after minimum 6 seconds. If you want to change this behaviour, you would need to make a generic request as described above and pass `cloudflareTimeout` options with your value. But be aware that cloudflare might track this timeout and use it against you ;) ## TODO - [x] Check for recaptcha @@ -195,7 +195,7 @@ Current cloudflare implementation requires browser to respect the timeout of 5 s - [x] Promisification ## Kudos to contributors - - [Dwayne](https://github.com/pro-src) by himself rewrote the whole library, closed bunch of issues and feature requests. Praise him for 3.0.0 version <3 + - [Dwayne](https://github.com/pro-src) by himself rewrote the whole library, closed a bunch of issues and feature requests. Praise him for 3.0.0 version ❤️ - [roflmuffin](https://github.com/roflmuffin) - [Colecf](https://github.com/Colecf) - [Jeongbong Seo](https://github.com/jngbng) - [Kamikadze4GAME](https://github.com/Kamikadze4GAME)