-
Notifications
You must be signed in to change notification settings - Fork 9
/
phantom_script.js
111 lines (97 loc) · 4.15 KB
/
phantom_script.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
// Unfortunately, since this is processed dynamically, it needs to be JavaScript and not CoffeeScript. https://github.com/ariya/phantomjs/issues/12410
// Single web page instance; renderPage() below reuses it for every URL it opens.
var page = require('webpage').create();
// webpage package is documented at http://phantomjs.org/api/webpage/
var system = require('system');
// URL to render, read from system.args[1] (presumably the first command-line argument after the script name; confirm against the caller).
var __url = system.args[1];
// Counter shared by every renderPage() call. The polling loop increments it and exits with code -1 once it exceeds 200.
var totalIterations = 0;
/**
 * Render `_url` in the shared PhantomJS `page` and print the result to stdout.
 *
 * After the page is opened, the Meteor app inside it is polled every 100 ms.
 * Once it reports ready (or the initial wait has elapsed and a status is
 * known), one JSON line `{ status, headers, content }` is written with
 * console.log and PhantomJS exits. If nothing usable shows up, the process
 * exits with code -1 after the shared `totalIterations` counter passes 200.
 *
 * @param {string} _url Address to render; a single trailing slash is ignored.
 */
var renderPage = function (_url) {
  // Strip the trailing slash so the URL compares equal to the normalised
  // resource URLs seen in onResourceReceived below.
  var url = _url.replace(/\/$/, '');
  var intervalId = false;
  var realStatus = null;
  var headers = [];

  /**
   * Ask the page whether the Meteor app has finished rendering.
   *
   * @returns {{ready: boolean}|{redirectTo: *}} `{ ready: true|false }`, or
   *   `{ redirectTo: ... }` when the page requests a redirect.
   */
  var isReady = function () {
    // The callback is evaluated inside the web page (see the PhantomJS
    // page.evaluate documentation), so it cannot share variables with this
    // script; everything it uses is kept local to it.
    var result = page.evaluate(function () {
      if (typeof window.Meteor === 'undefined' || window.Meteor.status === undefined || !window.Meteor.status().connected) {
        return { ready: false };
      }
      if (typeof window.Package === 'undefined' || window.Package['jazeee:spiderable-longer-timeout'] === undefined || window.Package['jazeee:spiderable-longer-timeout'].Spiderable === undefined || !window.Package['jazeee:spiderable-longer-timeout'].Spiderable._initialSubscriptionsStarted) {
        return { ready: false };
      }
      // We only need one of these flags set in order to proceed. I will deprecate Meteor.isRouteComplete after 2015-12-31
      // Declared with `var` so the flag does not leak into the page as an implicit global.
      var isReadyForSpiderable = window.Meteor.isRouteComplete || window.Meteor.isReadyForSpiderable;
      if (!isReadyForSpiderable) {
        return { ready: false };
      }
      if (typeof window.Tracker === 'undefined' || typeof window.DDP === 'undefined') {
        return { ready: false };
      }
      window.Tracker.flush();
      if (!window.DDP._allSubscriptionsReady()) {
        return { ready: false };
      } else if (window.Spiderable && window.Spiderable.redirect) {
        // Guarded: a missing global Spiderable simply means "no redirect".
        return { redirectTo: window.Spiderable.redirect };
      }
      return { ready: true };
    });
    // A non-object result (for example when evaluation failed) is treated as
    // "not ready" so the polling loop keeps running instead of throwing on
    // every tick and never reaching its bail-out.
    return (result && typeof result === 'object') ? result : { ready: false };
  };

  /**
   * Clean up the rendered HTML and print `{ status, headers, content }` as a
   * single JSON line via console.log.
   */
  var dumpPageContent = function () {
    var output = page.content;
    /*
    @url https://github.com/jazeee/jazeee-meteor-spiderable/issues/16
    @url http://googlewebmastercentral.blogspot.nl/2015/10/deprecating-our-ajax-crawling-scheme.html
    @description The <script>-stripping replace below should be removed in a future release, once we
    figure out how to avoid the content duplication caused by the page rendering twice: first on the
    server, then on the client. Suggestions are welcome in issue #16.
    */
    output = output.replace(/<script([^>](?!type))*[^\s>]?(\stype\=("|')(application|text)\/javascript("|'))?([^>](?!type))*>(.|\n|\r)*?<\/script\s*>/ig, '');
    output = output.replace(/<(\s)*meta(\s)+name\=('|")fragment('|")(\s)+content=('|")\!('|")(\s)*(\/)?(\s)*>/im, '');
    // A "<!-- response:status-code=NNN -->" comment in the markup overrides the
    // status taken from the network response. The pattern has exactly one
    // capture group, so the digits are in rem[1].
    var rem = /<!--\s*response:status-code=([0-9]{3})\s*-->/.exec(output);
    if (rem) {
      realStatus = parseInt(rem[1], 10);
    }
    console.log(JSON.stringify({
      status: realStatus,
      headers: headers,
      content: output
    }));
  };

  // Record status and headers of the response (or redirect) that matches the
  // requested URL, comparing with trailing slashes removed on both sides.
  page.onResourceReceived = function (response) {
    if (response.url && response.url.length) {
      response.url = response.url.replace(/\/$/, '');
    }
    if (response.redirectURL && response.redirectURL.length) {
      response.redirectURL = response.redirectURL.replace(/\/$/, '');
    }
    if (response.url === url || response.redirectURL === url) {
      realStatus = response.status;
      headers = response.headers;
    }
  };

  page.open(url, function () {
    var renderIterations = 0;
    intervalId = setInterval(function () {
      var renderStatus = isReady();
      if (renderIterations < 50 && (!renderStatus.ready || realStatus === null)) {
        // Under heavy server load, we may not get an immediate response. We will wait for up to 5 seconds before allowing a response. See #13
        renderIterations++;
        return;
      } else if (renderStatus.ready === true || realStatus) {
        clearInterval(intervalId);
        dumpPageContent();
        phantom.exit();
      } else if (renderStatus.redirectTo) {
        // NOTE(review): this branch is only reached when realStatus is still
        // falsy after the initial wait; confirm that is the intended priority.
        clearInterval(intervalId);
        renderPage(renderStatus.redirectTo);
      }
      if (totalIterations > 200) {
        // We have waited too long. Don't leave this process running in the background...
        phantom.exit(-1);
      }
      totalIterations++;
    }, 100);
  });
};
// Entry point: start rendering the URL read from system.args[1].
renderPage(__url);