Skip to content
This repository has been archived by the owner on Jun 1, 2021. It is now read-only.

Commit

Permalink
Use a local version of netsniff.js, from PhantomJS
Browse files Browse the repository at this point in the history
  • Loading branch information
joelpurra committed May 14, 2014
1 parent a095459 commit 914deb2
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 7 deletions.
19 changes: 19 additions & 0 deletions README.md
@@ -0,0 +1,19 @@
# [heedless](https://github.com/joelpurra/heedless/)

Download [HAR files](http://www.softwareishard.com/blog/har-12-spec/) using [PhantomJS](http://phantomjs.org/) for the front pages of domains.



## Original purpose

Built as a component in [Joel Purra's master's thesis](http://joelpurra.com/projects/masters-thesis/) research, where downloading lots of front pages in the .se top level domain zone was required to analyze their content and use of internal/external resources.



## Thanks

- `netsniff.js` is based on the [example with the same name](https://github.com/ariya/phantomjs/blob/master/examples/netsniff.js) in [PhantomJS](http://phantomjs.org/), created by Ariya Hidayat, released under the [BSD 3-Clause "New" or "Revised" License (BSD-3-Clause)](http://opensource.org/licenses/BSD-3-Clause).



Copyright (c) 2014 [Joel Purra](http://joelpurra.com/). Released under [GNU General Public License version 3.0 (GPL-3.0)](https://www.gnu.org/licenses/gpl.html).
10 changes: 3 additions & 7 deletions src/get/har.sh
@@ -1,16 +1,12 @@
#!/bin/bash
# Runs netsniff.js under PhantomJS for the URL given as the first argument
# and captures what the script prints.
set -e

# Prints a notice when `which` finds no phantomjs; this line only echoes, it does not exit by itself.
[[ ! `which phantomjs` ]] && echo "phantomjs is required"

# Work from the directory containing this script so the relative paths below resolve.
cd "$(dirname $0)"

url="$1"
phantomExecutable="phantomjs"
# Resolve the phantomjs executable through its symlink to find the directory
# of the real binary, then derive the path of the bundled netsniff.js example.
phantomPath=$(which "$phantomExecutable")
phantomDir=$(dirname "$phantomPath")
phantomRelativeLink=$(readlink "$phantomPath")
phantomRealpath=`echo $phantomDir/$phantomRelativeLink`
phantomRealdir=$(dirname "$phantomRealpath")
netsniffJs="$phantomRealdir/../share/phantomjs/examples/netsniff.js"
# This assignment overrides the one directly above: the local copy next to this script is used.
netsniffJs="./netsniff.js"
executionErrorHAR="./execution-error.har"

# Capture the standard output of netsniff.js for the requested URL.
result=$(phantomjs "$netsniffJs" "$url")
Expand Down
143 changes: 143 additions & 0 deletions src/get/netsniff.js
@@ -0,0 +1,143 @@
/**
 * Format the Date bound to `this` as an ISO 8601 timestamp in UTC
 * (YYYY-MM-DDTHH:mm:ss.sssZ).
 *
 * Used as a fallback for engines that lack a native
 * Date.prototype.toISOString. The UTC getters are read on purpose: the
 * trailing 'Z' designates UTC, so using the local-time getters would emit
 * a timestamp that is off by the local timezone offset.
 *
 * @this {Date}
 * @returns {string} the timestamp, e.g. "2014-05-14T03:04:05.006Z"
 */
function toISOStringFallback() {
    // Left-pad to two digits (month, day, hours, minutes, seconds).
    function pad(n) { return n < 10 ? '0' + n : n; }
    // Left-pad to three digits (milliseconds).
    function ms(n) { return n < 10 ? '00' + n : n < 100 ? '0' + n : n; }

    return this.getUTCFullYear() + '-' +
        pad(this.getUTCMonth() + 1) + '-' + // getUTCMonth() is zero-based
        pad(this.getUTCDate()) + 'T' +
        pad(this.getUTCHours()) + ':' +
        pad(this.getUTCMinutes()) + ':' +
        pad(this.getUTCSeconds()) + '.' +
        ms(this.getUTCMilliseconds()) + 'Z';
}

// Install the fallback only when the engine has no native implementation.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = toISOStringFallback;
}

/**
 * Assemble a HAR log object (version '1.2') describing one page load.
 *
 * Besides its parameters, this function reads two globals: `phantom`
 * (for the creator version) and `page` (its `startTime` / `endTime`
 * properties feed the page's onLoad timing).
 *
 * @param {string} address   used as the page id and as every entry's pageref
 * @param {string} title     page title stored in the single page record
 * @param {Date}   startTime page start; serialized with toISOString()
 * @param {Array}  resources records shaped { request, startReply, endReply };
 *                           records missing any of the three are left out
 * @returns {Object} an object of the form { log: { version, creator, pages, entries } }
 */
function createHAR(address, title, startTime, resources)
{
    // A resource is reported only once its request, first reply and final
    // reply have all been recorded.
    function isReportable(item) {
        if (!(item.request && item.startReply && item.endReply)) {
            return false;
        }

        // Exclude Data URI from HAR file because
        // they aren't included in specification
        return !item.request.url.match(/(^data:image\/.*)/i);
    }

    // Turn one recorded resource into a HAR entry.
    function toEntry(item) {
        var sent = item.request,
            firstReply = item.startReply,
            lastReply = item.endReply;

        var requestPart = {
            method: sent.method,
            url: sent.url,
            httpVersion: "HTTP/1.1",
            cookies: [],
            headers: sent.headers,
            queryString: [],
            headersSize: -1,
            bodySize: -1
        };

        var responsePart = {
            status: lastReply.status,
            statusText: lastReply.statusText,
            httpVersion: "HTTP/1.1",
            cookies: [],
            headers: lastReply.headers,
            redirectURL: "",
            headersSize: -1,
            bodySize: firstReply.bodySize,
            content: {
                size: firstReply.bodySize,
                mimeType: lastReply.contentType
            }
        };

        // Only wait/receive are derived from timestamps; the remaining
        // phases are fixed placeholder values.
        var timingPart = {
            blocked: 0,
            dns: -1,
            connect: -1,
            send: 0,
            wait: firstReply.time - sent.time,
            receive: lastReply.time - firstReply.time,
            ssl: -1
        };

        return {
            startedDateTime: sent.time.toISOString(),
            time: lastReply.time - sent.time,
            request: requestPart,
            response: responsePart,
            cache: {},
            timings: timingPart,
            pageref: address
        };
    }

    var creatorVersion = phantom.version.major + '.' + phantom.version.minor +
        '.' + phantom.version.patch;

    var pageRecord = {
        startedDateTime: startTime.toISOString(),
        id: address,
        title: title,
        pageTimings: {
            // Taken from the global `page`, not from the startTime argument.
            onLoad: page.endTime - page.startTime
        }
    };

    // filter/map skip holes in `resources`, just like forEach does.
    var harEntries = resources.filter(isReportable).map(toEntry);

    return {
        log: {
            version: '1.2',
            creator: {
                name: "PhantomJS",
                version: creatorVersion
            },
            pages: [pageRecord],
            entries: harEntries
        }
    };
}

// Script entry point: load the URL given on the command line, record every
// resource request/response pair, and print the resulting HAR as JSON.
var page = require('webpage').create();
var system = require('system');

// Remember when the page load began; createHAR reads page.startTime.
function noteLoadStarted() {
    page.startTime = new Date();
}

// Open a record for a newly requested resource, keyed by its id.
function noteResourceRequested(req) {
    page.resources[req.id] = {
        request: req,
        startReply: null,
        endReply: null
    };
}

// Attach the 'start' and 'end' stages of a response to the matching record.
function noteResourceReceived(res) {
    switch (res.stage) {
    case 'start':
        page.resources[res.id].startReply = res;
        break;
    case 'end':
        page.resources[res.id].endReply = res;
        break;
    }
}

// Called once page.open finishes: print the HAR on success, otherwise
// report the failure and exit with status 1.
function noteLoadFinished(status) {
    if (status !== 'success') {
        console.log('FAIL to load the address');
        phantom.exit(1);
        return;
    }

    page.endTime = new Date();
    page.title = page.evaluate(function () {
        return document.title;
    });

    var har = createHAR(page.address, page.title, page.startTime, page.resources);
    console.log(JSON.stringify(har, undefined, 4));
    phantom.exit();
}

if (system.args.length !== 1) {
    page.address = system.args[1];
    page.resources = [];

    page.onLoadStarted = noteLoadStarted;
    page.onResourceRequested = noteResourceRequested;
    page.onResourceReceived = noteResourceReceived;

    page.open(page.address, noteLoadFinished);
} else {
    // Only the script name was supplied: show usage and exit with status 1.
    console.log('Usage: netsniff.js <some URL>');
    phantom.exit(1);
}

0 comments on commit 914deb2

Please sign in to comment.