Skip to content

Commit

Permalink
meta, ur
Browse files Browse the repository at this point in the history
  • Loading branch information
davidfmiller committed Mar 17, 2017
1 parent 9c1947c commit d4d5a46
Show file tree
Hide file tree
Showing 3 changed files with 276 additions and 33 deletions.
221 changes: 212 additions & 9 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,184 @@
'use strict';
/* global require, module, console, Promise */

function extensionForMime(mime) {
//(function(){

const
xpath = require('xpath'),
parse5 = require('parse5'),
xmlser = require('xmlserializer'),
url = require('url'),
async = require('async'),
process = require('process'),
DOM = require('xmldom').DOMParser,
request = require('request-promise-native'),

USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0 Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0';

/**
 * Extract metadata from an HTML document: the title, the canonical URL, the
 * favicon / apple-touch-icon links and the `og:image`, `og:title` and
 * `og:description` properties.
 *
 * @param markup {String} - the contents of the HTML document that will be parsed
 * @param options {Object} - (optional) parsing options; the object is never modified
 * @param options.baseURL {String} - (optional) the base URL that is applied to all relative icon/image paths within the document; '/' is used when omitted
 * @param options.icons {Boolean} - (optional) when truthy (and a baseURL was provided) every icon/image address is probed with a HEAD request and its `mime` is filled in
 * @return {Promise} resolves with the collected metadata object; rejects if the markup cannot be parsed
 */
const parseMetadata = function(markup, options) {

  return new Promise(function(resolve) {

    const
      settings = options || {},
      hasBaseURL = Boolean(settings.baseURL),
      // relative paths are resolved against '/' when the caller gave no base
      baseURL = hasBaseURL ? settings.baseURL : '/',
      document = parse5.parse(markup),
      xhtml = xmlser.serializeToString(document),
      doc = new DOM().parseFromString(xhtml),
      select = xpath.useNamespaces({'x': 'http://www.w3.org/1999/xhtml'}),
      metas = select('//x:meta', doc),
      links = select('//x:link', doc),
      title = select('//x:title', doc),
      OBJ = {};

    // an empty <title></title> has no text node to read
    if (title && title.length && title[0].firstChild) {
      OBJ.title = title[0].firstChild.nodeValue;
    }

    links.forEach(function(node) {

      const
        rel = xpath.select1('@rel', node),
        href = xpath.select1('@href', node);

      if (! rel || ! href) { return; }

      switch (rel.value) {
        case 'shortcut icon':
          OBJ.favicon = {
            mime : null,
            url : url.resolve(baseURL, href.value)
          };
          break;
        case 'apple-touch-icon':
          OBJ['apple-touch-icon'] = {
            mime : null,
            url : url.resolve(baseURL, href.value)
          };
          break;
        case 'canonical':
          OBJ.canonical = href.value;
          break;
      }
    });

    metas.forEach(function(node) {

      const
        property = xpath.select1('@property', node),
        content = xpath.select1('@content', node);

      if (! property || ! content) { return; }

      switch (property.value) {
        case 'og:image':
          OBJ['og:image'] = {
            mime : null,
            url : url.resolve(baseURL, content.value)
          };
          break;
        case 'og:title':
          OBJ['og:title'] = content.value;
          break;
        case 'og:description':
          OBJ['og:description'] = content.value;
          break;
      }
    });

    // if we don't need the icons (or have no base URL to locate them), resolve
    // and stop: nothing below may run once the promise has been settled
    if (! hasBaseURL || ! settings.icons) {
      resolve(OBJ);
      return;
    }

    const
      parsed = url.parse(baseURL),
      // `host` (unlike `hostname`) keeps a non-default port
      origin = parsed.protocol + '//' + parsed.host,
      addresses = {
        // fall back to the conventional locations when the markup names no icon
        'favicon' : OBJ.favicon ? OBJ.favicon.url : url.resolve(origin, '/favicon.ico'),
        'apple-touch-icon' : OBJ['apple-touch-icon'] ? OBJ['apple-touch-icon'].url : url.resolve(origin, '/apple-touch-icon.png')
      };

    if (OBJ['og:image']) {
      addresses['og:image'] = OBJ['og:image'].url;
    }

    const probes = Object.keys(addresses).map(function(key) {

      // starting inside a promise chain turns a synchronous failure of
      // request.head() into a rejection handled by the catch below
      return Promise.resolve().then(function() {
        return request.head({
          url : addresses[key],
          headers : {
            'User-Agent' : USER_AGENT
          }
        });
      }).then(function(headers) {
        // NOTE(review): relies on request-promise resolving HEAD requests
        // with the response headers - confirm against the library docs
        OBJ[key] = {
          mime : headers['content-type'],
          url : addresses[key]
        };
      }).catch(function() {
        // if icon doesn't exist (or is unreachable), move on: a missing icon
        // must not fail the whole lookup, so the entry is left as it was
      });
    });

    // every probe swallows its own failure, so this never rejects
    Promise.all(probes).then(function() {
      resolve(OBJ);
    });

  }); // end promise
};


/**
 * Download an HTML document and extract its metadata with parseMetadata.
 *
 * @param address {String} - the URL of the document to retrieve
 * @param options {Object} - (optional) options handed to parseMetadata; `baseURL` falls back to `address` when not provided. When omitted entirely, { baseURL : address, icons : true } is used
 * @return {Promise} resolves with the metadata object; rejects when the download or the parsing fails
 */
const retrieveMetadata = function(address, options) {

  // TODO http://stackoverflow.com/questions/16687618/how-do-i-get-the-redirected-url-from-the-nodejs-request-module

  // copy rather than modify the caller's object
  const settings = options ?
    Object.assign({}, options, { baseURL : options.baseURL || address }) :
    { baseURL : address, icons : true };

  // starting inside a promise chain keeps a synchronous failure of
  // request.get() a rejection instead of a thrown exception
  return Promise.resolve().then(function() {
    return request.get({
      url : address,
      headers : {
        'User-Agent' : USER_AGENT
      }
    });
  }).then(function(markup) {
    // returning the inner promise propagates parse failures to the caller
    return parseMetadata(markup, settings);
  });
};


const extensionForMime = function(mime) {

switch (mime) {
case 'text/html':
Expand All @@ -19,10 +197,10 @@ function extensionForMime(mime) {
return 'ico';
default:
return null;
};
}
}
};

function isURL(str) {
const isURL = function(str) {

var pattern = new RegExp('^(https?:\\/\\/)?'+ // protocol
'((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.?)+[a-z]{2,}|'+ // domain name
Expand All @@ -36,9 +214,34 @@ function isURL(str) {
} else {
return true;
}
}
};

//}());

module.exports = {
extensionForMime : extensionForMime,
isURL : isURL
};
mime : {
extensionFor : extensionForMime
},
url : {
isA : isURL,
},
meta : {
parse : parseMetadata,
retrieve : retrieveMetadata
}
};


// Command-line entry point: `node index.js <url>` prints the metadata of
// <url> as JSON on stdout. Problems are reported on stderr with a non-zero
// exit status so that stdout only ever carries JSON.
if (require.main === module) {

  if (process.argv.length === 3) {
    retrieveMetadata(process.argv[2]).then(function(meta) {
      console.log(JSON.stringify(meta));
    }).catch(function(err) {
      console.error('🚫 ' + err);
      process.exitCode = 1;
    });
  } else {
    console.error('🚫 No URL provided');
    process.exitCode = 1;
  }
}

17 changes: 12 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,28 @@
"description": "🌏",
"license": "MIT",
"homepage": "https://github.com/davidfmiller/rmr-lib",
"keywords": [],
"keywords": [
"rmr",
"lib"
],
"devDependencies": {
"chai": "^3.5.0",
"coveralls": "^2.12.0",
"mocha": "^3.2.0",
"snyk": "^1.25.2"
},
"dependencies": {},
"dependencies": {
"parse5": "^3.0.2",
"request-promise-native": "^1.0.3",
"xmldom": "^0.1.27",
"xmlserializer": "^0.6.0",
"xpath": "0.0.24"
},
"scripts": {
"prestart": "",
"start": "node index.js",
"poststart": "",
"snyk": "snyk test",
"coverage": "istanbul cover ./node_modules/mocha/bin/_mocha",
"coveralls": "istanbul cover ./node_modules/mocha/bin/_mocha --report lcovonly -- -R spec && cat ./coverage/lcov.info | ./node_modules/coveralls/bin/coveralls.js && rm -rf ./coverage",
"test": "mocha -t 10000"
"test": "mocha"
}
}
71 changes: 52 additions & 19 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,63 @@ const
chai = require('chai'),
expect = chai.expect;

describe('rmr-lib', function() {
// this.timeout(10000);

it('mimes', function() {

expect(RMR.extensionForMime('image/png')).to.equal('png');
expect(RMR.extensionForMime('image/tiff')).to.equal('tiff');
expect(RMR.extensionForMime('image/jpg')).to.equal('jpg');
expect(RMR.extensionForMime('image/jpeg')).to.equal('jpg');
expect(RMR.extensionForMime('image/gif')).to.equal('gif');
expect(RMR.extensionForMime('image/vnd.microsoft.icon')).to.equal('ico');
expect(RMR.extensionForMime('image/x-icon')).to.equal('ico');
expect(RMR.extensionForMime('text/html')).to.equal('html');
expect(RMR.extensionForMime('asdfadsf')).to.equal(null);
describe('RMR', function() {
this.timeout(10000);

it('RMR.mime', function() {

  // [ mime type, expected file extension ] - checked in the listed order
  const expectations = [
    ['image/png', 'png'],
    ['image/tiff', 'tiff'],
    ['image/jpg', 'jpg'],
    ['image/jpeg', 'jpg'],
    ['image/gif', 'gif'],
    ['image/vnd.microsoft.icon', 'ico'],
    ['image/x-icon', 'ico'],

    ['text/html', 'html'],
    ['asdfadsf', null] // unknown types have no extension
  ];

  expectations.forEach(function(pair) {
    expect(RMR.mime.extensionFor(pair[0])).to.equal(pair[1]);
  });
});

it('isURL', function() {
it('RMR.url', function() {

expect(RMR.isURL('http://google.com')).to.equal(true);
expect(RMR.isURL('https://google.com')).to.equal(true);
expect(RMR.url.isA('http://google.com')).to.equal(true);
expect(RMR.url.isA('https://google.com')).to.equal(true);
// expect(RMR.url.isA('httpinvalid.com')).to.equal(false, 'abc');

expect(RMR.isURL('https://google.com')).to.equal(true);
});


it('RMR.meta', function() {

  // The lookups hit the network and settle asynchronously. Returning the
  // combined promise makes mocha wait for all of them and fail the test
  // when a request is rejected or an assertion throws.
  return Promise.all([

    RMR.meta.retrieve('https://google.ca').then(function(data) {

      expect(data.title).to.equal('Google');
      expect(data.favicon.url).to.equal('https://google.ca/images/branding/product/ico/googleg_lodp.ico');

    }),

    RMR.meta.retrieve('https://www.apple.com/shop/buy-ipad/ipad-mini-2').then(function(data) {

      expect(data.title).to.equal('Buy iPad mini 2 - Apple');
      expect(data.favicon.url).to.equal('https://www.apple.com/favicon.ico');
      expect(data['og:description']).to.equal('iPad mini 2 is available in Silver or Space Gray, a range of storage sizes, and the option to add cellular data capability. View iPad mini 2 and pricing.');

    }),

    RMR.meta.retrieve('http://readmeansrun.com').then(function(data) {

      expect(data.title).to.equal('READMEANSRUN');
      expect(data.favicon.url).to.equal('http://readmeansrun.com/favicon.ico');
      expect(data['og:description']).to.equal('READMEANSRUN makes websites and takes pictures');

    }),

    RMR.meta.retrieve('http://readmeansrun.com', { icons : false }).then(function(data) {

      expect(data.favicon).to.equal(undefined);

    })
  ]);

});

});

0 comments on commit d4d5a46

Please sign in to comment.