Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

add first commit, support url and title now.

  • Loading branch information...
commit fbf9bcde061be627523b21872420449abbf0c870 1 parent 0f71974
@fengmk2 authored
View
3  .gitignore
@@ -1,4 +1,5 @@
lib-cov
+coverage.html
*.seed
*.log
*.csv
@@ -12,4 +13,4 @@ logs
results
node_modules
-npm-debug.log
+npm-debug.log
View
5 .npmignore
@@ -0,0 +1,5 @@
+lib-cov/
+coverage.html
+test/
+Makefile
+.travis.yml
View
5 .travis.yml
@@ -0,0 +1,5 @@
+language: node_js
+node_js:
+ - 0.9
+ - 0.8
+ - 0.6
View
21 LICENSE.txt
@@ -0,0 +1,21 @@
+This software is licensed under the MIT License.
+
+Copyright (C) 2012 by fengmk2 <fengmk2@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
View
20 Makefile
@@ -0,0 +1,20 @@
+TESTS = test/*.js
+REPORTER = spec
+TIMEOUT = 15000
+
+test:
+ @NODE_ENV=test ./node_modules/mocha/bin/mocha \
+ --reporter $(REPORTER) \
+ --timeout $(TIMEOUT) \
+ $(TESTS)
+
+test-cov:
+ @rm -rf ./lib-cov
+ @$(MAKE) lib-cov
+ @URLEXPAND_COV=1 $(MAKE) test
+ @URLEXPAND_COV=1 $(MAKE) test REPORTER=html-cov > coverage.html
+
+lib-cov:
+ @jscoverage lib $@
+
+.PHONY: test-cov test test-g lib-cov
View
49 README.md
@@ -1,4 +1,49 @@
-urlexpand
+urlexpand [![Build Status](https://secure.travis-ci.org/fengmk2/urlexpand.png)](http://travis-ci.org/fengmk2/urlexpand)
=========
-Expand the shorten url, handle all the 30x http redirect and get the original url.
+Expand a shortened URL, follow all 30x HTTP redirects, and get the original URL and the HTML page title.
+
+jscoverage: [91%](http://fengmk2.github.com/coverage/urlexpand.html) on nodejs 0.6.x.
+
+## Install
+
+```bash
+$ npm install urlexpand
+```
+
+## Usage
+
+```js
+var urlexpand = require('urlexpand');
+
+urlexpand('http://url.cn/8pBPLK', function (err, data) {
+ // data.url: 'http://instagram.com/p/QhLtWhB_A1/'
+ // data.title: 'Photo by sofishlin &bull; Instagram'
+ console.log(data);
+});
+```
+
+## License
+
+(The MIT License)
+
+Copyright (c) 2012 fengmk2 &lt;fengmk2@gmail.com&gt;
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
View
1  index.js
@@ -0,0 +1 @@
+module.exports = process.env.URLEXPAND_COV ? require('./lib-cov/expand') : require('./lib/expand'); // use the ./lib-cov copy when URLEXPAND_COV is set (the Makefile's test-cov target sets it), otherwise ./lib
View
180 lib/expand.js
@@ -0,0 +1,180 @@
+/*!
+ * urlexpand - lib/expand.js
+ * Copyright(c) 2012 fengmk2 <fengmk2@gmail.com>
+ * MIT Licensed
+ */
+
+"use strict";
+
+/**
+ * Module dependencies.
+ */
+
+var http = require('http');
+var https = require('https');
+var urlutil = require('url');
+var charset = require('charset');
+var iconv = require('iconv-lite');
+
+
+function handleCallback(err, url, callback) { // report the final result to `callback`, at most once
+ if (callback.__called) { // a result was already delivered through this callback; ignore later events
+ return;
+ }
+ callback.__called = true;
+ callback(err, {
+ url: url,
+ title: callback.__title, // assigned by expand() after the body has been read; undefined otherwise
+ count: callback.__redirectCounter, // incremented by expand() for every 301/302 response
+ tracks: callback.__tracks, // one entry per request, pushed by expand()
+ });
+}
+
+var TITLE_RE = /<title>([^<]+)</i; // captures the text between a bare `<title>` tag and the next `<`, case-insensitively; NOTE(review): a `<title>` tag carrying attributes, or an empty title, does not match
+
+function getTitle(data, cs) { // decode `data` with charset `cs` and return the trimmed <title> text, or null when none is found
+ cs = iconv.encodings[cs] ? cs : 'utf8'; // fall back to utf8 when `cs` is not a key of iconv.encodings
+ var text = iconv.decode(data, cs);
+ var m = TITLE_RE.exec(text);
+ return m ? m[1].trim() : null;
+}
+
+/**
+ * Expand a shortened url: follow its 301/302 redirects and report the final
+ * url, the page title and the history of every request made on the way.
+ *
+ * Per-call state (`__redirectCounter`, `__tracks`, `__title`, `__called`) is
+ * stored as properties on the `callback` function itself and is shared by the
+ * recursive calls made for each redirect. Passing the same function object to
+ * several expand() calls would therefore make those calls share that state.
+ *
+ * @param {String} url, the url you want to expand.
+ * @param {Object} [options], missing defaults are written onto this object.
+ * - {Number} [redirects], max redirect times, default is `5`.
+ * - {Boolean} [title], get title or not, default is `true`.
+ * - {Number} [timeout], request timeout, default is `10000` ms.
+ * @param {Function(err, data)} callback
+ * Called with no arguments at all when `url` has no hostname.
+ * - {Object} data {
+ * {String} url: the url of the last request; when `redirects` is exceeded,
+ * the next redirect location (which is not requested).
+ * {String} title: the page title of the final, non-redirect response; `null`
+ * when no title is found, unset when `title` is false or the request failed.
+ * {Number} count: number of 301/302 responses seen.
+ * {Array} tracks: the handle tracks. `[{ url: $url, headers: $headers, statusCode: 301 }, ... ]`,
+ * a failed request is recorded as `{ url: $url, error: $message }`.
+ * }
+ */
+function expand(url, options, callback) {
+ if (typeof options === 'function') { // expand(url, callback) form
+ callback = options;
+ options = null;
+ }
+ options = options || {};
+ options.redirects = options.redirects || 5; // NOTE(review): a `redirects` value of 0 also falls back to 5
+ if (options.title === undefined) {
+ options.title = true;
+ }
+ options.timeout = options.timeout || 10000;
+ var info = urlutil.parse(url || '');
+ if (!info.hostname) {
+ return callback(); // nothing to request: callback receives neither err nor data
+ }
+ var reqOptions = {
+ hostname: info.hostname,
+ path: info.path,
+ method: 'GET'
+ };
+ if (info.port) {
+ reqOptions.port = info.port;
+ }
+ if (callback.__redirectCounter === undefined) { // first request of the chain: create the state kept on the callback
+ callback.__redirectCounter = 0;
+ callback.__tracks = [];
+ }
+ var request = http.request;
+ if (info.protocol === 'https:') {
+ request = https.request;
+ }
+ var req = request(reqOptions);
+ var timer = null;
+ req.on('response', function (res) {
+ callback.__tracks.push({
+ url: url,
+ headers: res.headers,
+ statusCode: res.statusCode
+ });
+ if (res.statusCode === 302 || res.statusCode === 301) { // only 301/302 are followed; any other status is handled below as the final response
+ clearTimeout(timer);
+ callback.__redirectCounter++;
+ var location = urlutil.resolve(url, res.headers.location); // resolve a relative Location against the current url; NOTE(review): a missing Location header is not guarded here
+ if (callback.__redirectCounter > options.redirects) { // limit exceeded: report the next location without requesting it
+ return handleCallback(null, location, callback);
+ }
+ return expand(location, options, callback); // follow the redirect, reusing the same options and callback state
+ }
+
+ if (!options.title) { // title not wanted: drop the response body and report the url right away
+ clearTimeout(timer);
+ res.destroy();
+ return handleCallback(null, url, callback);
+ }
+
+ // buffer the whole body, then extract the title once the response ends
+ var buffers = [];
+ var size = 0;
+ res.on('data', function (chunk) {
+ buffers.push(chunk);
+ size += chunk.length;
+ });
+ res.on('end', function () {
+ clearTimeout(timer);
+ var data = Buffer.concat(buffers, size);
+ var cs = charset(res.headers, data) || 'utf8'; // charset detected from headers/body, utf8 when none is reported
+ var title = getTitle(data, cs);
+ callback.__title = title;
+ handleCallback(null, url, callback);
+ });
+ });
+ req.on('error', function (err) { // NOTE(review): the timer is not cleared on this path
+ callback.__tracks.push({
+ url: url,
+ error: req.isTimeout ? 'request timeout' : err.message // isTimeout is set by the timer below before it aborts the request
+ });
+ handleCallback(err, url, callback);
+ });
+ req.end();
+ timer = setTimeout(function () {
+ req.isTimeout = true;
+ req.abort(); // presumably surfaces through the 'error' handler above (the /timeout test expects an error) - confirm
+ }, options.timeout);
+}
+
+module.exports = expand; // the expand function is this module's whole export
+
+/**
+ * Polyfill: define Buffer.concat when the runtime does not provide it
+ * (node < 0.8).
+ *
+ * https://github.com/joyent/node/blob/master/lib/buffer.js#L504
+ *
+ * @param {Array} list, the buffers to join; anything else throws an Error.
+ * @param {Number} [length], total byte length of the result; computed by
+ * summing the buffers' lengths when it is not a number.
+ * @return {Buffer} an empty buffer for an empty list, the single buffer itself
+ * (not a copy) for a one-element list, otherwise a new buffer holding all
+ * the buffers copied in order.
+ */
+
+if (!Buffer.concat) {
+ Buffer.concat = function (list, length) {
+ if (!Array.isArray(list)) {
+ throw new Error('Usage: Buffer.concat(list, [length])');
+ }
+
+ if (list.length === 0) {
+ return new Buffer(0);
+ } else if (list.length === 1) {
+ return list[0]; // returned as-is, without copying
+ }
+
+ if (typeof length !== 'number') { // no usable length given: add up the parts
+ length = 0;
+ for (var i = 0; i < list.length; i++) {
+ var buf = list[i];
+ length += buf.length;
+ }
+ }
+
+ var buffer = new Buffer(length);
+ var pos = 0; // write offset into `buffer`
+ for (var i = 0; i < list.length; i++) {
+ var buf = list[i];
+ buf.copy(buffer, pos);
+ pos += buf.length;
+ }
+ return buffer;
+ };
+}
View
35 package.json
@@ -0,0 +1,35 @@
+{
+ "name": "urlexpand",
+ "version": "0.0.1",
+ "description": "Expand the shorten url, handle all the 30x http redirect and get the original url.",
+ "main": "index.js",
+ "directories": {
+ "test": "test"
+ },
+ "scripts": {
+ "test": "make test"
+ },
+ "dependencies": {
+ "charset": ">=0.0.1",
+ "iconv-lite": ">=0.2.5"
+ },
+ "devDependencies": {
+ "should": "*",
+ "mocha": "*"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/fengmk2/urlexpand.git"
+ },
+ "keywords": [
+ "shorten",
+ "url",
+ "expand",
+ "t.cn",
+ "url.cn",
+ "t.co",
+ "bit.ly"
+ ],
+ "author": "fengmk2 <fengmk2@gmail.com>",
+ "license": "MIT"
+}
View
208 test/urlexpand.test.js
@@ -0,0 +1,208 @@
+/*!
+ * urlexpand - test/urlexpand.test.js
+ * Copyright(c) 2012 fengmk2 <fengmk2@gmail.com>
+ * MIT Licensed
+ */
+
+"use strict";
+
+/**
+ * Module dependencies.
+ */
+
+var urlexpand = require('../');
+var should = require('should');
+var http = require('http');
+
+var server = http.createServer(function (req, res) { // local fixture server behind the notitle / error / timeout tests
+ if (req.url === '/error') {
+ return res.destroy(); // destroy the response instead of answering; the /error test expects 'socket hang up'
+ }
+ if (req.url === '/redirect_to_error') {
+ res.writeHead(302, {
+ Location: '/error'
+ });
+ return res.end();
+ }
+ if (req.url === '/notitle') {
+ return res.end('<html><body></body></html>'); // a page without any <title> element
+ }
+ if (req.url === '/timeout') {
+ return; // never answer, so the client's own timeout has to fire
+ }
+});
+
+
+describe('urlexpand.test.js', function () { // the first seven cases request public urls; the last four use the local fixture server
+
+ var serverURL = 'http://127.0.0.1:' // the fixture server's port is appended in before()
+ before(function (done) {
+ server.listen(0, function () { // listen on port 0, then read back the port that was actually assigned
+ serverURL += server.address().port;
+ done();
+ });
+ });
+
+ it('should expand http://t.cn/zlpFyQ7 to http://www.infoq.com/cn/news/2012/09/rails-40 with 1 redirect count',
+ function (done) {
+ urlexpand('http://t.cn/zlpFyQ7', function (err, data) {
+ should.not.exist(err);
+ should.exist(data);
+ data.should.have.property('url', 'http://www.infoq.com/cn/news/2012/09/rails-40');
+ data.should.have.property('count', 1);
+ data.should.have.property('title', '即将来临的Rails 4.0将放弃Ruby 1.8支持,改进后台任务、缓存等多项内容');
+ data.should.have.property('tracks').with.length(2);
+ data.tracks[0].should.have.property('statusCode', 302);
+ data.tracks[0].should.have.property('url', 'http://t.cn/zlpFyQ7');
+ data.tracks[1].should.have.property('statusCode', 200);
+ data.tracks[1].should.have.property('url', 'http://www.infoq.com/cn/news/2012/09/rails-40');
+ done();
+ });
+ });
+
+ it('should expand and get gbk title', function (done) {
+ urlexpand('http://t.cn/zl0x0IL', function (err, data) {
+ should.not.exist(err);
+ should.exist(data);
+ data.should.have.property('url', 'http://baike.baidu.com/view/6627415.htm');
+ data.should.have.property('count', 1);
+ data.should.have.property('title', '淘宝指数_百度百科');
+ data.should.have.property('tracks').with.length(2);
+ data.tracks[0].should.have.property('statusCode', 302);
+ data.tracks[0].should.have.property('url', 'http://t.cn/zl0x0IL');
+ data.tracks[1].should.have.property('statusCode', 200);
+ data.tracks[1].should.have.property('url', 'http://baike.baidu.com/view/6627415.htm');
+ done();
+ });
+ });
+
+ it('should redirect 2, get the last url and title', function (done) {
+ urlexpand('http://url.cn/8pBPLK', function (err, data) {
+ should.not.exist(err);
+ should.exist(data);
+ data.should.have.property('url', 'http://instagram.com/p/QhLtWhB_A1/');
+ data.should.have.property('count', 2);
+ data.should.have.property('title', 'Photo by sofishlin &bull; Instagram');
+ data.should.have.property('tracks').with.length(3);
+ data.tracks[0].should.have.property('statusCode', 302);
+ data.tracks[0].should.have.property('url', 'http://url.cn/8pBPLK');
+ data.tracks[1].should.have.property('statusCode', 301);
+ data.tracks[1].should.have.property('url', 'http://instagr.am/p/QhLtWhB_A1/');
+ data.tracks[2].should.have.property('statusCode', 200);
+ data.tracks[2].should.have.property('url', 'http://instagram.com/p/QhLtWhB_A1/');
+ done();
+ });
+ });
+
+ it('should only redirect once and get the last url but no title', function (done) {
+ urlexpand('http://url.cn/8pBPLK', { redirects: 1 }, function (err, data) {
+ should.not.exist(err);
+ should.exist(data);
+ data.should.have.property('url', 'http://instagram.com/p/QhLtWhB_A1/'); // the second redirect's location, reported without being requested
+ data.should.have.property('count', 2); // expand() increments the counter before comparing it with the limit, hence redirects + 1
+ data.should.not.have.property('title');
+ data.should.have.property('tracks').with.length(2);
+ data.tracks[0].should.have.property('headers').with.be.a('object');
+ data.tracks[0].should.have.property('statusCode', 302);
+ data.tracks[0].should.have.property('url', 'http://url.cn/8pBPLK');
+ data.tracks[1].should.have.property('statusCode', 301);
+ data.tracks[1].should.have.property('url', 'http://instagr.am/p/QhLtWhB_A1/');
+ done();
+ });
+ });
+
+ it('should get the url and title with no redirect', function (done) {
+ urlexpand('http://www.taobao.com/', function (err, data) {
+ should.not.exist(err);
+ should.exist(data);
+ data.should.have.property('url', 'http://www.taobao.com/');
+ data.should.have.property('count', 0);
+ data.should.have.property('title', '淘宝网 - 淘!我喜欢');
+ data.should.have.property('tracks').with.length(1);
+ data.tracks[0].should.have.property('headers').with.be.a('object');
+ data.tracks[0].should.have.property('statusCode', 200);
+ data.tracks[0].should.have.property('url', 'http://www.taobao.com/');
+ done();
+ });
+ });
+
+ it('should only get the url with no redirect', function (done) {
+ urlexpand('http://www.taobao.com/', { title: false }, function (err, data) {
+ should.not.exist(err);
+ should.exist(data);
+ data.should.have.property('url', 'http://www.taobao.com/');
+ data.should.have.property('count', 0);
+ data.should.not.have.property('title');
+ data.should.have.property('tracks').with.length(1);
+ data.tracks[0].should.have.property('headers').with.be.a('object');
+ data.tracks[0].should.have.property('statusCode', 200);
+ data.tracks[0].should.have.property('url', 'http://www.taobao.com/');
+ done();
+ });
+ });
+
+ it('should handle https as well', function (done) {
+ urlexpand('https://t.co/DltTy9tA', function (err, data) {
+ should.not.exist(err);
+ should.exist(data);
+ data.should.have.property('url', 'https://github.com/visionmedia/better-assert');
+ data.should.have.property('count', 1);
+ data.should.have.property('title', 'visionmedia/better-assert · GitHub');
+ data.should.have.property('tracks').with.length(2);
+ done();
+ });
+ });
+
+ it('should return no title', function (done) {
+ urlexpand(serverURL + '/notitle', function (err, data) {
+ should.not.exist(err);
+ should.exist(data);
+ data.should.have.property('url', serverURL + '/notitle');
+ data.should.have.property('count', 0);
+ data.should.have.property('title', null); // getTitle() returns null when the body has no <title>
+ done();
+ });
+ });
+
+ it('should return error when request /error', function (done) {
+ urlexpand(serverURL + '/error', function (err, data) {
+ should.exist(err);
+ should.exist(data);
+ data.should.have.property('url', serverURL + '/error');
+ data.should.have.property('count', 0);
+ data.should.not.have.property('title');
+ data.should.have.property('tracks').with.length(1);
+ data.tracks[0].should.have.property('error', 'socket hang up');
+ done();
+ });
+ });
+
+ it('should return error when request /redirect_to_error', function (done) {
+ urlexpand(serverURL + '/redirect_to_error', function (err, data) {
+ should.exist(err);
+ should.exist(data);
+ data.should.have.property('url', serverURL + '/error');
+ data.should.have.property('count', 1);
+ data.should.not.have.property('title');
+ data.should.have.property('tracks').with.length(2);
+ data.tracks[0].should.have.property('statusCode', 302);
+ data.tracks[0].should.have.property('url', serverURL + '/redirect_to_error');
+ data.tracks[1].should.have.property('error', 'socket hang up');
+ done();
+ });
+ });
+
+ it('should return timeout error when request /timeout', function (done) {
+ urlexpand(serverURL + '/timeout', { timeout: 500 }, function (err, data) { // 500 ms keeps this case well inside mocha's timeout
+ should.exist(err);
+ should.exist(data);
+ data.should.have.property('url', serverURL + '/timeout');
+ data.should.have.property('count', 0);
+ data.should.not.have.property('title');
+ data.should.have.property('tracks').with.length(1);
+ data.tracks[0].should.have.property('error', 'request timeout');
+ done();
+ });
+ });
+
+});
Please sign in to comment.
Something went wrong with that request. Please try again.