Skip to content
This repository has been archived by the owner on Jan 6, 2022. It is now read-only.

Commit

Permalink
cli options
Browse files Browse the repository at this point in the history
  • Loading branch information
max-mapper committed Nov 12, 2015
1 parent 425ba62 commit 75ce84e
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 11 deletions.
23 changes: 21 additions & 2 deletions README.md
Expand Up @@ -6,7 +6,9 @@ Uses the implementation of Rabin fingerprinting from [LBFS](https://github.com/f

Rabin fingerprinting is useful for finding the chunks of a file that differ from a previous version. It's one implementation of a technique called "Content-defined chunking", meaning the chunk boundaries are determined by the content itself (as opposed to "fixed-sized chunking").

## API
There's a JavaScript API and an accompanying command-line tool.

## JavaScript API

### `var createRabin = require('rabin')`

Expand All @@ -16,7 +18,7 @@ Rabin fingerprinting is useful for finding the chunks of a file that differ from

`rabin` is a duplex stream. You write raw data in, and buffers chunked by rabin fingerprints will be written out.

## Example
## JavaScript Example

```js
// require and create an instance
Expand All @@ -32,3 +34,20 @@ rabin.on('data', function (chunk) {
// and splitting on each rabin fingerprint found
})
```

## CLI API

```
$ npm install rabin -g
$ rabin myfile.txt --bits=14 --min=8192 --max=32768 # defaults
{"length":12182,"offset":0,"hash":"5df6245b5897336ebf611d7f10fb90eea2d63c5b9ec9ad76dfb1ac72b8249dcb"}
{"length":13190,"offset":12182,"hash":"67d5aaac9cf7b8432cb3c8071d726dc38f1138957c30719f8b166116a90950a1"}
{"length":11609,"offset":25372,"hash":"976a0e3dc43de3abdf50b984a102c5fb7c2550e3dc5e44e4a8f7d4241276683b"}
{"length":10010,"offset":36981,"hash":"7145d10f93ea03e6c8b4dd5ab148e2c3c08f9c71bf71c7559dffdfcef48112c1"}
{"length":13623,"offset":46991,"hash":"76470d5047f9fb31bd75364d90355fdbf913aaa1df934251f43c894f01381f1b"}
{"length":8197,"offset":60614,"hash":"88abce05bc75f72cdafeabd5125eb46fa8f73eab2d75a29076aeb3f99ef35548"}
{"length":16242,"offset":68811,"hash":"08d60789c1e901d6a8e474aeb5de4746af1648e7f3a4ac7a3dba87d9e73fca56"}
{"length":14947,"offset":85053,"hash":"4224e6f4361fa8bdefb9d8e10ebd046e2869af2c44ea7e84c7efaeedd5423b30"}
average 12500
```

16 changes: 12 additions & 4 deletions bindings.cc
Expand Up @@ -32,13 +32,21 @@ void get_fingerprints(rabin_t *hasher, Local<Array> bufs, Local<Array> lengths)
NAN_METHOD(Initialize) {
if (instance_counter >= 1024) return Nan::ThrowError("the value of instance_counter is too damn high");
struct rabin_t *hasher = (struct rabin_t *) malloc(sizeof(struct rabin_t));

if (!info[0]->IsNumber()) return Nan::ThrowError("first arg must be a number");
if (!info[1]->IsNumber()) return Nan::ThrowError("second arg must be a number");
if (!info[2]->IsNumber()) return Nan::ThrowError("third arg must be a number");

hasher->average_bits = info[0]->Uint32Value();
hasher->minsize = info[1]->Uint32Value();
hasher->maxsize = info[2]->Uint32Value();

// Open a pull request if you need these to be configurable
hasher->mask = ((1<<hasher->average_bits)-1);
hasher->polynomial = 0x3DA3358B4DC173LL;
hasher->polynomial_degree = 53;
hasher->average_bits = 14;
hasher->minsize = 8 * 1024;
hasher->maxsize = 32 * 1024;
hasher->mask = ((1<<hasher->average_bits)-1);
hasher->polynomial_shift = (hasher->polynomial_degree-8);

rabin_init(hasher);
instances[instance_counter++] = hasher;
info.GetReturnValue().Set(instance_counter - 1);
Expand Down
5 changes: 3 additions & 2 deletions cli.js
@@ -1,9 +1,10 @@
#!/usr/bin/env node
var fs = require('fs')
var crypto = require('crypto')
var rabin = require('./')()
var args = require('minimist')(process.argv.slice(2))
var rabin = require('./')(args)
var offset = 0
var rs = fs.createReadStream(process.argv[2])
var rs = fs.createReadStream(args._[0])
var count = 0
rs.pipe(rabin).on('data', function (ch) {
offset += ch.length
Expand Down
9 changes: 6 additions & 3 deletions index.js
Expand Up @@ -6,11 +6,14 @@ var debug = require('debug')('rabin')

module.exports = Rabin

function Rabin () {
if (!(this instanceof Rabin)) return new Rabin()
function Rabin (opts) {
if (!(this instanceof Rabin)) return new Rabin(opts)
this.destroyed = false
this.rabinEnded = false
this.rabin = rabin.initialize()
var avgBits = +opts.bits || 12
var min = +opts.min || 8 * 1024
var max = +opts.max || 32 * 1024
this.rabin = rabin.initialize(avgBits, min, max)
this.nextCb = null
this.buffers = new BufferList()
this.on('finish', function () {
Expand Down
2 changes: 2 additions & 0 deletions package.json
Expand Up @@ -6,6 +6,7 @@
"bin": {
"rabin": "cli.js"
},
"keywords": ["rabin", "cdc", "chunking", "fingerprint", "rolling hash", "dedupe", "deduplication", "rsync"],
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"install": "prebuild --download && echo \"Installed prebuilt binary successfully.\n\"",
Expand All @@ -18,6 +19,7 @@
"bindings": "^1.2.1",
"bl": "^1.0.0",
"debug": "^2.2.0",
"minimist": "^1.2.0",
"nan": "^2.1.0",
"prebuild": "^2.6.2",
"readable-stream": "^2.0.4"
Expand Down

0 comments on commit 75ce84e

Please sign in to comment.