node.js utilities for reading pipermail archives
Switch branches/tags
greenkeeper/htmlparser2-3.10.0 greenkeeper/jest-22.0.1 greenkeeper/jest-22.0.2 greenkeeper/jest-22.0.3 greenkeeper/jest-22.0.4 greenkeeper/jest-22.0.5 greenkeeper/jest-22.0.6 greenkeeper/jest-22.1.0 greenkeeper/jest-22.1.1 greenkeeper/jest-22.1.2 greenkeeper/jest-22.1.3 greenkeeper/jest-22.1.4 greenkeeper/jest-pin-22.0.0 greenkeeper/lint-staged-4.3.0 greenkeeper/lint-staged-7.0.5 greenkeeper/lint-staged-7.2.1 greenkeeper/lint-staged-7.2.2 greenkeeper/lint-staged-pin-7.2.0 greenkeeper/prettier-1.9.2 greenkeeper/prettier-1.10.0 greenkeeper/prettier-1.10.1 greenkeeper/prettier-1.10.2 greenkeeper/prettier-1.15.1 greenkeeper/prettier-pin-1.9.1 greenkeeper/ts-jest-21.2.0 greenkeeper/ts-jest-21.2.1 greenkeeper/ts-jest-21.2.2 greenkeeper/ts-jest-21.2.3 greenkeeper/ts-jest-21.2.4 greenkeeper/ts-jest-22.0.0 greenkeeper/ts-jest-22.0.1 greenkeeper/ts-jest-22.0.2 greenkeeper/ts-jest-23.1.0 greenkeeper/ts-jest-23.1.1 greenkeeper/ts-jest-23.1.2 greenkeeper/ts-jest-23.1.3 greenkeeper/ts-jest-23.1.4 greenkeeper/ts-jest-pin-21.1.4 greenkeeper/ts-jest-pin-23.0.1 greenkeeper/@types/jest-22.1.0 greenkeeper/@types/jest-23.1.3 greenkeeper/@types/jest-23.1.4 greenkeeper/@types/jest-23.1.5 greenkeeper/@types/jest-23.1.6 greenkeeper/@types/jest-23.3.1 greenkeeper/@types/jest-23.3.2 greenkeeper/@types/jest-23.3.6 greenkeeper/@types/jest-23.3.7 greenkeeper/@types/jest-23.3.8 greenkeeper/@types/jest-23.3.9 greenkeeper/@types/jest-23.3.10 greenkeeper/@types/jest-pin-22.0.1 greenkeeper/@types/jest-pin-23.1.2 greenkeeper/@types/jest-pin-23.3.5 greenkeeper/@types/node-10.5.8 greenkeeper/@types/node-10.7.0 greenkeeper/@types/node-10.7.1 greenkeeper/@types/node-10.7.2 greenkeeper/@types/node-10.9.0 greenkeeper/@types/node-10.9.1 greenkeeper/@types/node-10.9.2 greenkeeper/@types/node-10.9.3 greenkeeper/@types/node-10.9.4 greenkeeper/@types/node-10.11.1 greenkeeper/@types/node-10.11.2 greenkeeper/@types/node-10.11.3 greenkeeper/@types/node-10.11.4 greenkeeper/@types/node-10.11.5 
greenkeeper/@types/node-10.11.6 greenkeeper/@types/node-10.11.7 greenkeeper/@types/node-10.12.0 greenkeeper/@types/node-10.12.1 greenkeeper/@types/node-10.12.2 greenkeeper/@types/node-10.12.3 greenkeeper/@types/node-10.12.4 greenkeeper/@types/node-10.12.5 greenkeeper/@types/node-10.12.6 greenkeeper/@types/node-10.12.7 greenkeeper/@types/node-10.12.8 greenkeeper/@types/node-10.12.9 greenkeeper/@types/node-10.12.10 greenkeeper/@types/node-10.12.11 greenkeeper/@types/node-10.12.12 greenkeeper/@types/node-10.12.13 greenkeeper/@types/node-10.12.14 greenkeeper/@types/node-pin-10.5.7 greenkeeper/@types/node-pin-10.11.0 master version-1
Nothing to show
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Type Name Latest commit message Commit time
Failed to load latest commit information.
src
.gitignore
.prettierrc
.travis.yml
README.md
example.js
package.json
sample.js
tsconfig.json

README.md

pipermail

node.js utilities for reading pipermail archives such as es-discuss

Build Status Dependency Status NPM version

Basic Usage

var pipermail = require('pipermail');

var options = {};

//`pipermail` returns a stream of JSON objects.
//This can't be directly written to a file
var parsed = pipermail('https://mail.mozilla.org/pipermail/es-discuss/', options);

//convert the stream of JSON objects into a stream of JSON text separated by new lines.
var stringified = parsed.pipe(pipermail.stringify());

//pipe to a file
stringified.pipe(require('fs').createWriteStream('res.txt'));

//compress to a file
stringified.pipe(require('zlib').createGzip())
  .pipe(require('fs').createWriteStream('res.txt.gz'));

The resulting res.txt would look something like:

{"url":"https://mail.mozilla.org/pipermail/es-discuss/2006-June/003436.html","header":{"from":{"email":"baz@example.com","name":"Brendan Eich"},"date":"Sat, 3 Jun 2006 12:35:18 -0700","subject":"Welcome to the ECMAScript Edition 4 discussion list"},"body":"Thanks to Graydon Hoare for setting it up.\n\n/be"}
{"url":"https://mail.mozilla.org/pipermail/es-discuss/2006-June/003437.html","header":{"from":{"email":"bar@example.com","name":"Olav Junker Kjær"},"date":"Tue, 06 Jun 2006 15:40:48 +0200","subject":"ES4 translator"},"body":"Hello,\nI'm very pleased to s the new public specs for ES4"}
{"url":"https://mail.mozilla.org/pipermail/es-discuss/2006-June/003438.html","header":{"from":{"email":"foo@example.com","name":"Robert Sayre"},"date":"Wed, 7 Jun 2006 11:43:37 -0400","subject":"date literals"},"body":"I think the date literal should allow a trailing 'Z' to substitute for\n'+00:00'.\n\nRobert Sayre"}

I've shortened the bodies and replaced the e-mail addresses, but other than that these are the first few lines generated by the above code.

Options

  • filterMonth: a function that gets the month's url as its argument and returns true or false to indicate whether the month should be included (or returns a promise if it's asynchronous)
  • filterMessage: a function that gets the message's url as its argument and returns true or false to indicate whether the message should be downloaded (or returns a promise if it's asynchronous)
  • months: the maximum number of months to download. If set, only the most recent n months will be downloaded.
  • parallel: the maximum number of messages to download in parallel, defaults to 10
  • parallelMonths: the maximum number of month index pages to download in parallel, defaults to 2
  • archiveUrlRegex: the regex used to look for message archives on the index page of the pipermail archive, defaults to /\d\d\d\d\-[a-z]+\.txt(?:\.gz)?/gi

License

MIT