This repository has been archived by the owner on Sep 21, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #13 from antislice/upstream-pr
Fix bugs in cleanText() and wordCount(), add some tests
- Loading branch information
Showing
7 changed files
with
256 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
node_modules |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
--reporter nyan |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
var assert = require('assert'); | ||
var TextStatistics = require('../index.js'); | ||
|
||
describe('TextStatistics', function() { | ||
// this is called when you "make" a TextStatistics | ||
describe('#cleanText()', function() { | ||
it('should add a final terminator if it\'s missing', function() { | ||
var ts = TextStatistics('Hello friend'); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
|
||
it('should not add a final terminator if there is a \'.\'', function() { | ||
var ts = TextStatistics('Hello friend.'); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
|
||
context('trailing whitespace', function() { | ||
it('should strip spaces', function() { | ||
var ts = TextStatistics('Hello friend. '); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
|
||
it('should strip newlines', function() { | ||
var ts = TextStatistics('Hello friend.\n\n'); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
|
||
it('should strip \\r\\n thing', function() { | ||
var ts = TextStatistics('Hello friend.\r\n'); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
|
||
it('should strip tabs', function() { | ||
var ts = TextStatistics('Hello friend.\t'); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
}); | ||
|
||
context('leading whitespace', function() { | ||
it('should strip spaces', function() { | ||
var ts = TextStatistics(' Hello friend.'); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
|
||
it('should strip newlines', function() { | ||
var ts = TextStatistics('\n\nHello friend.'); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
|
||
it('should strip \\r\\n thing', function() { | ||
var ts = TextStatistics('\r\nHello friend.'); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
|
||
it('should strip tabs', function() { | ||
var ts = TextStatistics('\tHello friend.'); | ||
assert.equal(ts.text, 'Hello friend.'); | ||
}); | ||
}); | ||
|
||
it('should remove multiple spaces between words', function() { | ||
var ts = TextStatistics('Hello good friend.'); | ||
assert.equal(ts.text, 'Hello good friend.'); | ||
}); | ||
|
||
it('should un-duplicate terminators', function() { | ||
var ts = TextStatistics('Hello... Friend..'); | ||
assert.equal(ts.text, 'Hello. Friend.'); | ||
}); | ||
|
||
it('should pad terminators with a space', function() { | ||
var ts = TextStatistics('Hello.Good.Friend.'); | ||
assert.equal(ts.text, 'Hello. Good. Friend.'); | ||
}); | ||
|
||
context('unify terminators', function() { | ||
it('should replace all !! with ..', function() { | ||
var ts = TextStatistics('Hello! Friend!'); | ||
assert.equal(ts.text, 'Hello. Friend.'); | ||
}); | ||
|
||
it('should replace all ?? with ..', function() { | ||
var ts = TextStatistics('Hello? Friend?'); | ||
assert.equal(ts.text, 'Hello. Friend.'); | ||
}); | ||
}); | ||
|
||
context('replacing newlines with terminators', function() { | ||
it('should replace \\n', function() { | ||
var ts = TextStatistics('bulleted list here we go\nnice dog\ngood dog'); | ||
assert.equal(ts.text, 'bulleted list here we go. nice dog. good dog.'); | ||
}); | ||
|
||
it('should replace \\r\\n', function() { | ||
var ts = TextStatistics('bulleted list here we go\r\nnice dog\r\ngood dog'); | ||
assert.equal(ts.text, 'bulleted list here we go. nice dog. good dog.'); | ||
}); | ||
|
||
it('should replace \\r', function() { | ||
var ts = TextStatistics('bulleted list here we go\rnice dog\rgood dog'); | ||
assert.equal(ts.text, 'bulleted list here we go. nice dog. good dog.'); | ||
}); | ||
}); | ||
|
||
context('stripping periods from email addresses', function() { | ||
it('should replace a single period', function() { | ||
var ts = TextStatistics('textstatistics@example.com'); | ||
assert.equal(ts.text, 'textstatistics@examplecom.'); | ||
}); | ||
|
||
it('should replace a single period in the first part', function() { | ||
var ts = TextStatistics('text.statistics@example.com'); | ||
assert.equal(ts.text, 'textstatistics@examplecom.'); | ||
}); | ||
|
||
it('should replace two periods in the first part', function() { | ||
var ts = TextStatistics('text.stat.istics@example.com'); | ||
assert.equal(ts.text, 'textstatistics@examplecom.'); | ||
}); | ||
|
||
it('should replace periods with a subdomain', function() { | ||
var ts = TextStatistics('textstatistics@test.example.com'); | ||
assert.equal(ts.text, 'textstatistics@testexamplecom.'); | ||
}); | ||
|
||
it('should replace periods with a subdomain and before the @', function() { | ||
var ts = TextStatistics('text.stat.istics@test.example.com'); | ||
assert.equal(ts.text, 'textstatistics@testexamplecom.'); | ||
}); | ||
}); | ||
|
||
context('replacing non-terminator punctuation', function() { | ||
it('should replace commas with spaces', function() { | ||
var ts = TextStatistics('Hello, hi, friend.'); | ||
assert.equal(ts.text, 'Hello hi friend.'); | ||
}); | ||
|
||
it('should replace colons with spaces', function() { | ||
var ts = TextStatistics('Hello: hi: friend.'); | ||
assert.equal(ts.text, 'Hello hi friend.'); | ||
}); | ||
|
||
it('should replace semicolons with spaces', function() { | ||
var ts = TextStatistics('Hello; hi; friend.'); | ||
assert.equal(ts.text, 'Hello hi friend.'); | ||
}); | ||
|
||
it('should replace parentheses with spaces', function() { | ||
var ts = TextStatistics('(Hello (hi) friend).'); | ||
assert.equal(ts.text, 'Hello hi friend.'); | ||
}); | ||
|
||
it('should replace slashes with spaces', function() { | ||
var ts = TextStatistics('Hello/hi/friend.'); | ||
assert.equal(ts.text, 'Hello hi friend.'); | ||
}); | ||
|
||
it('should replace double hyphens with spaces', function() { | ||
var ts = TextStatistics('Hello--hi--friend.'); | ||
assert.equal(ts.text, 'Hello hi friend.'); | ||
}); | ||
|
||
it('should not replace a single dash with spaces', function() { | ||
var ts = TextStatistics('Hi-di-ho friend-person!'); | ||
assert.equal(ts.text, 'Hi-di-ho friend-person.'); | ||
}); | ||
|
||
it('should replace pluses with spaces', function() { | ||
var ts = TextStatistics('Hello + hi+friend.'); | ||
assert.equal(ts.text, 'Hello hi friend.'); | ||
}); | ||
|
||
it('should replace ampersands with spaces', function() { | ||
var ts = TextStatistics('Hello&hi & friend.'); | ||
assert.equal(ts.text, 'Hello hi friend.'); | ||
}); | ||
|
||
it('should replace em-dash with spaces'); // can I do that? | ||
}); | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
var assert = require('assert'); | ||
var TextStatistics = require('../index.js'); | ||
|
||
describe('TextStatistics', function() { | ||
|
||
describe('#sentenceCount()', function() { | ||
it('should count a single sentence', function() { | ||
var ts = TextStatistics('see spot run.'); | ||
assert.equal(1, ts.sentenceCount()); | ||
}); | ||
|
||
it('should count a single sentence with a comma', function() { | ||
var ts = TextStatistics('see, spot runs.'); | ||
assert.equal(1, ts.sentenceCount()); | ||
}); | ||
|
||
it('should count a few simple sentences', function() { | ||
var ts = TextStatistics('see spot run. good job spot. have a treat.'); | ||
assert.equal(3, ts.sentenceCount()); | ||
}); | ||
}); | ||
|
||
describe('#wordCount()', function() { | ||
it('a string w/o words should have word count of one, because dividing by zero', function() { | ||
var ts = TextStatistics('.'); | ||
assert.equal(1, ts.wordCount()); | ||
}); | ||
|
||
it('should count the number of words in a text', function() { | ||
var ts = TextStatistics('see spot run'); | ||
assert.equal(3, ts.wordCount()); | ||
}); | ||
|
||
it('should not count words with an apostrophe as two words', function() { | ||
var ts = TextStatistics('they\'re'); | ||
assert.equal(1, ts.wordCount()); | ||
}); | ||
|
||
it('should not count the empty string after a period as a word', function() { | ||
var ts = TextStatistics('dog.'); | ||
assert.equal(1, ts.wordCount()); | ||
}); | ||
|
||
it('should count an email address as a single word', function() { | ||
var ts = TextStatistics('textstatistics@example.com'); | ||
assert.equal(1, ts.wordCount()); | ||
}); | ||
|
||
it('should count words with a dash as a single word', function() { | ||
var ts = TextStatistics('long-term'); | ||
assert.equal(1, ts.wordCount()); | ||
}); | ||
}); | ||
}); |