From b4a8d1f264154e836c6b00c880a8aa14d7926e5d Mon Sep 17 00:00:00 2001 From: Andrew Duthie Date: Sun, 3 Feb 2013 22:08:34 -0500 Subject: [PATCH] Initial commit --- .gitignore | 2 + Gruntfile.coffee | 40 ++++++++ LICENSE.txt | 7 ++ README.md | 32 ++++++ docs/LibroIpsum.html | 165 ++++++++++++++++++++++++++++++ docs/docco.css | 192 +++++++++++++++++++++++++++++++++++ lib/LibroIpsum.js | 225 +++++++++++++++++++++++++++++++++++++++++ lib/LibroIpsum.min.js | 2 + package.json | 20 ++++ src/LibroIpsum.coffee | 230 ++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 915 insertions(+) create mode 100644 .gitignore create mode 100755 Gruntfile.coffee create mode 100755 LICENSE.txt create mode 100755 README.md create mode 100644 docs/LibroIpsum.html create mode 100644 docs/docco.css create mode 100644 lib/LibroIpsum.js create mode 100644 lib/LibroIpsum.min.js create mode 100755 package.json create mode 100644 src/LibroIpsum.coffee diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2752eb9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +node_modules/ +.DS_Store diff --git a/Gruntfile.coffee b/Gruntfile.coffee new file mode 100755 index 0000000..a4c759e --- /dev/null +++ b/Gruntfile.coffee @@ -0,0 +1,40 @@ +module.exports = (grunt) -> + + grunt.initConfig + + pkg: grunt.file.readJSON('package.json') + + meta: + banner: '/*! 
<%= pkg.name %> <%= pkg.version %> | (c) <%= grunt.template.today("yyyy") %> <%= pkg.author %> | <%= pkg.license %> License */\n' + + coffee: + compile: + files: + 'lib/LibroIpsum.js' : 'src/LibroIpsum.coffee' + + concat: + options: + banner: '<%= meta.banner %>' + dist: + src: ['lib/LibroIpsum.js'] + dest: 'lib/LibroIpsum.js' + + uglify: + options: + banner: '<%= meta.banner %>' + dist: + files: + 'lib/LibroIpsum.min.js': ['lib/LibroIpsum.js'] + + watch: + files: 'src/LibroIpsum.coffee' + tasks: ['coffee'] + + grunt.loadNpmTasks 'grunt-contrib-coffee' + grunt.loadNpmTasks 'grunt-contrib-concat' + grunt.loadNpmTasks 'grunt-contrib-uglify' + grunt.loadNpmTasks 'grunt-contrib-watch' + + grunt.registerTask 'compile', ['coffee', 'concat'] + grunt.registerTask 'default', ['compile', 'watch'] + grunt.registerTask 'release', ['compile', 'uglify'] diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100755 index 0000000..6500972 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,7 @@ +Copyright (C) 2013 Andrew Duthie + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100755 index 0000000..2408d48 --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +# LibroIpsum + +LibroIpsum is a simple placeholder text generator similar to other [lorem ipsum](http://en.wikipedia.org/wiki/Lorem_ipsum) tools. LibroIpsum differs from traditional lorem ipsum in that it can be used to generate random phrases from any source text, using character distribution analysis to generate new phrases. + +[View annotated source code](http://aduth.github.com/LibroIpsum) + +Use LibroIpsum from Node.js, RequireJS, or directly in the browser. + +### Node.js + npm install LibroIpsum + +### RequireJS + define(['path/to/libs/LibroIpsum.js'], + function(LibroIpsum) { + // ... + }); + +### Browser + + +## Usage + new LibroIpsum(sourceText).generate(numberOfWords[, keyLength]); + +* `sourceText`: String representation of text from which phrases are to be generated +* `numberOfWords`: Number of words to be generated +* `keyLength`: Length of key (integer), where larger number will create phrase more similar to original text (optional, defaults to 6) + +## License + +Copyright (c) 2013 Andrew Duthie + +Released under the MIT License (see LICENSE.txt) diff --git a/docs/LibroIpsum.html b/docs/LibroIpsum.html new file mode 100644 index 0000000..eaab15b --- /dev/null +++ b/docs/LibroIpsum.html @@ -0,0 +1,165 @@ + LibroIpsum.coffee

LibroIpsum.coffee

LibroIpsum is a simple placeholder text generator similar to other lorem ipsum tools. +LibroIpsum differs from traditional lorem ipsum in that it can be used to generate random phrases from any source text, using character distribution analysis to generate new phrases.

+ +

Use LibroIpsum from Node.js, RequireJS, or directly in the browser.

+ +

Node.js

+ +
npm install LibroIpsum
+
+ +

RequireJS

+ +
define(['path/to/libs/LibroIpsum.js'],
+function(LibroIpsum) {
+    // ...
+});
+
+ +

Browser

+ +
<script src="path/to/libs/LibroIpsum.js"></script>
+
+ +

Usage

+ +
new LibroIpsum(sourceText).generate(numberOfWords[, keyLength]);
+
+ +
    +
  • sourceText: String representation of text from which phrases are to be generated
  • +
  • numberOfWords: Number of words to be generated
  • +
  • keyLength: Length of key (integer), where larger number will create phrase more similar to original text (optional, defaults to 6)
  • +
do ->

LibroIpsum

+ +

Generates phrases using character distribution of text from a given string

    class LibroIpsum

Ignore opening and closing punctuation, because it is difficult to ensure that a generated phrase contains matching pairs

        @ignoredCharacters: [
+            # Paired punctuation is never recorded as a follower character,
+            # since a generated phrase cannot guarantee the matching partner appears
+            '"', '`'
+            '‘', '’', '“', '”'
+            '[', ']', '(', ')', '{', '}'
+            '«', '»'
+        ]

Sentence-ending characters. Used to locate phrase-starting key

        @sentenceEnders: ['.', '!', '?'] # characters that terminate a sentence

Clause-separating characters. Used to cleanly end generated phrase

        @clauseSeparators: @sentenceEnders.concat [',', ';'] # sentence enders plus intra-sentence separators
+
+        constructor: (sourceText) ->
+            # Text the phrases are generated from
+            @sourceText = sourceText
+            # Cache of follower-character distributions, filled lazily per key
+            @frequencyLib = new FrequencyLibrary()

Return randomly generated phrase with numberOfWords words based on character distribution of text, using key length keyLength

        generate: (numberOfWords, keyLength = 6) ->
+            # Builds a phrase one character at a time and returns it as a string
+            # terminated by a single period.
+            #   numberOfWords - word budget; every whitespace character emitted counts as one word
+            #   keyLength     - size of the sliding character window used as lookup key (default 6)
+            # NOTE(review): the `<=` loop bound looks like it emits one word more than
+            # `numberOfWords` - confirm the intended count before changing it.
+            currentWords = 0
+            workingKey = @getKey(keyLength)
+            phrase = workingKey
+
+            while currentWords <= numberOfWords
+                distributedChar = @getDistributedChar(workingKey)
+                if distributedChar?
+                    # Slide the window forward by one character
+                    workingKey += distributedChar
+                    workingKey = workingKey.slice(1)
+                else if currentWords + 1 <= numberOfWords
+                    # Dead end (nothing follows this key in the source): restart from a fresh key
+                    workingKey = @getKey(keyLength)
+                    distributedChar = " #{workingKey}"
+                else
+                    # Dead end with no word budget left. Stop here: falling through would
+                    # append the text "null" and retry the same key forever.
+                    break
+
+                phrase += distributedChar
+                currentWords++ if /\s/.test(distributedChar)
+
+            # Strip trailing clause separators and whitespace, then close with a period
+            rCleanEnd = new RegExp("[\\\\#{LibroIpsum.clauseSeparators.join('\\\\')}\\s]*$")
+            phrase = phrase.replace(rCleanEnd, '') + '.'
+
+            phrase

Generates a random key from the text. +Preference is given to keys which start a sentence, but if no sentence structure is detected, a random substring is generated.

        getKey: (length) ->
+            # A zero or missing length yields an empty key
+            return '' unless length
+
+            # Character-class body listing the sentence-ending characters
+            enderClass = "\\\\#{LibroIpsum.sentenceEnders.join('\\\\')}"
+            sentenceStarts = @sourceText.match(new RegExp("(^[A-Z][a-z]{#{length - 1}}|[#{enderClass}]\\s*[A-Z][a-z]{#{length - 1}})", 'gm'))
+
+            # No sentence structure detected: fall back to a random substring
+            unless sentenceStarts
+                offset = Math.floor(Math.random() * (@sourceText.length - length))
+                return @sourceText.substring(offset, offset + length)
+
+            # Pick a random sentence start and drop the preceding terminator and whitespace
+            pick = sentenceStarts[Math.floor(Math.random() * sentenceStarts.length)]
+            pick.replace(new RegExp("^[#{enderClass}]?\\s*(.+)"), '$1')

Returns a character chosen according to the distribution of characters that follow the specified key in the source text, or null if nothing follows the key

        getDistributedChar: (key) ->
+            # Returns one character drawn from the characters that follow `key` in the
+            # source text, or null when no usable character follows it.
+            # An empty key matches at every index (indexOf never reports -1 for it),
+            # so the scan below would never terminate; treat it as a dead end.
+            return null unless key
+
+            # First request for this key: record the character after every occurrence
+            unless @frequencyLib.contains(key)
+                foundIndex = @sourceText.indexOf(key)
+                while foundIndex >= 0
+                    keyMatchEnd = foundIndex + key.length
+                    lookAhead = @sourceText[keyMatchEnd]
+                    # Skip matches at the very end of the text and ignored punctuation
+                    if keyMatchEnd < @sourceText.length and lookAhead not in LibroIpsum.ignoredCharacters
+                        @frequencyLib.add(key, lookAhead)
+                    # Advance by one so overlapping occurrences are also found
+                    foundIndex = @sourceText.indexOf(key, foundIndex + 1)
+
+            return null unless @frequencyLib.getFrequencies(key)
+            @frequencyLib.randomUniformChoose(key)

MultiSet

+ +

A set in which members can appear more than once

    class MultiSet
        constructor: (initialItem) ->
+            # Total number of stored items, counting duplicates
+            @cardinality = 0
+            # Maps each distinct item to its occurrence count
+            @multiSetRep = {}
+
+            # Seed the set with the optional first item. The original referenced an
+            # undefined `initialChar` here and threw whenever an item was passed.
+            @add(initialItem) if initialItem?

Returns the number of elements in this multiset (ie its cardinality). +Because multisets can include duplicates, the cardinality may be larger than the number of distinct elements.

        getCardinality: ->
+            # Duplicates are included in this running total
+            return @cardinality

Returns the number of occurrences of a given element in the multiset

        getElementCount: (target) ->
+            # Items that were never added have no entry and count as zero
+            occurrences = @multiSetRep[target]
+            if occurrences then occurrences else 0

Returns a set such that every element in the multiset is in the set (but no duplicates exist)

        getElementSet: ->
+            # Each distinct item appears exactly once, whatever its count
+            distinctItems = Object.keys @multiSetRep
+            distinctItems

Adds a single element to the multiset, increasing cardinality by one

        add: (item) ->
+            # Start the count at zero the first time an item is seen
+            unless item of @multiSetRep
+                @multiSetRep[item] = 0
+
+            @multiSetRep[item]++
+            @cardinality++

Removes the target, if it is present in the multiset. +Returns true if and only if it changes the multiset. +Note that this method removes only a single instance of the target. +Thus, assuming the target is in the multiset, this method decreases the cardinality of the multiset by one.

        remove: (target) ->
+            # Removes a single instance of `target`.
+            # Returns true if the multiset changed, false if `target` was not present.
+            return false unless target of @multiSetRep
+
+            @multiSetRep[target]--
+            # Drop the entry entirely once its last instance is gone
+            delete @multiSetRep[target] if @multiSetRep[target] is 0
+            @cardinality--
+
+            # The original returned the pre-decrement cardinality (a number) here
+            true

Returns an item chosen randomly based upon the distribution of items of the multiset

        randomUniformChoose: ->
+            # Pick a position among all stored instances, then walk the distinct
+            # items subtracting their counts until that position is reached
+            remaining = Math.floor(Math.random() * @cardinality)
+
+            for item, occurrences of @multiSetRep
+                remaining -= occurrences
+                return item if remaining < 0

FrequencyLibrary

+ +

Helper class for tracking character distribution following keys, and choosing random character based upon distribution.

    class FrequencyLibrary
        constructor: ->
+            @freqLibRep = {} # maps a key string to the MultiSet of characters recorded for that key

Returns number of keys in the library

        size: ->
+            # One entry per key that has at least one recorded character
+            keyNames = Object.keys(@freqLibRep)
+            keyNames.length

Returns true if the target key is contained in the library

        contains: (target) ->
+            # Keys are arbitrary substrings of the source text, so they can collide with
+            # names inherited from Object.prototype (e.g. "valueOf", "toString").
+            # Only the library's own entries count as contained.
+            Object::hasOwnProperty.call(@freqLibRep, target)

Returns a MultiSet for the specified key in the library

        getFrequencies: (target) ->
+            # Returns the MultiSet stored for `target`, or undefined when the key is unknown.
+            # The own-property check stops inherited Object.prototype members
+            # (e.g. "constructor") from being returned as if they were a MultiSet.
+            return undefined unless Object::hasOwnProperty.call(@freqLibRep, target)
+            @freqLibRep[target]

Modifies the character occurrences associated with name to include one more occurrence of element

        add: (name, element) ->
+            # Reuse the key's existing multiset, creating and registering one on first use
+            followers = @getFrequencies(name) ? (@freqLibRep[name] = new MultiSet)
+            followers.add(element)

Modifies the character occurrences associated with name to include one less occurrence of element. +If this removal results in no elements being associated with name, name is removed from the library. +Returns true if and only if removal was successful.

        remove: (name, element) ->
+            # Removes one occurrence of `element` from the characters recorded for `name`.
+            # Returns true only if exactly one occurrence was removed.
+            # (The original tested an undefined `target` here and always threw.)
+            return false unless @contains(name)
+
+            ms = @getFrequencies(name)
+
+            cardBefore = ms.getCardinality()
+            ms.remove(element)
+            cardAfter = ms.getCardinality()
+
+            # Drop the key once nothing is recorded for it. The original called
+            # `.remove` on an undefined bare `freqLibRep`.
+            delete @freqLibRep[name] if cardAfter is 0
+
+            cardBefore - cardAfter is 1

Returns a random character, chosen from the same distribution as the characters appear in the text. +For example, if 15% of the characters following "the" are 'i', then this method should return an 'i' 15% of the time.

        randomUniformChoose: (name) ->
+            # Returns a character drawn from the distribution recorded for `name`,
+            # or null when nothing is recorded for it.
+            ms = @getFrequencies(name)
+            # null rather than the number 0: callers test the result with `?`,
+            # and 0 would pass that test and be treated as a character
+            return null unless ms?
+            ms.randomUniformChoose()

Object.keys polyfill (ES5)

    Object.keys ?= (obj) ->
+        # Collect the object's own enumerable property names. The original iterated
+        # an undefined `keys` variable, so the fallback threw on first use.
+        key for own key of obj

Expose LibroIpsum

Add to global object

    this.LibroIpsum = LibroIpsum # `this` is whatever context the enclosing `do ->` wrapper runs with - presumably the global object; confirm

Expose to Node.js

    if module?.exports? then module.exports = LibroIpsum

Define with RequireJS

    isAmdLoader = typeof define is 'function' and define.amd?
+    define('LibroIpsum', -> LibroIpsum) if isAmdLoader
+
+
\ No newline at end of file diff --git a/docs/docco.css b/docs/docco.css new file mode 100644 index 0000000..04cc7ec --- /dev/null +++ b/docs/docco.css @@ -0,0 +1,192 @@ +/*--------------------- Layout and Typography ----------------------------*/ +body { + font-family: 'Palatino Linotype', 'Book Antiqua', Palatino, FreeSerif, serif; + font-size: 15px; + line-height: 22px; + color: #252519; + margin: 0; padding: 0; +} +a { + color: #261a3b; +} + a:visited { + color: #261a3b; + } +p { + margin: 0 0 15px 0; +} +h1, h2, h3, h4, h5, h6 { + margin: 0px 0 15px 0; +} + h1 { + margin-top: 40px; + } +hr { + border: 0 none; + border-top: 1px solid #e5e5ee; + height: 1px; + margin: 20px 0; +} +#container { + position: relative; +} +#background { + position: fixed; + top: 0; left: 525px; right: 0; bottom: 0; + background: #f5f5ff; + border-left: 1px solid #e5e5ee; + z-index: -1; +} +#jump_to, #jump_page { + background: white; + -webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777; + -webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px; + font: 10px Arial; + text-transform: uppercase; + cursor: pointer; + text-align: right; +} +#jump_to, #jump_wrapper { + position: fixed; + right: 0; top: 0; + padding: 5px 10px; +} + #jump_wrapper { + padding: 0; + display: none; + } + #jump_to:hover #jump_wrapper { + display: block; + } + #jump_page { + padding: 5px 0 3px; + margin: 0 0 25px 25px; + } + #jump_page .source { + display: block; + padding: 5px 10px; + text-decoration: none; + border-top: 1px solid #eee; + } + #jump_page .source:hover { + background: #f5f5ff; + } + #jump_page .source:first-child { + } +table td { + border: 0; + outline: 0; +} + td.docs, th.docs { + max-width: 450px; + min-width: 450px; + min-height: 5px; + padding: 10px 25px 1px 50px; + overflow-x: hidden; + vertical-align: top; + text-align: left; + } + .docs pre { + margin: 15px 0 15px; + padding-left: 15px; + } + .docs p tt, .docs p code { + background: #f8f8ff; + border: 1px 
solid #dedede; + font-size: 12px; + padding: 0 0.2em; + } + .pilwrap { + position: relative; + } + .pilcrow { + font: 12px Arial; + text-decoration: none; + color: #454545; + position: absolute; + top: 3px; left: -20px; + padding: 1px 2px; + opacity: 0; + -webkit-transition: opacity 0.2s linear; + } + td.docs:hover .pilcrow { + opacity: 1; + } + td.code, th.code { + padding: 14px 15px 16px 25px; + width: 100%; + vertical-align: top; + background: #f5f5ff; + border-left: 1px solid #e5e5ee; + } + pre, tt, code { + font-size: 12px; line-height: 18px; + font-family: Menlo, Monaco, Consolas, "Lucida Console", monospace; + margin: 0; padding: 0; + } + + +/*---------------------- Syntax Highlighting -----------------------------*/ +td.linenos { background-color: #f0f0f0; padding-right: 10px; } +span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; } +body .hll { background-color: #ffffcc } +body .c { color: #408080; font-style: italic } /* Comment */ +body .err { border: 1px solid #FF0000 } /* Error */ +body .k { color: #954121 } /* Keyword */ +body .o { color: #666666 } /* Operator */ +body .cm { color: #408080; font-style: italic } /* Comment.Multiline */ +body .cp { color: #BC7A00 } /* Comment.Preproc */ +body .c1 { color: #408080; font-style: italic } /* Comment.Single */ +body .cs { color: #408080; font-style: italic } /* Comment.Special */ +body .gd { color: #A00000 } /* Generic.Deleted */ +body .ge { font-style: italic } /* Generic.Emph */ +body .gr { color: #FF0000 } /* Generic.Error */ +body .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +body .gi { color: #00A000 } /* Generic.Inserted */ +body .go { color: #808080 } /* Generic.Output */ +body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +body .gs { font-weight: bold } /* Generic.Strong */ +body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +body .gt { color: #0040D0 } /* Generic.Traceback */ +body .kc { color: #954121 } /* Keyword.Constant */ +body 
.kd { color: #954121; font-weight: bold } /* Keyword.Declaration */ +body .kn { color: #954121; font-weight: bold } /* Keyword.Namespace */ +body .kp { color: #954121 } /* Keyword.Pseudo */ +body .kr { color: #954121; font-weight: bold } /* Keyword.Reserved */ +body .kt { color: #B00040 } /* Keyword.Type */ +body .m { color: #666666 } /* Literal.Number */ +body .s { color: #219161 } /* Literal.String */ +body .na { color: #7D9029 } /* Name.Attribute */ +body .nb { color: #954121 } /* Name.Builtin */ +body .nc { color: #0000FF; font-weight: bold } /* Name.Class */ +body .no { color: #880000 } /* Name.Constant */ +body .nd { color: #AA22FF } /* Name.Decorator */ +body .ni { color: #999999; font-weight: bold } /* Name.Entity */ +body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ +body .nf { color: #0000FF } /* Name.Function */ +body .nl { color: #A0A000 } /* Name.Label */ +body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +body .nt { color: #954121; font-weight: bold } /* Name.Tag */ +body .nv { color: #19469D } /* Name.Variable */ +body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +body .w { color: #bbbbbb } /* Text.Whitespace */ +body .mf { color: #666666 } /* Literal.Number.Float */ +body .mh { color: #666666 } /* Literal.Number.Hex */ +body .mi { color: #666666 } /* Literal.Number.Integer */ +body .mo { color: #666666 } /* Literal.Number.Oct */ +body .sb { color: #219161 } /* Literal.String.Backtick */ +body .sc { color: #219161 } /* Literal.String.Char */ +body .sd { color: #219161; font-style: italic } /* Literal.String.Doc */ +body .s2 { color: #219161 } /* Literal.String.Double */ +body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ +body .sh { color: #219161 } /* Literal.String.Heredoc */ +body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ +body .sx { color: #954121 } /* Literal.String.Other */ +body .sr { color: #BB6688 } /* Literal.String.Regex */ +body .s1 
{ color: #219161 } /* Literal.String.Single */ +body .ss { color: #19469D } /* Literal.String.Symbol */ +body .bp { color: #954121 } /* Name.Builtin.Pseudo */ +body .vc { color: #19469D } /* Name.Variable.Class */ +body .vg { color: #19469D } /* Name.Variable.Global */ +body .vi { color: #19469D } /* Name.Variable.Instance */ +body .il { color: #666666 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/lib/LibroIpsum.js b/lib/LibroIpsum.js new file mode 100644 index 0000000..a1ac7bc --- /dev/null +++ b/lib/LibroIpsum.js @@ -0,0 +1,225 @@ +/*! LibroIpsum 0.1.0 | (c) 2013 Andrew Duthie | MIT License */ +(function() { + var __slice = [].slice, + __indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; }; + + (function() { + var FrequencyLibrary, LibroIpsum, MultiSet, _ref; + LibroIpsum = (function() { + + LibroIpsum.ignoredCharacters = ['"', '`', '‘', '’', '“', '”', '[', ']', '(', ')', '{', '}', '«', '»']; + + LibroIpsum.sentenceEnders = ['.', '!', '?']; + + LibroIpsum.clauseSeparators = __slice.call(LibroIpsum.sentenceEnders).concat([','], [';']); + + function LibroIpsum(sourceText) { + this.sourceText = sourceText; + this.frequencyLib = new FrequencyLibrary; + } + + LibroIpsum.prototype.generate = function(numberOfWords, keyLength) { + var currentWords, distributedChar, phrase, rCleanEnd, workingKey; + if (keyLength == null) { + keyLength = 6; + } + currentWords = 0; + workingKey = this.getKey(keyLength); + phrase = workingKey; + while (currentWords <= numberOfWords) { + distributedChar = this.getDistributedChar(workingKey); + if (distributedChar != null) { + workingKey += distributedChar; + workingKey = workingKey.slice(1); + } else if (currentWords + 1 <= numberOfWords) { + workingKey = this.getKey(keyLength); + distributedChar = " " + workingKey; + } + phrase += distributedChar; + if (/\s/.test(distributedChar)) { + currentWords++; + } + } + 
rCleanEnd = new RegExp("[\\\\" + (LibroIpsum.clauseSeparators.join('\\\\')) + "\\s]*$"); + phrase = phrase.replace(rCleanEnd, '') + '.'; + return phrase; + }; + + LibroIpsum.prototype.getKey = function(length) { + var concatSentenceEnders, key, keyMatch, rClean, rKey, startIndex; + if (!length) { + return ''; + } + concatSentenceEnders = "\\\\" + (LibroIpsum.sentenceEnders.join('\\\\')); + rKey = new RegExp("(^[A-Z][a-z]{" + (length - 1) + "}|[" + concatSentenceEnders + "]\\s*[A-Z][a-z]{" + (length - 1) + "})", 'gm'); + keyMatch = this.sourceText.match(rKey); + if (keyMatch) { + rClean = new RegExp("^[" + concatSentenceEnders + "]?\\s*(.+)"); + key = keyMatch[Math.floor(Math.random() * keyMatch.length)].replace(rClean, '$1'); + } else { + startIndex = Math.floor(Math.random() * (this.sourceText.length - length)); + key = this.sourceText.substring(startIndex, startIndex + length); + } + return key; + }; + + LibroIpsum.prototype.getDistributedChar = function(key) { + var foundIndex, keyMatchEnd, lookAhead; + if (!this.frequencyLib.contains(key)) { + foundIndex = 0; + while (foundIndex >= 0) { + foundIndex = this.sourceText.indexOf(key, foundIndex); + keyMatchEnd = foundIndex + key.length; + if (foundIndex >= 0) { + foundIndex++; + lookAhead = this.sourceText[keyMatchEnd]; + if (keyMatchEnd < this.sourceText.length && __indexOf.call(LibroIpsum.ignoredCharacters, lookAhead) < 0) { + this.frequencyLib.add(key, lookAhead); + } + } + } + } + if (!this.frequencyLib.getFrequencies(key)) { + return null; + } + return this.frequencyLib.randomUniformChoose(key); + }; + + return LibroIpsum; + + })(); + MultiSet = (function() { + + function MultiSet(initialItem) { + this.cardinality = 0; + this.multiSetRep = {}; + if (initialItem) { + this.add(initialChar); + } + } + + MultiSet.prototype.getCardinality = function() { + return this.cardinality; + }; + + MultiSet.prototype.getElementCount = function(target) { + return this.multiSetRep[target] || 0; + }; + + 
MultiSet.prototype.getElementSet = function() { + return Object.keys(this.multiSetRep); + }; + + MultiSet.prototype.add = function(item) { + if (!(item in this.multiSetRep)) { + this.multiSetRep[item] = 0; + } + this.multiSetRep[item]++; + return this.cardinality++; + }; + + MultiSet.prototype.remove = function(target) { + if (!(target in this.multiSetRep)) { + return false; + } + this.multiSetRep[target]--; + if (this.multiSetRep[target] === 0) { + delete this.multiSetRep[target]; + } + return this.cardinality--; + }; + + MultiSet.prototype.randomUniformChoose = function() { + var choice, key, keyCount, _ref; + choice = Math.floor(Math.random() * this.cardinality); + _ref = this.multiSetRep; + for (key in _ref) { + keyCount = _ref[key]; + choice -= keyCount; + if (choice < 0) { + return key; + } + } + }; + + return MultiSet; + + })(); + FrequencyLibrary = (function() { + + function FrequencyLibrary() { + this.freqLibRep = {}; + } + + FrequencyLibrary.prototype.size = function() { + return Object.keys(this.freqLibRep).length; + }; + + FrequencyLibrary.prototype.contains = function(target) { + return target in this.freqLibRep; + }; + + FrequencyLibrary.prototype.getFrequencies = function(target) { + return this.freqLibRep[target]; + }; + + FrequencyLibrary.prototype.add = function(name, element) { + var ms; + ms = this.getFrequencies(name); + if (ms == null) { + ms = this.freqLibRep[name] = new MultiSet; + } + return ms.add(element); + }; + + FrequencyLibrary.prototype.remove = function(name, element) { + var cardAfter, cardBefore, ms; + if (!this.contains(target)) { + return false; + } + ms = this.getFrequencies(name); + cardBefore = ms.getCardinality(); + ms.remove(element); + cardAfter = ms.getCardinality(); + if (ms.getCardinality() === 0) { + freqLibRep.remove(name); + } + return cardBefore - cardAfter === 1; + }; + + FrequencyLibrary.prototype.randomUniformChoose = function(name) { + var ms; + ms = this.getFrequencies(name); + if (ms == null) { + return 0; + } 
+ return ms.randomUniformChoose(); + }; + + return FrequencyLibrary; + + })(); + if ((_ref = Object.keys) == null) { + Object.keys = function(obj) { + var key, _results; + _results = []; + for (key in keys) { + if (obj.hasOwnProperty(key)) { + _results.push(key); + } + } + return _results; + }; + } + this.LibroIpsum = LibroIpsum; + if ((typeof module !== "undefined" && module !== null ? module.exports : void 0) != null) { + module.exports = LibroIpsum; + } + if (typeof define === 'function' && (define.amd != null)) { + return define('LibroIpsum', function() { + return LibroIpsum; + }); + } + })(); + +}).call(this); diff --git a/lib/LibroIpsum.min.js b/lib/LibroIpsum.min.js new file mode 100644 index 0000000..1774e43 --- /dev/null +++ b/lib/LibroIpsum.min.js @@ -0,0 +1,2 @@ +/*! LibroIpsum 0.1.0 | (c) 2013 Andrew Duthie | MIT License */ +(function(){var t=[].slice,e=[].indexOf||function(t){for(var e=0,n=this.length;n>e;e++)if(e in this&&this[e]===t)return e;return-1};(function(){var n,i,r,o;return i=function(){function i(t){this.sourceText=t,this.frequencyLib=new n}return i.ignoredCharacters=['"',"`","‘","’","“","”","[","]","(",")","{","}","«","»"],i.sentenceEnders=[".","!","?"],i.clauseSeparators=t.call(i.sentenceEnders).concat([","],[";"]),i.prototype.generate=function(t,e){var n,r,o,u,s;for(null==e&&(e=6),n=0,s=this.getKey(e),o=s;t>=n;)r=this.getDistributedChar(s),null!=r?(s+=r,s=s.slice(1)):t>=n+1&&(s=this.getKey(e),r=" "+s),o+=r,/\s/.test(r)&&n++;return u=RegExp("[\\\\"+i.clauseSeparators.join("\\\\")+"\\s]*$"),o=o.replace(u,"")+"."},i.prototype.getKey=function(t){var e,n,r,o,u,s;return t?(e="\\\\"+i.sentenceEnders.join("\\\\"),u=RegExp("(^[A-Z][a-z]{"+(t-1)+"}|["+e+"]\\s*[A-Z][a-z]{"+(t-1)+"})","gm"),r=this.sourceText.match(u),r?(o=RegExp("^["+e+"]?\\s*(.+)"),n=r[Math.floor(Math.random()*r.length)].replace(o,"$1")):(s=Math.floor(Math.random()*(this.sourceText.length-t)),n=this.sourceText.substring(s,s+t)),n):""},i.prototype.getDistributedChar=function(t){var 
n,r,o;if(!this.frequencyLib.contains(t))for(n=0;n>=0;)n=this.sourceText.indexOf(t,n),r=n+t.length,n>=0&&(n++,o=this.sourceText[r],this.sourceText.length>r&&0>e.call(i.ignoredCharacters,o)&&this.frequencyLib.add(t,o));return this.frequencyLib.getFrequencies(t)?this.frequencyLib.randomUniformChoose(t):null},i}(),r=function(){function t(t){this.cardinality=0,this.multiSetRep={},t&&this.add(initialChar)}return t.prototype.getCardinality=function(){return this.cardinality},t.prototype.getElementCount=function(t){return this.multiSetRep[t]||0},t.prototype.getElementSet=function(){return Object.keys(this.multiSetRep)},t.prototype.add=function(t){return t in this.multiSetRep||(this.multiSetRep[t]=0),this.multiSetRep[t]++,this.cardinality++},t.prototype.remove=function(t){return t in this.multiSetRep?(this.multiSetRep[t]--,0===this.multiSetRep[t]&&delete this.multiSetRep[t],this.cardinality--):!1},t.prototype.randomUniformChoose=function(){var t,e,n,i;t=Math.floor(Math.random()*this.cardinality),i=this.multiSetRep;for(e in i)if(n=i[e],t-=n,0>t)return e},t}(),n=function(){function t(){this.freqLibRep={}}return t.prototype.size=function(){return Object.keys(this.freqLibRep).length},t.prototype.contains=function(t){return t in this.freqLibRep},t.prototype.getFrequencies=function(t){return this.freqLibRep[t]},t.prototype.add=function(t,e){var n;return n=this.getFrequencies(t),null==n&&(n=this.freqLibRep[t]=new r),n.add(e)},t.prototype.remove=function(t,e){var n,i,r;return this.contains(target)?(r=this.getFrequencies(t),i=r.getCardinality(),r.remove(e),n=r.getCardinality(),0===r.getCardinality()&&freqLibRep.remove(t),1===i-n):!1},t.prototype.randomUniformChoose=function(t){var e;return e=this.getFrequencies(t),null==e?0:e.randomUniformChoose()},t}(),null==(o=Object.keys)&&(Object.keys=function(t){var e,n;n=[];for(e in keys)t.hasOwnProperty(e)&&n.push(e);return n}),this.LibroIpsum=i,null!=("undefined"!=typeof module&&null!==module?module.exports:void 
0)&&(module.exports=i),"function"==typeof define&&null!=define.amd?define("LibroIpsum",function(){return i}):void 0})()}).call(this); \ No newline at end of file diff --git a/package.json b/package.json new file mode 100755 index 0000000..f063e14 --- /dev/null +++ b/package.json @@ -0,0 +1,20 @@ +{ + "name": "LibroIpsum", + "version": "0.1.0", + "description": "Generates phrases using character distribution of text", + "keywords": [ + "libro", + "lorem", + "ipsum", + "lipsum" + ], + "author": "Andrew Duthie ", + "license": "MIT", + "devDependencies": { + "grunt": "~0.4.0rc7", + "grunt-contrib-watch": "~0.2.0", + "grunt-contrib-coffee": "~0.4.0", + "grunt-contrib-concat": "~0.1.2", + "grunt-contrib-uglify": "~0.1.1" + } +} diff --git a/src/LibroIpsum.coffee b/src/LibroIpsum.coffee new file mode 100644 index 0000000..a827ae6 --- /dev/null +++ b/src/LibroIpsum.coffee @@ -0,0 +1,230 @@ +# LibroIpsum is a simple placeholder text generator similar to other [lorem ipsum](http://en.wikipedia.org/wiki/Lorem_ipsum) tools. +# LibroIpsum differs from traditional lorem ipsum in that it can be used to generate random phrases from any source text, using character distribution analysis to generate new phrases. + +# Use LibroIpsum from Node.js, RequireJS, or directly in the browser. + +# ### Node.js +# npm install LibroIpsum +# ### RequireJS +# define(['path/to/libs/LibroIpsum.js'], +# function(LibroIpsum) { +# // ... 
#     });
# ### Browser
#     <script src="path/to/libs/LibroIpsum.js"></script>

# # Usage
#     new LibroIpsum(sourceText).generate(numberOfWords[, keyLength]);

# * `sourceText`: String representation of text from which phrases are to be generated
# * `numberOfWords`: Number of words to be generated
# * `keyLength`: Length of key (integer), where larger number will create phrase more similar to original text (optional, defaults to 6)

#
do ->
  # Own-property test that cannot be fooled by members inherited from `Object.prototype`
  # (a key such as "toString" or "valueOf" is perfectly valid text).
  hasOwn = Object::hasOwnProperty

  # Backslash-escapes every character in `chars` and concatenates the result,
  # so that it can be embedded safely inside a regular expression character class.
  escapeForRegExp = (chars) ->
    ("\\#{char}" for char in chars).join('')

  #### LibroIpsum
  # Generates phrases using character distribution of text from a given string
  class LibroIpsum
    # Ignore opening and closing punctuation because of difficulty to ensure matching pair
    @ignoredCharacters: [
      '"'
      '`'
      '‘', '’'
      '“', '”'
      '[', ']'
      '(', ')'
      '{', '}'
      '«', '»'
    ]

    # Sentence-ending characters. Used to locate phrase-starting key
    @sentenceEnders: [
      '.'
      '!'
      '?'
    ]

    # Clause-separating characters. Used to cleanly end generated phrase
    @clauseSeparators: [
      @sentenceEnders...
      ','
      ';'
    ]

    # * `sourceText`: text whose character distribution drives generation.
    #   A missing value is treated as an empty string; anything else is coerced to a string.
    constructor: (sourceText = '') ->
      @sourceText = String(sourceText)
      @frequencyLib = new FrequencyLibrary

    # Return randomly generated phrase with `numberOfWords` words based on character distribution of text, using key length `keyLength`.
    # Words are counted by the whitespace characters appended to the phrase.
    # Returns an empty string when fewer than one word is requested.
    generate: (numberOfWords, keyLength = 6) ->
      return '' unless numberOfWords > 0

      currentWords = 0
      workingKey = @getKey(keyLength)
      phrase = workingKey

      while currentWords < numberOfWords
        nextChunk = @getDistributedChar(workingKey)
        if nextChunk?
          # Slide the key window forward by one character
          workingKey = (workingKey + nextChunk).slice(1)
        else if currentWords + 1 < numberOfWords
          # Dead end (nothing ever follows this key): restart from a fresh key,
          # but only if there is still room for at least one more word
          workingKey = @getKey(keyLength)
          nextChunk = " #{workingKey}"
        else
          # Dead end with no room left: stop rather than appending a null
          # character forever
          break

        phrase += nextChunk
        currentWords++ if /\s/.test(nextChunk)

      # Strip trailing clause separators and whitespace, then close the phrase
      rCleanEnd = new RegExp("[#{escapeForRegExp(LibroIpsum.clauseSeparators)}\\s]*$")
      phrase.replace(rCleanEnd, '') + '.'

    # Generates a random key of `length` characters from the text.
    # Preference is given to keys which start a sentence, but if no sentence structure is detected, a random substring is generated.
    # Returns an empty string when `length` is not positive or the text is empty.
    getKey: (length) ->
      return '' unless length > 0 and @sourceText

      enders = escapeForRegExp(LibroIpsum.sentenceEnders)
      # Capitalised word start: one upper-case letter followed by `length - 1` lower-case letters
      wordStart = "[A-Z][a-z]{#{length - 1}}"
      rKey = new RegExp("(^#{wordStart}|[#{enders}]\\s*#{wordStart})", 'gm')
      keyMatch = @sourceText.match(rKey)

      if keyMatch
        # Drop the sentence ender and whitespace that preceded the matched word start
        rClean = new RegExp("^[#{enders}]?\\s*(.+)")
        keyMatch[Math.floor(Math.random() * keyMatch.length)].replace(rClean, '$1')
      else
        # Clamp the range so a text shorter than the key never yields a negative index
        startRange = Math.max(0, @sourceText.length - length)
        startIndex = Math.floor(Math.random() * startRange)
        @sourceText.substring(startIndex, startIndex + length)

    # Returns a character based on the character distribution of characters following the specified key.
    # Returns null when the key is empty or when nothing usable ever follows it in the text.
    getDistributedChar: (key) ->
      # `indexOf('')` matches at every position and never returns -1, so an
      # empty key would otherwise scan forever
      return null unless key

      unless @frequencyLib.contains(key)
        foundIndex = @sourceText.indexOf(key)
        while foundIndex >= 0
          followerIndex = foundIndex + key.length
          if followerIndex < @sourceText.length
            follower = @sourceText.charAt(followerIndex)
            @frequencyLib.add(key, follower) unless follower in LibroIpsum.ignoredCharacters
          # Advance by one so overlapping occurrences are counted as well
          foundIndex = @sourceText.indexOf(key, foundIndex + 1)

      return null unless @frequencyLib.contains(key)
      @frequencyLib.randomUniformChoose(key)

  #### MultiSet
  # A set in which members can appear more than once
  class MultiSet
    # Creates an empty multiset, optionally seeded with a single `initialItem`
    constructor: (initialItem) ->
      @cardinality = 0
      @multiSetRep = {}

      @add(initialItem) if initialItem?

    # Returns the number of elements in this multiset (ie its cardinality).
    # Because multisets can include duplicates, the cardinality may be larger than the number of distinct elements.
    getCardinality: ->
      @cardinality

    # Returns the number of occurrences of a given element in the multiset
    getElementCount: (target) ->
      if hasOwn.call(@multiSetRep, target) then @multiSetRep[target] else 0

    # Returns a set such that every element in the multiset is in the set (but no duplicates exist)
    getElementSet: ->
      Object.keys(@multiSetRep)

    # Adds a single element to the multiset, increasing cardinality by one
    add: (item) ->
      @multiSetRep[item] = 0 unless hasOwn.call(@multiSetRep, item)
      @multiSetRep[item]++

      @cardinality++

    # Removes the target, if it is present in the multiset.
    # Returns true if and only if it changes the multiset.
    # Note that this method removes only a single instance of the target.
    # Thus, assuming the target is in the multiset, this method decreases the cardinality of the multiset by one.
    remove: (target) ->
      return false unless hasOwn.call(@multiSetRep, target)

      @multiSetRep[target]--
      delete @multiSetRep[target] if @multiSetRep[target] is 0

      @cardinality--
      true

    # Returns an item chosen randomly based upon the distribution of items of the multiset.
    # Returns null when the multiset is empty.
    randomUniformChoose: ->
      choice = Math.floor(Math.random() * @cardinality)

      # Walk the buckets, consuming each count, until the chosen slot is reached
      for own key, keyCount of @multiSetRep
        choice -= keyCount
        return key if choice < 0

      null

  #### FrequencyLibrary
  # Helper class for tracking character distribution following keys, and choosing random character based upon distribution.
  class FrequencyLibrary
    # Creates an empty library
    constructor: ->
      @freqLibRep = {}

    # Returns number of keys in the library
    size: ->
      Object.keys(@freqLibRep).length

    # Returns true if the target key is contained in the library
    contains: (target) ->
      hasOwn.call(@freqLibRep, target)

    # Returns the MultiSet for the specified key in the library, or undefined if the key is unknown
    getFrequencies: (target) ->
      if @contains(target) then @freqLibRep[target] else undefined

    # Modifies the character occurrences associated with name to include one more occurrence of element
    add: (name, element) ->
      ms = @getFrequencies(name)
      ms = @freqLibRep[name] = new MultiSet unless ms?

      ms.add(element)

    # Modifies the character occurrences associated with name to include one less occurrence of element.
    # If this removal results in no elements being associated with name, name is removed from the library.
    # Returns true if and only if removal was successful.
    remove: (name, element) ->
      return false unless @contains(name)

      ms = @getFrequencies(name)
      removed = ms.remove(element)

      delete @freqLibRep[name] if ms.getCardinality() is 0

      removed

    # Returns a random character, chosen from the same distribution as the characters appear in the text.
    # For example, if 15% of the characters following "the" are 'i', then this method should return an 'i' 15% of the time.
    # Returns null when name is not in the library.
    randomUniformChoose: (name) ->
      ms = @getFrequencies(name)
      return null unless ms?
      ms.randomUniformChoose()

  # Object.keys polyfill (ES5)
  Object.keys ?= (obj) ->
    (key for own key of obj)

  #### Expose LibroIpsum

  # Add to global object
  this.LibroIpsum = LibroIpsum

  # Expose to Node.js
  module.exports = LibroIpsum if module?.exports?

  # Define with RequireJS
  if typeof define is 'function' and define.amd?
    define 'LibroIpsum', -> LibroIpsum