Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Merge branch 'release/0.5.0'

  • Loading branch information...
commit 798638ce98f8558ce8777c8f1d5fb4d5299f3cc6 2 parents 8a7af1c + 45e6ad0
@benlangfeld authored
Showing with 1,753 additions and 384 deletions.
  1. +15 −0 CHANGELOG.md
  2. +108 −14 README.md
  3. +71 −10 lib/ruby_speech/generic_element.rb
  4. +4 −1 lib/ruby_speech/grxml.rb
  5. +4 −0 lib/ruby_speech/grxml/element.rb
  6. +177 −46 lib/ruby_speech/grxml/grammar.rb
  7. +12 −11 lib/ruby_speech/grxml/item.rb
  8. +16 −0 lib/ruby_speech/grxml/match.rb
  9. +10 −0 lib/ruby_speech/grxml/no_match.rb
  10. +4 −11 lib/ruby_speech/grxml/one_of.rb
  11. +0 −11 lib/ruby_speech/grxml/rule.rb
  12. +0 −11 lib/ruby_speech/grxml/ruleref.rb
  13. +0 −11 lib/ruby_speech/grxml/tag.rb
  14. +8 −11 lib/ruby_speech/grxml/token.rb
  15. +6 −0 lib/ruby_speech/ssml.rb
  16. +1 −12 lib/ruby_speech/ssml/audio.rb
  17. +0 −11 lib/ruby_speech/ssml/break.rb
  18. +24 −0 lib/ruby_speech/ssml/desc.rb
  19. +1 −12 lib/ruby_speech/ssml/emphasis.rb
  20. +43 −0 lib/ruby_speech/ssml/mark.rb
  21. +25 −0 lib/ruby_speech/ssml/p.rb
  22. +72 −0 lib/ruby_speech/ssml/phoneme.rb
  23. +1 −12 lib/ruby_speech/ssml/prosody.rb
  24. +25 −0 lib/ruby_speech/ssml/s.rb
  25. +0 −11 lib/ruby_speech/ssml/say_as.rb
  26. +2 −44 lib/ruby_speech/ssml/speak.rb
  27. +42 −0 lib/ruby_speech/ssml/sub.rb
  28. +1 −12 lib/ruby_speech/ssml/voice.rb
  29. +1 −1  lib/ruby_speech/version.rb
  30. +478 −35 spec/ruby_speech/grxml/grammar_spec.rb
  31. +5 −2 spec/ruby_speech/grxml/item_spec.rb
  32. +49 −0 spec/ruby_speech/grxml/match_spec.rb
  33. +17 −0 spec/ruby_speech/grxml/no_match_spec.rb
  34. +1 −1  spec/ruby_speech/grxml/one_of_spec.rb
  35. +1 −1  spec/ruby_speech/grxml/rule_spec.rb
  36. +1 −1  spec/ruby_speech/grxml/ruleref_spec.rb
  37. +1 −1  spec/ruby_speech/grxml/tag_spec.rb
  38. +11 −1 spec/ruby_speech/grxml/token_spec.rb
  39. +64 −5 spec/ruby_speech/grxml_spec.rb
  40. +5 −6 spec/ruby_speech/ssml/audio_spec.rb
  41. +1 −1  spec/ruby_speech/ssml/break_spec.rb
  42. +57 −0 spec/ruby_speech/ssml/desc_spec.rb
  43. +1 −4 spec/ruby_speech/ssml/emphasis_spec.rb
  44. +53 −0 spec/ruby_speech/ssml/mark_spec.rb
  45. +96 −0 spec/ruby_speech/ssml/p_spec.rb
  46. +65 −0 spec/ruby_speech/ssml/phoneme_spec.rb
  47. +9 −4 spec/ruby_speech/ssml/prosody_spec.rb
  48. +92 −0 spec/ruby_speech/ssml/s_spec.rb
  49. +1 −1  spec/ruby_speech/ssml/say_as_spec.rb
  50. +1 −6 spec/ruby_speech/ssml/speak_spec.rb
  51. +57 −0 spec/ruby_speech/ssml/sub_spec.rb
  52. +1 −6 spec/ruby_speech/ssml/voice_spec.rb
  53. +0 −4 spec/spec_helper.rb
  54. +13 −53 spec/support/matchers.rb
View
15 CHANGELOG.md
@@ -1,5 +1,20 @@
# develop
+# 0.5.0 - 2012-01-03
+ * Feature: Add a whole bunch more SSML elements:
+ ** p & s
+ ** mark
+ ** desc
+ ** sub
+ ** phoneme
+ * Feature: Added the ability to inline grammar rule references in both destructive and non-destructive modes
+ * Feature: Added the ability to tokenize a grammar, turning all tokens into unambiguous `<token/>` elements
+ * Feature: Added the ability to whitespace normalize a grammar
+ * Feature: Added the ability to match an input string against a Grammar
+ * Feature: Constructing a GRXML grammar with a root rule specified but not provided will raise an exception
+ * Feature: Embedding a GRXML grammar of a mode different from the host will raise an exception
+ * Bugfix: Fix upward traversal through a document via #parent
+
# 0.4.0 - 2011-12-30
* Feature: Add the ability to look up child elements by name/attributes easily
* Feature: Allow easy access to a GRXML grammar's root rule element
View
122 README.md
@@ -46,15 +46,15 @@ Once your `Speak` is fully prepared and you're ready to send it off for processi
You may also then need to call `to_s`.
-Contruct a GRXML (SRGS) document like this:
+Construct a GRXML (SRGS) document like this:
```ruby
require 'ruby_speech'
-grammy = RubySpeech::GRXML.draw :mode => 'dtmf', :root => 'digits' do
- rule id: 'digits' do
+grammy = RubySpeech::GRXML.draw mode: :dtmf, root: 'pin' do
+ rule id: 'digit' do
one_of do
- 0.upto(9) {|d| item { d.to_s } }
+ ('0'..'9').map { |d| item { d } }
end
end
@@ -79,8 +79,8 @@ grammy.to_s
which becomes
```xml
-<grammar xmlns="http://www.w3.org/2001/06/grammar" version="1.0" xml:lang="en-US" mode="dtmf" root="digits">
- <rule id="digits">
+<grammar xmlns="http://www.w3.org/2001/06/grammar" version="1.0" xml:lang="en-US" mode="dtmf" root="pin">
+ <rule id="digit">
<one-of>
<item>0</item>
<item>1</item>
@@ -103,6 +103,101 @@ which becomes
</grammar>
```
+### Grammar matching
+
+It is possible to match some arbitrary input against a GRXML grammar. In order to do so, certain normalization routines should first be run on the grammar in order to prepare it for matching. These are reference inlining, tokenization and whitespace normalization, and are described [in the SRGS spec](http://www.w3.org/TR/speech-grammar/#S2.1). This process will transform the above grammar like so:
+
+```ruby
+grammy.inline!
+grammy.tokenize!
+grammy.normalize_whitespace
+```
+
+```xml
+<grammar xmlns="http://www.w3.org/2001/06/grammar" version="1.0" xml:lang="en-US" mode="dtmf" root="pin">
+ <rule id="pin" scope="public">
+ <one-of>
+ <item>
+ <item repeat="4">
+ <one-of>
+ <item>
+ <token>0</token>
+ </item>
+ <item>
+ <token>1</token>
+ </item>
+ <item>
+ <token>2</token>
+ </item>
+ <item>
+ <token>3</token>
+ </item>
+ <item>
+ <token>4</token>
+ </item>
+ <item>
+ <token>5</token>
+ </item>
+ <item>
+ <token>6</token>
+ </item>
+ <item>
+ <token>7</token>
+ </item>
+ <item>
+ <token>8</token>
+ </item>
+ <item>
+ <token>9</token>
+ </item>
+ </one-of>
+ </item>
+ <token>#</token>
+ </item>
+ <item>
+ <token>*</token>
+ <token>9</token>
+ </item>
+ </one-of>
+ </rule>
+</grammar>
+```
+
+Matching against some sample input strings then returns the following results:
+
+```ruby
+>> subject.match '*9'
+=> #<RubySpeech::GRXML::Match:0x00000100ae5d98
+ @mode = :dtmf,
+ @confidence = 1,
+ @utterance = "*9",
+ @interpretation = "*9"
+ >
+>> subject.match '1234#'
+=> #<RubySpeech::GRXML::Match:0x00000100b7e020
+ @mode = :dtmf,
+ @confidence = 1,
+ @utterance = "1234#",
+ @interpretation = "1234#"
+ >
+>> subject.match '5678#'
+=> #<RubySpeech::GRXML::Match:0x00000101218688
+ @mode = :dtmf,
+ @confidence = 1,
+ @utterance = "5678#",
+ @interpretation = "5678#"
+ >
+>> subject.match '1111#'
+=> #<RubySpeech::GRXML::Match:0x000001012f69d8
+ @mode = :dtmf,
+ @confidence = 1,
+ @utterance = "1111#",
+ @interpretation = "1111#"
+ >
+>> subject.match '111'
+=> #<RubySpeech::GRXML::NoMatch:0x00000101371660>
+```
+
Check out the [YARD documentation](http://rdoc.info/github/benlangfeld/ruby_speech/master/frames) for more
## Features:
@@ -114,6 +209,13 @@ Check out the [YARD documentation](http://rdoc.info/github/benlangfeld/ruby_spee
* `<say-as/>`
* `<break/>`
* `<audio/>`
+* `<p/>` and `<s/>`
+* `<phoneme/>`
+* `<sub/>`
+
+#### Misc
+* `<mark/>`
+* `<desc/>`
### GRXML
* Document construction
@@ -126,17 +228,9 @@ Check out the [YARD documentation](http://rdoc.info/github/benlangfeld/ruby_spee
## TODO:
### SSML
-#### Document Structure
-* `<p/>` and `<s/>`
-* `<phoneme/>`
-* `<sub/>`
* `<lexicon/>`
* `<meta/>` and `<metadata/>`
-#### Misc
-* `<mark/>`
-* `<desc/>`
-
### GRXML
* `<meta/>` and `<metadata/>`
* `<example/>`
View
81 lib/ruby_speech/generic_element.rb
@@ -4,7 +4,7 @@ module RubySpeech
module GenericElement
def self.included(klass)
- klass.class_attribute :registered_ns, :registered_name
+ klass.class_attribute :registered_ns, :registered_name, :defaults
klass.extend ClassMethods
end
@@ -43,7 +43,7 @@ def class_from_registration(name)
def import(node)
node = Nokogiri::XML.parse(node, nil, nil, Nokogiri::XML::ParseOptions::NOBLANKS).root unless node.is_a?(Nokogiri::XML::Node)
return node.content if node.is_a?(Nokogiri::XML::Text)
- klass = class_from_registration(node.element_name)
+ klass = class_from_registration node.element_name
if klass && klass != self
klass.import node
else
@@ -51,26 +51,73 @@ def import(node)
end
end
- def new(element_name, atts = {}, &block)
+ def new(atts = {}, &block)
blk_proc = lambda do |new_node|
- atts.each_pair { |k, v| new_node.send :"#{k}=", v }
+ (self.defaults || {}).merge(atts).each_pair { |k, v| new_node.send :"#{k}=", v }
block_return = new_node.eval_dsl_block &block
- new_node << new_node.encode_special_chars(block_return) if block_return.is_a?(String)
+ new_node << block_return if block_return.is_a?(String)
end
case RUBY_VERSION.split('.')[0,2].join.to_i
when 18
- super(element_name).tap do |n|
+ super(self.registered_name, nil, self.namespace).tap do |n|
blk_proc[n]
end
else
- super(element_name) do |n|
+ super(self.registered_name, nil, self.namespace) do |n|
blk_proc[n]
end
end
end
end
+ attr_writer :parent
+
+ def parent
+ @parent || super
+ end
+
+ def inherit(node)
+ self.parent = node.parent
+ super
+ end
+
+ def version
+ read_attr :version
+ end
+
+ def version=(other)
+ write_attr :version, other
+ end
+
+ ##
+ # @return [String] the base URI to which relative URLs are resolved
+ #
+ def base_uri
+ read_attr :base
+ end
+
+ ##
+ # @param [String] uri the base URI to which relative URLs are resolved
+ #
+ def base_uri=(uri)
+ write_attr 'xml:base', uri
+ end
+
+ def to_doc
+ Nokogiri::XML::Document.new.tap do |doc|
+ doc << self
+ end
+ end
+
+ def +(other)
+ self.class.new(:base_uri => base_uri).tap do |new_element|
+ (self.children + other.children).each do |child|
+ new_element << child
+ end
+ end
+ end
+
def eval_dsl_block(&block)
return unless block_given?
@block_binding = eval "self", block.binding
@@ -106,19 +153,24 @@ def embed(other)
when self.class.module::Element
self << other
else
- raise ArgumentError, "Can only embed a String or an SSML element"
+ raise ArgumentError, "Can only embed a String or a #{self.class.module} element, not a #{other}"
end
end
def string(other)
- self << encode_special_chars(other)
+ self << other
+ end
+
+ def <<(other)
+ other = encode_special_chars other if other.is_a? String
+ super other
end
def method_missing(method_name, *args, &block)
const_name = method_name.to_s.sub('ssml', '').titleize.gsub(' ', '')
if self.class.module.const_defined?(const_name)
const = self.class.module.const_get const_name
- self << const.new(*args, &block)
+ embed const.new(*args, &block)
elsif @block_binding && @block_binding.respond_to?(method_name)
@block_binding.send method_name, *args, &block
else
@@ -126,6 +178,15 @@ def method_missing(method_name, *args, &block)
end
end
+ def clone
+ GRXML.import to_xml
+ end
+
+ def traverse(&block)
+ nokogiri_children.each { |j| j.traverse &block }
+ block.call self
+ end
+
def eql?(o, *args)
super o, :content, :children, *args
end
View
5 lib/ruby_speech/grxml.rb
@@ -13,6 +13,9 @@ module GRXML
autoload :Token
end
+ autoload :Match
+ autoload :NoMatch
+
InvalidChildError = Class.new StandardError
GRXML_NAMESPACE = 'http://www.w3.org/2001/06/grammar'
@@ -21,7 +24,7 @@ def self.draw(attributes = {}, &block)
Grammar.new(attributes).tap do |grammar|
block_return = grammar.eval_dsl_block &block
grammar << block_return if block_return.is_a?(String)
- end
+ end.assert_has_matching_root_rule
end
def self.import(other)
View
4 lib/ruby_speech/grxml/element.rb
@@ -18,6 +18,10 @@ def self.module
alias_method :nokogiri_children, :children
include GenericElement
+
+ def regexp_content # :nodoc:
+ children.map(&:regexp_content).join
+ end
end # Element
end # GRXML
end # RubySpeech
View
223 lib/ruby_speech/grxml/grammar.rb
@@ -18,37 +18,9 @@ class Grammar < Element
register :grammar
- VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, Rule, Tag].freeze
-
- ##
- # Create a new GRXML grammar root element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Grammar] an element for use in an GRXML document
- #
- def self.new(atts = {}, &block)
- new_node = super('grammar', atts)
- new_node[:version] = '1.0'
- new_node.namespace = GRXML_NAMESPACE
- new_node.language ||= "en-US"
- new_node.eval_dsl_block &block
- new_node
- end
-
- ##
- # @return [String] the base URI to which relative URLs are resolved
- #
- def base_uri
- read_attr :base
- end
+ self.defaults = { :version => '1.0', :language => "en-US" }
- ##
- # @param [String] uri the base URI to which relative URLs are resolved
- #
- def base_uri=(uri)
- write_attr 'xml:base', uri
- end
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, Rule, Tag].freeze
##
#
@@ -57,7 +29,7 @@ def base_uri=(uri)
# @return [String]
#
def mode
- read_attr :mode
+ read_attr :mode, :to_sym
end
##
@@ -84,17 +56,6 @@ def root=(ia)
write_attr :root, ia
end
- def <<(arg)
- raise InvalidChildError, "A Grammar can only accept Rule and Tag as children" unless VALID_CHILD_TYPES.include? arg.class
- super
- end
-
- def to_doc
- Nokogiri::XML::Document.new.tap do |doc|
- doc << self
- end
- end
-
##
#
# @return [String]
@@ -110,11 +71,39 @@ def tag_format=(s)
write_attr :'tag-format', s
end
+ ##
+ # @return [Rule] The root rule node for the document
+ #
def root_rule
children(:rule, :id => root).first
end
+ ##
+ # Checks for a root rule matching the value of the root tag
+ #
+ # @raise [InvalidChildError] if there is not a rule present in the document with the correct ID
+ #
+ # @return [Grammar] self
+ #
+ def assert_has_matching_root_rule
+ raise InvalidChildError, "A GRXML document must have a rule matching the root rule name" unless has_matching_root_rule?
+ self
+ end
+
+ ##
+ # @return [Grammar] an inlined copy of self
+ #
def inline
+ clone.inline!
+ end
+
+ ##
+ # Replaces rulerefs in the document with a copy of the original rule.
+ # Removes all top level rules except the root rule
+ #
+ # @return self
+ #
+ def inline!
find("//ns:ruleref", :ns => namespace_href).each do |ref|
rule = children(:rule, :id => ref[:uri].sub(/^#/, '')).first
ref.swap rule.nokogiri_children
@@ -126,17 +115,159 @@ def inline
self
end
- def +(other)
- self.class.new(:base_uri => base_uri).tap do |new_grammar|
- (self.children + other.children).each do |child|
- new_grammar << child
+ ##
+ # Replaces textual content of the document with token elements containing such content.
+ # This homogenises all tokens in the document to a consistent format for processing.
+ #
+ def tokenize!
+ traverse do |element|
+ next unless element.is_a? Nokogiri::XML::Text
+
+ next if self.class.import(element.parent).is_a? Token
+
+ tokens = split_tokens(element).map do |string|
+ Token.new.tap { |token| token << string }
end
+
+ element.swap Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new, tokens)
end
end
+ ##
+ # Normalizes whitespace within tokens in the document according to the rules in the SRGS spec (http://www.w3.org/TR/speech-grammar/#S2.1)
+ # Leading and trailing whitespace is removed, and multiple spaces within the string are collapsed down to single spaces.
+ #
+ def normalize_whitespace
+ traverse do |element|
+ next if element === self
+
+ imported_element = self.class.import element
+ next unless imported_element.respond_to? :normalize_whitespace
+
+ imported_element.normalize_whitespace
+ element.swap imported_element
+ end
+ end
+
+ ##
+ # Checks the grammar for a match against an input string
+ #
+ # @param [String] other the input string to check for a match with the grammar
+ #
+ # @return [NoMatch, Match] depending on the result of a match attempt. If a match can be found, it will be returned with appropriate mode/confidence/utterance and interpretation attributes
+ #
+ # @example A grammar that takes a 4 digit pin terminated by hash, or the *9 escape sequence
+ # ```ruby
+ # grammar = RubySpeech::GRXML.draw :mode => :dtmf, :root => 'pin' do
+ # rule :id => 'digit' do
+ # one_of do
+ # ('0'..'9').map { |d| item { d } }
+ # end
+ # end
+ #
+ # rule :id => 'pin', :scope => 'public' do
+ # one_of do
+ # item do
+ # item :repeat => '4' do
+ # ruleref :uri => '#digit'
+ # end
+ # "#"
+ # end
+ # item do
+ # "\* 9"
+ # end
+ # end
+ # end
+ # end
+ #
+ # >> subject.match '*9'
+ # => #<RubySpeech::GRXML::Match:0x00000100ae5d98
+ # @mode = :dtmf,
+ # @confidence = 1,
+ # @utterance = "*9",
+ # @interpretation = "*9"
+ # >
+ # >> subject.match '1234#'
+ # => #<RubySpeech::GRXML::Match:0x00000100b7e020
+ # @mode = :dtmf,
+ # @confidence = 1,
+ # @utterance = "1234#",
+ # @interpretation = "1234#"
+ # >
+ # >> subject.match '111'
+ # => #<RubySpeech::GRXML::NoMatch:0x00000101371660>
+ #
+ # ```
+ #
+ def match(other)
+ regex = to_regexp
+ return NoMatch.new if regex == //
+ match = regex.match other
+ return NoMatch.new unless match
+
+ Match.new :mode => mode,
+ :confidence => dtmf? ? 1 : 0,
+ :utterance => other,
+ :interpretation => interpret_utterance(other)
+ end
+
+ ##
+ # Converts the grammar into a regular expression for matching
+ #
+ # @return [Regexp] a regular expression which is equivalent to the grammar
+ #
+ def to_regexp
+ /^#{regexp_content.join}$/
+ end
+
+ def regexp_content
+ root_rule.children.map &:regexp_content
+ end
+
+ def dtmf?
+ mode == :dtmf
+ end
+
+ def voice?
+ mode == :voice
+ end
+
+ def <<(arg)
+ raise InvalidChildError, "A Grammar can only accept Rule and Tag as children" unless VALID_CHILD_TYPES.include? arg.class
+ super
+ end
+
def eql?(o)
super o, :language, :base_uri, :mode, :root
end
+
+ def embed(other)
+ raise InvalidChildError, "Embedded grammars must have the same mode" if other.is_a?(self.class) && other.mode != mode
+ super
+ end
+
+ private
+
+ def has_matching_root_rule?
+ !root || root_rule
+ end
+
+ def interpret_utterance(utterance)
+ conversion = Hash.new { |hash, key| hash[key] = key }
+ conversion['*'] = 'star'
+ conversion['#'] = 'pound'
+
+ utterance.chars.inject [] do |array, digit|
+ array << "dtmf-#{conversion[digit]}"
+ end.join ' '
+ end
+
+ def split_tokens(element)
+ element.to_s.split(/(\".*\")/).reject(&:empty?).map do |string|
+ match = string.match /^\"(.*)\"$/
+ match ? match[1] : string.split(' ')
+ end.flatten
+ end
end # Grammar
end # GRXML
end # RubySpeech
View
23 lib/ruby_speech/grxml/item.rb
@@ -32,17 +32,6 @@ class Item < Element
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, OneOf, Item, String, Ruleref, Tag, Token].freeze
##
- # Create a new GRXML item element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Item] an element for use in an GRXML document
- #
- def self.new(atts = {}, &block)
- super 'item', atts, &block
- end
-
- ##
#
# The optional weight attribute
#
@@ -83,6 +72,7 @@ def repeat
# @param [String] r
#
def repeat=(r)
+ r = "#{r.min}-#{r.max}" if r.is_a?(Range)
r = r.to_s
error = ArgumentError.new "A Item's repeat must be 0 or a positive integer"
@@ -130,6 +120,17 @@ def <<(arg)
def eql?(o)
super o, :weight, :repeat
end
+
+ def regexp_content # :nodoc:
+ return super unless repeat
+
+ if repeat.include?('-')
+ min, max = repeat.split '-'
+ "#{super}{#{min},#{max}}"
+ else
+ "#{super}{#{repeat}}"
+ end
+ end
end # Item
end # GRXML
end # RubySpeech
View
16 lib/ruby_speech/grxml/match.rb
@@ -0,0 +1,16 @@
+module RubySpeech
+ module GRXML
+ class Match
+ attr_accessor :mode, :confidence, :utterance, :interpretation
+
+ def initialize(options = {})
+ options.each_pair { |k, v| self.send :"#{k}=", v }
+ end
+
+ def eql?(o)
+ o.is_a?(self.class) && [:mode, :confidence, :utterance, :interpretation].all? { |f| self.__send__(f) == o.__send__(f) }
+ end
+ alias :== :eql?
+ end
+ end
+end
View
10 lib/ruby_speech/grxml/no_match.rb
@@ -0,0 +1,10 @@
+module RubySpeech
+ module GRXML
+ class NoMatch
+ def eql?(o)
+ o.is_a? self.class
+ end
+ alias :== :eql?
+ end
+ end
+end
View
15 lib/ruby_speech/grxml/one_of.rb
@@ -18,21 +18,14 @@ class OneOf < Element
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, Item].freeze
- ##
- # Create a new GRXML one-of element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [OneOf] an element for use in an GRXML document
- #
- def self.new(atts = {}, &block)
- super 'one-of', atts, &block
- end
-
def <<(arg)
raise InvalidChildError, "A OneOf can only accept Item as children" unless VALID_CHILD_TYPES.include? arg.class
super
end
+
+ def regexp_content # :nodoc:
+ "(#{children.map(&:regexp_content).join '|'})"
+ end
end # OneOf
end # GRXML
end # RubySpeech
View
11 lib/ruby_speech/grxml/rule.rb
@@ -22,17 +22,6 @@ class Rule < Element
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, OneOf, Item, Ruleref, Tag, Token].freeze
##
- # Create a new GRXML rule element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Rule] an element for use in an GRXML document
- #
- def self.new(atts = {}, &block)
- super 'rule', atts, &block
- end
-
- ##
#
# The id attribute is the unique name to identify the rule
#
View
11 lib/ruby_speech/grxml/ruleref.rb
@@ -19,17 +19,6 @@ class Ruleref < Element
register :ruleref
##
- # Create a new GRXML ruleref element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Ruleref] an element for use in an GRXML document
- #
- def self.new(atts = {}, &block)
- super 'ruleref', atts, &block
- end
-
- ##
# XML URI: in the XML Form of this specification any URI is provided as an attribute to an element; for example the ruleref and lexicon elements.
#
# @return [String]
View
11 lib/ruby_speech/grxml/tag.rb
@@ -16,17 +16,6 @@ class Tag < Element
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String].freeze
- ##
- # Create a new GRXML tag element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Tag] an element for use in an GRXML document
- #
- def self.new(atts = {}, &block)
- super 'tag', atts, &block
- end
-
def <<(arg)
raise InvalidChildError, "A Tag can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
super
View
19 lib/ruby_speech/grxml/token.rb
@@ -14,21 +14,18 @@ class Token < Element
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String].freeze
- ##
- # Create a new GRXML token element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Token] an element for use in an GRXML document
- #
- def self.new(atts = {}, &block)
- super 'token', atts, &block
- end
-
def <<(arg)
raise InvalidChildError, "A Token can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
super
end
+
+ def normalize_whitespace
+ self.content = content.strip.squeeze ' '
+ end
+
+ def regexp_content # :nodoc:
+ Regexp.escape content
+ end
end # Token
end # GRXML
end # RubySpeech
View
6 lib/ruby_speech/ssml.rb
@@ -5,11 +5,17 @@ module SSML
eager_autoload do
autoload :Audio
autoload :Break
+ autoload :Desc
autoload :Element
autoload :Emphasis
+ autoload :Mark
+ autoload :P
+ autoload :Phoneme
autoload :Prosody
+ autoload :S
autoload :SayAs
autoload :Speak
+ autoload :Sub
autoload :Voice
end
View
13 lib/ruby_speech/ssml/audio.rb
@@ -16,18 +16,7 @@ class Audio < Element
register :audio
- VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text,String, Audio, Break, Emphasis, Prosody, SayAs, Voice].freeze
-
- ##
- # Create a new SSML audio element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Break] an element for use in an SSML document
- #
- def self.new(atts = {}, &block)
- super 'audio', atts, &block
- end
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Desc, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
##
# The URI of a document with an appropriate MIME type
View
11 lib/ruby_speech/ssml/break.rb
@@ -12,17 +12,6 @@ class Break < Element
VALID_STRENGTHS = [:none, :'x-weak', :weak, :medium, :strong, :'x-strong'].freeze
##
- # Create a new SSML break element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Break] an element for use in an SSML document
- #
- def self.new(atts = {}, &block)
- super 'break', atts, &block
- end
-
- ##
# This attribute is used to indicate the strength of the prosodic break in the speech output. The value "none" indicates that no prosodic break boundary should be outputted, which can be used to prevent a prosodic break which the processor would otherwise produce. The other values indicate monotonically non-decreasing (conceptually increasing) break strength between words. The stronger boundaries are typically accompanied by pauses. "x-weak" and "x-strong" are mnemonics for "extra weak" and "extra strong", respectively.
#
# @return [Symbol]
View
24 lib/ruby_speech/ssml/desc.rb
@@ -0,0 +1,24 @@
+module RubySpeech
+ module SSML
+ ##
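+ # The desc element can only occur within the content of the audio element. When the audio source referenced in audio is not speech, e.g. audio wallpaper or sonicon punctuation, it should contain a desc element whose textual content is a description of the audio source (e.g. "door slamming"). If text-only output is being produced by the synthesis processor, the content of the desc element(s) should be rendered instead of other alternative content in audio.
+ #
+ # http://www.w3.org/TR/speech-synthesis/#S3.3.3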
+ #
+ class Desc < Element
+
+ register :desc
+
+ VALID_CHILD_TYPES = [Nokogiri::XML::Text, String].freeze
+
+ def <<(arg)
+ raise InvalidChildError, "A Desc can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
+ super
+ end
+
+ def eql?(o)
+ super o, :language
+ end
+ end # Desc
+ end # SSML
+end # RubySpeech
View
13 lib/ruby_speech/ssml/emphasis.rb
@@ -10,18 +10,7 @@ class Emphasis < Element
register :emphasis
VALID_LEVELS = [:strong, :moderate, :none, :reduced].freeze
- VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Prosody, SayAs, Voice].freeze
-
- ##
- # Create a new SSML emphasis element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Emphasis] an element for use in an SSML document
- #
- def self.new(atts = {}, &block)
- super 'emphasis', atts, &block
- end
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, Voice].freeze
##
# Indicates the strength of emphasis to be applied. Defined values are "strong", "moderate", "none" and "reduced". The default level is "moderate". The meaning of "strong" and "moderate" emphasis is interpreted according to the language being spoken (languages indicate emphasis using a possible combination of pitch change, timing changes, loudness and other acoustic differences). The "reduced" level is effectively the opposite of emphasizing a word. For example, when the phrase "going to" is reduced it may be spoken as "gonna". The "none" level is used to prevent the synthesis processor from emphasizing words that it might typically emphasize. The values "none", "moderate", and "strong" are monotonically non-decreasing in strength.
View
43 lib/ruby_speech/ssml/mark.rb
@@ -0,0 +1,43 @@
+module RubySpeech
+ module SSML
+ ##
+ # A mark element is an empty element that places a marker into the text/tag sequence. It has one required attribute, name, which is of type xsd:token [SCHEMA2 §3.3.2]. The mark element can be used to reference a specific location in the text/tag sequence, and can additionally be used to insert a marker into an output stream for asynchronous notification. When processing a mark element, a synthesis processor must do one or both of the following:
+ #
+ # * inform the hosting environment with the value of the name attribute and with information allowing the platform to retrieve the corresponding position in the rendered output.
+ # * when audio output of the SSML document reaches the mark, issue an event that includes the required name attribute of the element. The hosting environment defines the destination of the event.
+ #
+ # The mark element does not affect the speech output process.
+ #
+ # http://www.w3.org/TR/speech-synthesis/#S3.3.2
+ #
+ class Mark < Element
+
+ register :mark
+
+ ##
+ # This attribute is a token by which to reference the mark
+ #
+ # @return [String]
+ #
+ def name
+ read_attr :name
+ end
+
+ ##
+ # @param [String] other the name token
+ #
+ def name=(other)
+ write_attr :name, other
+ end
+
+ def <<(*args)
+ raise InvalidChildError, "A Mark cannot contain children"
+ super
+ end
+
+ def eql?(o)
+ super o, :name
+ end
+ end # Mark
+ end # SSML
+end # RubySpeech
View
25 lib/ruby_speech/ssml/p.rb
@@ -0,0 +1,25 @@
+module RubySpeech
+ module SSML
+ ##
+ # A p element represents a paragraph.
+ # The use of p elements is optional. Where text occurs without an enclosing p element the synthesis processor should attempt to determine the structure using language-specific knowledge of the format of plain text.
+ #
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.7
+ #
+ class P < Element
+
+ register :p
+
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
+
+ def <<(arg)
+ raise InvalidChildError, "A P can only accept String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, S, Voice as children" unless VALID_CHILD_TYPES.include? arg.class
+ super
+ end
+
+ def eql?(o)
+ super o, :language
+ end
+ end # P
+ end # SSML
+end # RubySpeech
View
72 lib/ruby_speech/ssml/phoneme.rb
@@ -0,0 +1,72 @@
+module RubySpeech
+ module SSML
+ ##
+ # The phoneme element provides a phonemic/phonetic pronunciation for the contained text. The phoneme element may be empty. However, it is recommended that the element contain human-readable text that can be used for non-spoken rendering of the document. For example, the content may be displayed visually for users with hearing impairments.
+ #
+ # The ph attribute is a required attribute that specifies the phoneme/phone string.
+ #
+ # This element is designed strictly for phonemic and phonetic notations and is intended to be used to provide pronunciations for words or very short phrases. The phonemic/phonetic string does not undergo text normalization and is not treated as a token for lookup in the lexicon (see Section 3.1.4), while values in say-as and sub may undergo both. Briefly, phonemic strings consist of phonemes, language-dependent speech units that characterize linguistically significant differences in the language; loosely, phonemes represent all the sounds needed to distinguish one word from another in a given language. On the other hand, phonetic strings consist of phones, speech units that characterize the manner (puff of air, click, vocalized, etc.) and place (front, middle, back, etc.) of articulation within the human vocal tract and are thus independent of language; phones represent realized distinctions in human speech production.
+ #
+ # The alphabet attribute is an optional attribute that specifies the phonemic/phonetic alphabet. An alphabet in this context refers to a collection of symbols to represent the sounds of one or more human languages. The only valid values for this attribute are "ipa" (see the next paragraph) and vendor-defined strings of the form "x-organization" or "x-organization-alphabet". For example, the Japan Electronics and Information Technology Industries Association [JEITA] might wish to encourage the use of an alphabet such as "x-JEITA" or "x-JEITA-2000" for their phoneme alphabet [JEIDAALPHABET].
+ #
+ # Synthesis processors should support a value for alphabet of "ipa", corresponding to Unicode representations of the phonetic characters developed by the International Phonetic Association [IPA]. In addition to an exhaustive set of vowel and consonant symbols, this character set supports a syllable delimiter, numerous diacritics, stress symbols, lexical tone symbols, intonational markers and more. For this alphabet, legal ph values are strings of the values specified in Appendix 2 of [IPAHNDBK]. Informative tables of the IPA-to-Unicode mappings can be found at [IPAUNICODE1] and [IPAUNICODE2]. Note that not all of the IPA characters are available in Unicode. For processors supporting this alphabet,
+ #
+ # * The processor must syntactically accept all legal ph values.
+ # * The processor should produce output when given Unicode IPA codes that can reasonably be considered to belong to the current language.
+ # * The production of output when given other codes is entirely at processor discretion.
+ #
+ # It is an error if a value for alphabet is specified that is not known or cannot be applied by a synthesis processor. The default behavior when the alphabet attribute is left unspecified is processor-specific.
+ #
+ # The phoneme element itself can only contain text (no elements).
+ #
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.9
+ #
+ class Phoneme < Element
+
+ register :phoneme
+
+ VALID_CHILD_TYPES = [Nokogiri::XML::Text, String].freeze
+
+ ##
+ # Specifies the phonemic/phonetic alphabet
+ #
+ # @return [String]
+ #
+ def alphabet
+ read_attr :alphabet
+ end
+
+ ##
+ # @param [String] other the phonemic/phonetic alphabet
+ #
+ def alphabet=(other)
+ write_attr :alphabet, other
+ end
+
+ ##
+ # Specifies the phoneme/phone string.
+ #
+ # @return [String]
+ #
+ def ph
+ read_attr :ph
+ end
+
+ ##
+ # @param [String] other the phoneme/phone string.
+ #
+ def ph=(other)
+ write_attr :ph, other
+ end
+
+ def <<(arg)
+ raise InvalidChildError, "A Phoneme can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
+ super
+ end
+
+ def eql?(o)
+ super o, :alphabet, :ph
+ end
+ end # Phoneme
+ end # SSML
+end # RubySpeech
View
13 lib/ruby_speech/ssml/prosody.rb
@@ -18,18 +18,7 @@ class Prosody < Element
VALID_PITCHES = [:'x-low', :low, :medium, :high, :'x-high', :default].freeze
VALID_VOLUMES = [:silent, :'x-soft', :soft, :medium, :loud, :'x-loud', :default].freeze
VALID_RATES = [:'x-slow', :slow, :medium, :fast, :'x-fast', :default].freeze
- VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Prosody, SayAs, Voice].freeze
-
- ##
- # Create a new SSML prosody element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Prosody] an element for use in an SSML document
- #
- def self.new(atts = {}, &block)
- super 'prosody', atts, &block
- end
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
##
# The baseline pitch for the contained text. Although the exact meaning of "baseline pitch" will vary across synthesis processors, increasing/decreasing this value will typically increase/decrease the approximate pitch of the output. Legal values are: a number followed by "Hz", a relative change or "x-low", "low", "medium", "high", "x-high", or "default". Labels "x-low" through "x-high" represent a sequence of monotonically non-decreasing pitch levels.
View
25 lib/ruby_speech/ssml/s.rb
@@ -0,0 +1,25 @@
+module RubySpeech
+ module SSML
+ ##
+ # An s element represents a sentence.
+ # The use of s elements is optional. Where text occurs without an enclosing s element the synthesis processor should attempt to determine the structure using language-specific knowledge of the format of plain text.
+ #
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.7
+ #
+ class S < Element
+
+ register :s
+
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, Voice].freeze
+
+ def <<(arg)
+ raise InvalidChildError, "An S can only accept String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, Voice as children" unless VALID_CHILD_TYPES.include? arg.class
+ super
+ end
+
+ def eql?(o)
+ super o, :language
+ end
+ end # S
+ end # SSML
+end # RubySpeech
View
11 lib/ruby_speech/ssml/say_as.rb
@@ -27,17 +27,6 @@ class SayAs < Element
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String].freeze
##
- # Create a new SSML say-as element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Prosody] an element for use in an SSML document
- #
- def self.new(atts = {}, &block)
- super 'say-as', atts, &block
- end
-
- ##
#
# The interpret-as attribute indicates the content type of the contained text construct. Specifying the content type helps the synthesis processor to distinguish and interpret text constructs that may be rendered in different ways depending on what type of information is intended.
#
View
46 lib/ruby_speech/ssml/speak.rb
@@ -10,57 +10,15 @@ class Speak < Element
register :speak
- VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Prosody, SayAs, Voice].freeze
+ self.defaults = { :version => '1.0', :language => "en-US" }
- ##
- # Create a new SSML speak root element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Speak] an element for use in an SSML document
- #
- def self.new(atts = {}, &block)
- new_node = super('speak', atts)
- new_node[:version] = '1.0'
- new_node.namespace = SSML_NAMESPACE
- new_node.language ||= "en-US"
- new_node.eval_dsl_block &block
- new_node
- end
-
- ##
- # @return [String] the base URI to which relative URLs are resolved
- #
- def base_uri
- read_attr :base
- end
-
- ##
- # @param [String] uri the base URI to which relative URLs are resolved
- #
- def base_uri=(uri)
- write_attr 'xml:base', uri
- end
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
def <<(arg)
raise InvalidChildError, "A Speak can only accept String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, SayAs, Sub, S, Voice as children" unless VALID_CHILD_TYPES.include? arg.class
super
end
- def to_doc
- Nokogiri::XML::Document.new.tap do |doc|
- doc << self
- end
- end
-
- def +(other)
- self.class.new(:base_uri => base_uri).tap do |new_speak|
- (self.children + other.children).each do |child|
- new_speak << child
- end
- end
- end
-
def eql?(o)
super o, :language, :base_uri
end
View
42 lib/ruby_speech/ssml/sub.rb
@@ -0,0 +1,42 @@
+module RubySpeech
+ module SSML
+ ##
+ # The sub element is employed to indicate that the text in the alias attribute value replaces the contained text for pronunciation. This allows a document to contain both a spoken and written form. The required alias attribute specifies the string to be spoken instead of the enclosed string. The processor should apply text normalization to the alias value.
+ #
+ # The sub element can only contain text (no elements).
+ #
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.10
+ #
+ class Sub < Element
+
+ register :sub
+
+ VALID_CHILD_TYPES = [Nokogiri::XML::Text, String].freeze
+
+ ##
+ # Indicates the string to be spoken instead of the enclosed string
+ #
+ # @return [String]
+ #
+ def alias
+ read_attr :alias
+ end
+
+ ##
+ # @param [String] other the string to be spoken instead of the enclosed string
+ #
+ def alias=(other)
+ write_attr :alias, other
+ end
+
+ def <<(arg)
+ raise InvalidChildError, "A Sub can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
+ super
+ end
+
+ def eql?(o)
+ super o, :alias
+ end
+ end # Sub
+ end # SSML
+end # RubySpeech
View
13 lib/ruby_speech/ssml/voice.rb
@@ -11,18 +11,7 @@ class Voice < Element
register :voice
VALID_GENDERS = [:male, :female, :neutral].freeze
- VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio,Break, Emphasis, Prosody, SayAs, Voice].freeze
-
- ##
- # Create a new SSML voice element
- #
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
- #
- # @return [Voice] an element for use in an SSML document
- #
- def self.new(atts = {}, &block)
- super 'voice', atts, &block
- end
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
##
# Indicates the preferred gender of the voice to speak the contained text. Enumerated values are: "male", "female", "neutral".
View
2  lib/ruby_speech/version.rb
@@ -1,3 +1,3 @@
module RubySpeech
- VERSION = "0.4.0"
+ VERSION = "0.5.0"
end
View
513 spec/ruby_speech/grxml/grammar_spec.rb
@@ -18,13 +18,17 @@ module GRXML
end
describe "setting dtmf mode" do
- subject { Grammar.new :mode => 'dtmf' }
- its(:mode) { should == 'dtmf' }
+ subject { Grammar.new :mode => 'dtmf' }
+ its(:mode) { should == :dtmf }
+ its(:dtmf?) { should be true }
+ its(:voice?) { should be false }
end
describe "setting voice mode" do
- subject { Grammar.new :mode => 'voice' }
- its(:mode) { should == 'voice' }
+ subject { Grammar.new :mode => 'voice' }
+ its(:mode) { should == :voice }
+ its(:voice?) { should be true }
+ its(:dtmf?) { should be false }
end
it 'registers itself' do
@@ -38,13 +42,13 @@ module GRXML
http://www.w3.org/TR/speech-grammar/grammar.xsd"
xmlns="http://www.w3.org/2001/06/grammar" />' }
- subject { Element.import parse_xml(document).root }
+ subject { Element.import document }
it { should be_instance_of Grammar }
- its(:language) { pending; should == 'jp' }
+ its(:language) { should == 'jp' }
its(:base_uri) { should == 'blah' }
- its(:mode) { should == 'dtmf' }
+ its(:mode) { should == :dtmf }
its(:root) { should == 'main_rule' }
end
@@ -162,52 +166,491 @@ module GRXML
grammar.root_rule.should == foo
end
- it "should allow inlining rule references" do
- grammar = GRXML.draw :root => 'pin', :mode => :dtmf do
- rule :id => 'digits' do
- one_of do
- 0.upto(9) { |d| item { d.to_s } }
+ describe "inlining rule references" do
+ let :grammar do
+ GRXML.draw :root => 'pin', :mode => :dtmf do
+ rule :id => 'digits' do
+ one_of do
+ 0.upto(9) { |d| item { d.to_s } }
+ end
+ end
+
+ rule :id => 'pin', :scope => 'public' do
+ one_of do
+ item do
+ item :repeat => '4' do
+ ruleref :uri => '#digits'
+ end
+ "#"
+ end
+ item do
+ "* 9"
+ end
+ end
end
end
+ end
- rule :id => 'pin', :scope => 'public' do
- one_of do
- item do
- item :repeat => '4' do
- ruleref :uri => '#digits'
+ let :inline_grammar do
+ GRXML.draw :root => 'pin', :mode => :dtmf do
+ rule :id => 'pin', :scope => 'public' do
+ one_of do
+ item do
+ item :repeat => '4' do
+ one_of do
+ 0.upto(9) { |d| item { d.to_s } }
+ end
+ end
+ "#"
end
- "#"
+ item do
+ "* 9"
+ end
+ end
+ end
+ end
+ end
+
+ it "should be possible in a non-destructive manner" do
+ grammar.inline.should == inline_grammar
+ grammar.should_not == inline_grammar
+ end
+
+ it "should be possible in a destructive manner" do
+ grammar.inline!.should == inline_grammar
+ grammar.should == inline_grammar
+ end
+ end
+
+ describe "#tokenize!" do
+ def single_rule_grammar(content = [])
+ GRXML.draw :root => 'm', :mode => :speech do
+ rule :id => 'm' do
+ Array(content).each { |e| embed e }
+ end
+ end
+ end
+
+ subject { single_rule_grammar content }
+
+ let(:tokenized_version) do
+ expected_tokens = Array(tokens).map do |s|
+ Token.new.tap { |t| t << s }
+ end
+ single_rule_grammar expected_tokens
+ end
+
+ before { subject.tokenize! }
+
+ context "with a single unquoted token" do
+ let(:content) { 'hello' }
+ let(:tokens) { 'hello' }
+
+ it "should tokenize correctly" do
+ should == tokenized_version
+ end
+ end
+
+ context "with a single unquoted token (non-alphabetic)" do
+ let(:content) { '2' }
+ let(:tokens) { ['2'] }
+
+ it "should tokenize correctly" do
+ should == tokenized_version
+ end
+ end
+
+ context "with a single quoted token (including whitespace)" do
+ let(:content) { '"San Francisco"' }
+ let(:tokens) { ['San Francisco'] }
+
+ it "should tokenize correctly" do
+ should == tokenized_version
+ end
+ end
+
+ context "with a single quoted token (no whitespace)" do
+ let(:content) { '"hello"' }
+ let(:tokens) { ['hello'] }
+
+ it "should tokenize correctly" do
+ should == tokenized_version
+ end
+ end
+
+ context "with two tokens delimited by white space" do
+ let(:content) { 'bon voyage' }
+ let(:tokens) { ['bon', 'voyage'] }
+
+ it "should tokenize correctly" do
+ should == tokenized_version
+ end
+ end
+
+ context "with four tokens delimited by white space" do
+ let(:content) { 'this is a test' }
+ let(:tokens) { ['this', 'is', 'a', 'test'] }
+
+ it "should tokenize correctly" do
+ should == tokenized_version
+ end
+ end
+
+ context "with a single XML token" do
+ let(:content) { [Token.new.tap { |t| t << 'San Francisco' }] }
+ let(:tokens) { ['San Francisco'] }
+
+ it "should tokenize correctly" do
+ should == tokenized_version
+ end
+ end
+
+ context "with a mixture of token types" do
+ let(:content) do
+ [
+ 'Welcome to "San Francisco"',
+ Token.new.tap { |t| t << 'Have Fun!' }
+ ]
+ end
+
+ let(:tokens) { ['Welcome', 'to', 'San Francisco', 'Have Fun!'] }
+
+ it "should tokenize correctly" do
+ should == tokenized_version
+ end
+ end
+ end
+
+ describe "#normalize_whitespace" do
+ it "should normalize whitespace in all of the tokens contained within it" do
+ grammar = GRXML.draw do
+ rule do
+ token { ' Welcome to ' }
+ token { ' San Francisco ' }
+ end
+ end
+
+ normalized_grammar = GRXML.draw do
+ rule do
+ token { 'Welcome to' }
+ token { 'San Francisco' }
+ end
+ end
+
+ grammar.should_not == normalized_grammar
+ grammar.normalize_whitespace
+ grammar.should == normalized_grammar
+ end
+ end
+
+ describe "matching against an input string" do
+ before do
+ subject.inline!
+ subject.tokenize!
+ subject.normalize_whitespace
+ end
+
+ context "with a grammar that takes a single specific digit" do
+ subject do
+ GRXML.draw :mode => :dtmf, :root => 'digit' do
+ rule :id => 'digit' do
+ '6'
end
- item do
- "* 9"
+ end
+ end
+
+ it "should match '6'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => '6',
+ :interpretation => 'dtmf-6'
+ subject.match('6').should == expected_match
+ end
+
+ %w{1 2 3 4 5 7 8 9 10 66 26 61}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
+ end
+
+ context "with a grammar that takes two specific digits" do
+ subject do
+ GRXML.draw :mode => :dtmf, :root => 'digits' do
+ rule :id => 'digits' do
+ '5 6'
end
end
end
+
+ it "should match '56'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => '56',
+ :interpretation => 'dtmf-5 dtmf-6'
+ subject.match('56').should == expected_match
+ end
+
+ %w{* *7 #6 6* 1 2 3 4 5 6 7 8 9 10 65 57 46 26 61}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
end
- expected_grammar = GRXML.draw :root => 'pin', :mode => :dtmf do
- rule :id => 'pin', :scope => 'public' do
- one_of do
- item do
- item :repeat => '4' do
- one_of do
- 0.upto(9) { |d| item { d.to_s } }
- end
+ context "with a grammar that takes star and a digit" do
+ subject do
+ GRXML.draw :mode => :dtmf, :root => 'digits' do
+ rule :id => 'digits' do
+ '* 6'
+ end
+ end
+ end
+
+ it "should match '*6'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => '*6',
+ :interpretation => 'dtmf-star dtmf-6'
+ subject.match('*6').should == expected_match
+ end
+
+ %w{* *7 #6 6* 1 2 3 4 5 6 7 8 9 10 66 26 61}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
+ end
+
+ context "with a grammar that takes hash and a digit" do
+ subject do
+ GRXML.draw :mode => :dtmf, :root => 'digits' do
+ rule :id => 'digits' do
+ '# 6'
+ end
+ end
+ end
+
+ it "should match '#6'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => '#6',
+ :interpretation => 'dtmf-pound dtmf-6'
+ subject.match('#6').should == expected_match
+ end
+
+ %w{* *6 #7 6* 1 2 3 4 5 6 7 8 9 10 66 26 61}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
+ end
+
+ context "with a grammar that takes two specific digits, via a ruleref, and whitespace normalization" do
+ subject do
+ GRXML.draw :mode => :dtmf, :root => 'digits' do
+ rule :id => 'digits' do
+ ruleref :uri => '#star'
+ '" 6 "'
+ end
+
+ rule :id => 'star' do
+ '" * "'
+ end
+ end
+ end
+
+ it "should match '*6'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => '*6',
+ :interpretation => 'dtmf-star dtmf-6'
+ subject.match('*6').should == expected_match
+ end
+
+ %w{* *7 #6 6* 1 2 3 4 5 6 7 8 9 10 66 26 61}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
+ end
+
+ context "with a grammar that takes two specific digits with the second being an alternative" do
+ subject do
+ GRXML.draw :mode => :dtmf, :root => 'digits' do
+ rule :id => 'digits' do
+ string '*'
+ one_of do
+ item { '6' }
+ item { '7' }
end
- "#"
end
- item do
- "* 9"
+ end
+ end
+
+ it "should match '*6'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => '*6',
+ :interpretation => 'dtmf-star dtmf-6'
+ subject.match('*6').should == expected_match
+ end
+
+ it "should match '*7'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => '*7',
+ :interpretation => 'dtmf-star dtmf-7'
+ subject.match('*7').should == expected_match
+ end
+
+ %w{* *8 #6 6* 1 2 3 4 5 6 7 8 9 10 66 26 61}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
+ end
+
+ context "with a grammar that takes a specific digit, followed by a specific digit repeated an exact number of times" do
+ subject do
+ GRXML.draw :mode => :dtmf, :root => 'digits' do
+ rule :id => 'digits' do
+ string '1'
+ item :repeat => 2 do
+ '6'
+ end
end
end
end
+
+ it "should match '166'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => '166',
+ :interpretation => 'dtmf-1 dtmf-6 dtmf-6'
+ subject.match('166').should == expected_match
+ end
+
+ %w{1 16 1666 16666 17}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
end
- grammar.inline.should == expected_grammar
- end
+ context "with a grammar that takes a specific digit, followed by a specific digit repeated within a range" do
+ subject do
+ GRXML.draw :mode => :dtmf, :root => 'digits' do
+ rule :id => 'digits' do
+ string '1'
+ item :repeat => 0..3 do
+ '6'
+ end
+ end
+ end
+ end
+
+ {
+ '1' => 'dtmf-1',
+ '16' => 'dtmf-1 dtmf-6',
+ '166' => 'dtmf-1 dtmf-6 dtmf-6',
+ '1666' => 'dtmf-1 dtmf-6 dtmf-6 dtmf-6'
+ }.each_pair do |input, interpretation|
+ it "should match '#{input}'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => input,
+ :interpretation => interpretation
+ subject.match(input).should == expected_match
+ end
+ end
+
+ %w{6 16666 17}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
+ end
+
+ context "with a grammar that takes a specific digit, followed by a specific digit repeated a minimum number of times" do
+ subject do
+ GRXML.draw :mode => :dtmf, :root => 'digits' do
+ rule :id => 'digits' do
+ string '1'
+ item :repeat => '2-' do
+ '6'
+ end
+ end
+ end
+ end
+
+ {
+ '166' => 'dtmf-1 dtmf-6 dtmf-6',
+ '1666' => 'dtmf-1 dtmf-6 dtmf-6 dtmf-6',
+ '16666' => 'dtmf-1 dtmf-6 dtmf-6 dtmf-6 dtmf-6'
+ }.each_pair do |input, interpretation|
+ it "should match '#{input}'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => input,
+ :interpretation => interpretation
+ subject.match(input).should == expected_match
+ end
+ end
+
+ %w{1 16 17}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
+ end
+
+ context "with a grammar that takes a 4 digit pin terminated by hash, or the *9 escape sequence" do
+ subject do
+ RubySpeech::GRXML.draw :mode => :dtmf, :root => 'pin' do
+ rule :id => 'digit' do
+ one_of do
+ ('0'..'9').map { |d| item { d } }
+ end
+ end
+
+ rule :id => 'pin', :scope => 'public' do
+ one_of do
+ item do
+ item :repeat => '4' do
+ ruleref :uri => '#digit'
+ end
+ "#"
+ end
+ item do
+ "\* 9"
+ end
+ end
+ end
+ end
+ end
- describe "#tokens" do
- context "with unquoted tokens"
+ {
+ '*9' => 'dtmf-star dtmf-9',
+ '1234#' => 'dtmf-1 dtmf-2 dtmf-3 dtmf-4 dtmf-pound',
+ '5678#' => 'dtmf-5 dtmf-6 dtmf-7 dtmf-8 dtmf-pound',
+ '1111#' => 'dtmf-1 dtmf-1 dtmf-1 dtmf-1 dtmf-pound'
+ }.each_pair do |input, interpretation|
+ it "should match '#{input}'" do
+ expected_match = GRXML::Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => input,
+ :interpretation => interpretation
+ subject.match(input).should == expected_match
+ end
+ end
+
+ %w{111}.each do |input|
+ it "should not match '#{input}'" do
+ subject.match(input).should == GRXML::NoMatch.new
+ end
+ end
+ end
end
end # Grammar
end # GRXML
View
7 spec/ruby_speech/grxml/item_spec.rb
@@ -17,7 +17,7 @@ module GRXML
describe "everything from a document" do
let(:document) { '<item weight="1.1" repeat="1">one</item>' }
- subject { Element.import parse_xml(document).root }
+ subject { Element.import document }
it { should be_instance_of Item }
@@ -28,7 +28,7 @@ module GRXML
describe "#weight" do
context "from a document" do
- subject { Element.import parse_xml(document).root }
+ subject { Element.import document }
describe "using .1" do
let(:document) { '<item weight=".1" repeat="1">one</item>' }
@@ -85,6 +85,7 @@ module GRXML
it "valid ranges from m to n" do
lambda { subject.repeat = '1-5' }.should_not raise_error
lambda { subject.repeat = '0-5' }.should_not raise_error
+ lambda { subject.repeat = 0..5 }.should_not raise_error
end
it "illegal ranges from m to n" do
@@ -92,6 +93,8 @@ module GRXML
lambda { subject.repeat = '-1-2' }.should raise_error(ArgumentError, "A Item's repeat must be 0 or a positive integer")
lambda { subject.repeat = '1-2-3' }.should raise_error(ArgumentError, "A Item's repeat must be 0 or a positive integer")
lambda { subject.repeat = '1-B' }.should raise_error(ArgumentError, "A Item's repeat must be 0 or a positive integer")
+ lambda { subject.repeat = -1..2 }.should raise_error(ArgumentError, "A Item's repeat must be 0 or a positive integer")
+ lambda { subject.repeat = 1..-2 }.should raise_error(ArgumentError, "A Item's repeat must be 0 or a positive integer")
end
it "valid ranges of m or more" do
View
49 spec/ruby_speech/grxml/match_spec.rb
@@ -0,0 +1,49 @@
+require 'spec_helper'
+
+module RubySpeech
+ module GRXML
+ describe Match do
+ subject do
+ Match.new :mode => :dtmf,
+ :confidence => 1,
+ :utterance => '6',
+ :interpretation => 'foo'
+ end
+
+ its(:mode) { should == :dtmf }
+ its(:confidence) { should == 1 }
+ its(:utterance) { should == '6' }
+ its(:interpretation) { should == 'foo' }
+
+ describe "equality" do
+ it "should be equal when mode, confidence, utterance and interpretation are the same" do
+ Match.new(:mode => :dtmf, :confidence => 1, :utterance => '6', :interpretation => 'foo').should == Match.new(:mode => :dtmf, :confidence => 1, :utterance => '6', :interpretation => 'foo')
+ end
+
+ describe "when the mode is different" do
+ it "should not be equal" do
+ Match.new(:mode => :dtmf).should_not == Match.new(:mode => :speech)
+ end
+ end
+
+ describe "when the confidence is different" do
+ it "should not be equal" do
+ Match.new(:confidence => 1).should_not == Match.new(:confidence => 0)
+ end
+ end
+
+ describe "when the utterance is different" do
+ it "should not be equal" do
+ Match.new(:utterance => '6').should_not == Match.new(:utterance => 'foo')
+ end
+ end
+
+ describe "when the interpretation is different" do
+ it "should not be equal" do
+ Match.new(:interpretation => 'foo').should_not == Match.new(:interpretation => 'bar')
+ end
+ end
+ end
+ end
+ end
+end
View
17 spec/ruby_speech/grxml/no_match_spec.rb
@@ -0,0 +1,17 @@
+require 'spec_helper'
+
+module RubySpeech
+ module GRXML
+ describe NoMatch do
+ describe "equality" do
+ it "should be equal to another NoMatch" do
+ NoMatch.new.should == NoMatch.new
+ end
+
+ it "should not equal a match" do
+ NoMatch.new.should_not == Match.new
+ end
+ end
+ end
+ end
+end
View
2  spec/ruby_speech/grxml/one_of_spec.rb
@@ -12,7 +12,7 @@ module GRXML
describe "from a document" do
let(:document) { '<one-of> <item>test</item> </one-of>' }
- subject { Element.import parse_xml(document).root }
+ subject { Element.import document }
it { should be_instance_of OneOf }
end
View
2  spec/ruby_speech/grxml/rule_spec.rb
@@ -17,7 +17,7 @@ module GRXML
describe "from a document" do
let(:document) { '<rule id="one" scope="public"> <item /> </rule>' }
- subject { Element.import parse_xml(document).root }
+ subject { Element.import document }
it { should be_instance_of Rule }
View
2  spec/ruby_speech/grxml/ruleref_spec.rb
@@ -15,7 +15,7 @@ module GRXML
describe "from a document" do
let(:document) { '<ruleref uri="#one" />' }
- subject { Element.import parse_xml(document).root }
+ subject { Element.import document }
it { should be_instance_of Ruleref }
View
2  spec/ruby_speech/grxml/tag_spec.rb
@@ -12,7 +12,7 @@ module GRXML
describe "from a document" do
let(:document) { '<tag>hello</tag>' }
- subject { Element.import parse_xml(document).root }
+ subject { Element.import document }
it { should be_instance_of Tag }
View
12 spec/ruby_speech/grxml/token_spec.rb
@@ -12,7 +12,7 @@ module GRXML
describe "from a document" do
let(:document) { '<token>hello</token>' }
- subject { Element.import parse_xml(document).root }
+ subject { Element.import document }
it { should be_instance_of Token }
@@ -25,6 +25,16 @@ module GRXML
its(:language) { should == 'jp' }
end
+ describe "#normalize_whitespace" do
+ it "should remove leading & trailing whitespace and collapse multiple spaces down to 1" do
+ element = Element.import '<token> Welcome to San Francisco </token>'
+
+ element.normalize_whitespace
+
+ element.content.should == 'Welcome to San Francisco'
+ end
+ end
+
describe "comparing objects" do
it "should be equal if the content is the same" do
Token.new(:content => "hello").should == Token.new(:content => "hello")
View
69 spec/ruby_speech/grxml_spec.rb
@@ -7,8 +7,16 @@ module RubySpeech
GRXML.draw.should == GRXML::Grammar.new
end
- it "should have a rule with id equal to the root attribute if set" do
- pending 'check that a rule exists with the id equal to root if that attribute is set'
+ context "with a root rule name specified but not found" do
+ it "should raise an error" do
+ lambda do
+ GRXML.draw :root => 'foo' do
+ rule :id => 'bar' do
+ '6'
+ end
+ end
+ end.should raise_error(InvalidChildError, "A GRXML document must have a rule matching the root rule name")
+ end
end
# TODO: Maybe GRXML#draw should create a Rule to pass the string
@@ -95,7 +103,7 @@ def foo
let :doc2 do
doc = doc1
- RubySpeech::GRXML.draw do
+ RubySpeech::GRXML.draw :mode => :dtmf do
embed doc
rule :id => :main do
"Hello Fred"