Skip to content

Commit

Permalink
Merge 74523e6 into 0513414
Browse files Browse the repository at this point in the history
  • Loading branch information
rodfersou committed Jul 8, 2016
2 parents 0513414 + 74523e6 commit b72412b
Show file tree
Hide file tree
Showing 11 changed files with 208 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Changelog
1.0b2 (unreleased)
------------------

- Review text extraction logic;
it is now possible to ignore parts of the text via a blacklist of CSS classes accessible in the control panel configlet.
[rodfersou, hvelarde]

- To avoid displaying the 'Listen' button with an incorrect voice,
the feature is now globally disabled by default at installation time.
[hvelarde]
Expand Down
21 changes: 21 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,27 @@ Go to the 'Site Setup' page in a Plone site and click on the 'Add-ons' link.

Check the box next to ``collective.texttospeech`` and click the 'Activate' button.

How does it work
----------------

We use JavaScript to extract all text inside an element with ``id="content"`` in the page.
The text extraction will ignore any <iframe> elements present.
Currently, <img> elements are not processed either, but that could change in the future.

It is possible to skip text that,
despite being present in the text flow,
makes little sense in the reading flow.
Examples of this are image captions and side quotes.

We have included a list of CSS classes that can be blacklisted to implement this feature.
The list is configurable via an option in the control panel configlet.
Any text inside an element with one of those CSS classes applied will be ignored.

The blacklist defaults to some CSS classes used in Plone 4:

* ``image-caption``: used for image captions
* ``pullquote``: used for side quotes

Usage
-----

Expand Down
4 changes: 3 additions & 1 deletion src/collective/texttospeech/browser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ def enabled(self):
try:
globally_enabled = api.portal.get_registry_record(
interface=ITextToSpeechControlPanel, name='globally_enabled')
except InvalidParameterError:
except (InvalidParameterError, KeyError):
# avoid breaking page rendering if record is not present
# this could happen on upgrades or accidental deletions
globally_enabled = False

return globally_enabled
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
<div i18n:domain="collective.texttospeech"
tal:on-error="nothing"
id="viewlet-texttospeech"
data-label-stopped="Listen"
data-label-playing="Pause"
data-label-paused="Resume"
data-error-message="Could not load ResponsiveVoice library; Text-To-Speech feature is disabled or is not available."
tal:attributes="data-voice view/voice;
data-enabled view/enabled"
data-enabled view/enabled;
data-blacklist view/blacklist"
i18n:attributes="data-label-stopped;
data-label-playing;
data-label-paused;
Expand Down
12 changes: 10 additions & 2 deletions src/collective/texttospeech/browser/viewlets.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,13 @@ def enabled(self):

def voice(self):
return api.portal.get_registry_record(
ITextToSpeechControlPanel.__identifier__ + '.voice'
)
ITextToSpeechControlPanel.__identifier__ + '.voice')

def blacklist(self):
    """Return the blacklisted CSS classes as one comma-separated string.

    An unset or empty registry record yields an empty string.
    """
    record = ITextToSpeechControlPanel.__identifier__ + '.css_class_blacklist'
    classes = api.portal.get_registry_record(record)
    return ','.join(classes) if classes else ''
6 changes: 6 additions & 0 deletions src/collective/texttospeech/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,10 @@
'News Item'
]

# CSS classes blacklisted out of the box; text inside elements carrying one of
# them is meant to be left out of the text sent to speech synthesis.
DEFAULT_CSS_CLASS_BLACKLIST = set([
'image-caption', # image captions
'pullquote' # side quotes
# TODO: find out equivalent classes for Plone 5
])

IS_PLONE_5 = api.env.plone_version().startswith('5')
15 changes: 15 additions & 0 deletions src/collective/texttospeech/interfaces.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
from collective.texttospeech import _
from collective.texttospeech.config import DEFAULT_CSS_CLASS_BLACKLIST
from collective.texttospeech.config import DEFAULT_ENABLED_CONTENT_TYPES
from plone.directives import form
from zope import schema
Expand Down Expand Up @@ -38,3 +39,17 @@ class ITextToSpeechControlPanel(form.Schema):
required=True,
default=u'UK English Female',
)

# Optional set of ASCII CSS class names to be skipped when extracting the
# text to read; defaults to DEFAULT_CSS_CLASS_BLACKLIST.
# The widget hint asks for a 25 columns by 10 rows input.
form.widget('css_class_blacklist', cols=25, rows=10)
css_class_blacklist = schema.Set(
title=_(u'CSS class blacklist'),
description=_(
u'A list of CSS class identifiers that will be ignored on speech_synthesis. '
u'Elements with any of these classes directly applied to them, or to a parent element, will be skipped. '
u'Default values include image captions ("image-caption") and side quotes ("pullquote").',
),
required=False,
default=DEFAULT_CSS_CLASS_BLACKLIST,
value_type=schema.ASCIILine(title=_(u'CSS class')),
# TODO: validate values
)
113 changes: 111 additions & 2 deletions src/collective/texttospeech/static/main.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
var MainView = (function() {
function MainView() {
this.$el = $('#viewlet-texttospeech');
this.$el.data('texttospeech', this);
this.$button = $('#texttospeech-button', this.$el);
this.$button.fadeIn();
this.voice = this.$el.attr('data-voice');
this.label_stopped = this.$el.attr('data-label-stopped');
this.label_playing = this.$el.attr('data-label-playing');
this.label_paused = this.$el.attr('data-label-paused');
this.blacklist = this.$el.attr('data-blacklist').split(',');
this.playing = false;
this.paused = true;
this.$button.on('click', $.proxy(this.play_pause, this));
Expand All @@ -23,7 +25,115 @@ var MainView = (function() {
this.$button.html(this.label_stopped);
this.$button.attr('class', 'stopped');
};
MainView.prototype.is_invisible = function($el) {
  // true when the jQuery-wrapped element does not match the :visible selector
  var shown = $el.is(':visible');
  return !shown;
};
MainView.prototype.is_blacklisted = function($el) {
  // Check if the element, or one of its ancestors, has a CSS class that must be skipped.
  // $el: jQuery-wrapped node.
  // Returns true when a blacklisted class applies, false otherwise.
  var i, len, name, selector;
  for (i = 0, len = this.blacklist.length; i < len; i++) {
    // an empty data-blacklist attribute splits into [''], which would build
    // the invalid selector '.' and make jQuery throw; skip blank entries and
    // tolerate stray whitespace around the class names
    name = $.trim(this.blacklist[i]);
    if (name === '') {
      continue;
    }
    selector = '.' + name;
    if ($el.is(selector) || $el.parents(selector).length > 0) {
      return true;
    }
  }
  return false;
};
MainView.prototype.is_valid_element = function(el) {
  // only element nodes (containers such as <div> or <p>) and text nodes are processed
  // https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType#Node_type_constants
  var type = el.nodeType;
  return type === Node.ELEMENT_NODE || type === Node.TEXT_NODE;
};
MainView.prototype.has_ending_punctuation = function(text) {
  // true when the last character of the text is one of a short list of punctuation marks
  // (the complete list is at http://stackoverflow.com/a/29226668/644075)
  var ending_mark = /[.,;:!?—]$/;
  return ending_mark.test(text);
};
MainView.prototype.remove_extra_spaces = function(text) {
  // collapse every run of whitespace into a single space and strip both ends
  return text.replace(/\s+/g, ' ').trim();
};
MainView.prototype.has_valid_text = function($node) {
  // true when at least one direct child text node of $node contains something
  // other than punctuation and whitespace
  return $node.contents().toArray().some(function(node) {
    if (node.nodeType !== Node.TEXT_NODE) {
      return false;
    }
    return node.textContent.replace(/[.,;:!?()—\r\n\s]*/g, '') !== '';
  });
};
MainView.prototype.remove_invisible_items = function(child) {
  // Recursive callback used in conjunction with walk_tree.
  // child: DOM node. Element nodes for which is_invisible() reports false are
  // removed from their parent; every other node is walked recursively.
  //
  // NOTE(review): the name says invisible items are removed, yet the test below
  // removes elements that are NOT invisible. The visible caller applies this to
  // a detached clone, where jQuery's :visible presumably never matches, so the
  // removal may never happen in practice. Confirm the intent before changing it.
  var $child = $(child);  // declared locally; it used to leak as an implicit global
  if (child.nodeType === Node.ELEMENT_NODE &&
      this.is_invisible($child) === false) {
    $child.remove();
    return;
  }
  this.walk_tree($child, this.remove_invisible_items);
};
MainView.prototype.extract_element_text = function(child) {
  // Recursive callback used in conjunction with walk_tree to extract texts.
  // child: DOM node. Its readable text, if any, is appended to this.results;
  // nothing is returned.
  // All working variables are declared locally; $child and text used to leak
  // as implicit globals.
  var i, len, name, text, $clone;
  var $child = $(child);
  if (this.is_valid_element(child) === false ||
      this.is_invisible($child) === true ||
      this.is_blacklisted($child) === true) {
    return;
  }
  if (child.nodeType === Node.TEXT_NODE) {
    text = $child.text();
  } else if (child.nodeType === Node.ELEMENT_NODE) {
    if (this.has_valid_text($child) === false) {
      // pure container: process each child node on its own
      this.walk_tree($child, this.extract_element_text);
      return;
    }
    // the element mixes its own text with nested markup:
    // read it as a single chunk from a copy, so the page is left untouched
    $clone = $child.clone();
    this.walk_tree($clone, this.remove_invisible_items);
    // nested elements carrying a blacklisted class must not be read either;
    // blank entries are skipped because '.' is not a valid selector
    for (i = 0, len = this.blacklist.length; i < len; i++) {
      name = $.trim(this.blacklist[i]);
      if (name !== '') {
        $clone.find('.' + name).remove();
      }
    }
    text = $clone.text();
  }
  text = this.remove_extra_spaces(text);
  // remove empty lines
  if (text.replace(/[.,;:!?()—\r\n\s]*/g, '') === '') {
    return;
  }
  // ensure there is a pause after every line adding a period
  if (this.has_ending_punctuation(text) === false) {
    text += '.';
  }
  this.results.push(text);
};
MainView.prototype.walk_tree = function($node, callback) {
  // call callback, bound to this view, once for every direct child node of $node;
  // the callbacks recurse by calling walk_tree again on the nodes they receive
  var view = this;
  $node.contents().toArray().forEach(function(node) {
    callback.call(view, node);
  });
};
MainView.prototype.extract_text = function() {
  // gather the readable chunks found under #content into this.results
  // and return them joined by single spaces
  var $content = $('#content');
  this.results = [];
  this.walk_tree($content, this.extract_element_text);
  return this.results.join(' ');
};
MainView.prototype.play_pause = function(e) {
// play/pause button
e.preventDefault();
if (this.playing) {
if (this.paused) {
Expand All @@ -39,8 +149,7 @@ var MainView = (function() {
}
} else {
responsiveVoice.speak(
// remove spaces to avoid issues with some Firefox versions
$('#content').text().replace(/\s+/g, ' ').trim(),
this.extract_text(),
this.voice, {
onstart: $.proxy(this.onstart, this),
onend: $.proxy(this.onend, this)
Expand Down
6 changes: 6 additions & 0 deletions src/collective/texttospeech/tests/test_controlpanel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from collective.texttospeech.config import DEFAULT_CSS_CLASS_BLACKLIST
from collective.texttospeech.config import DEFAULT_ENABLED_CONTENT_TYPES
from collective.texttospeech.config import PROJECTNAME
from collective.texttospeech.interfaces import ITextToSpeechControlPanel
Expand Down Expand Up @@ -66,6 +67,10 @@ def test_voice_record_in_registry(self):
self.assertTrue(hasattr(self.settings, 'voice'))
self.assertEqual(self.settings.voice, u'UK English Female')

def test_css_class_blacklist_record_in_registry(self):
    # the record must exist and hold the packaged default value
    settings = self.settings
    self.assertTrue(hasattr(settings, 'css_class_blacklist'))
    self.assertEqual(
        settings.css_class_blacklist, DEFAULT_CSS_CLASS_BLACKLIST)

def test_records_removed_on_uninstall(self):
qi = self.portal['portal_quickinstaller']
qi.uninstallProducts(products=[PROJECTNAME])
Expand All @@ -74,6 +79,7 @@ def test_records_removed_on_uninstall(self):
ITextToSpeechControlPanel.__identifier__ + '.globally_enabled',
ITextToSpeechControlPanel.__identifier__ + '.enabled_content_types',
ITextToSpeechControlPanel.__identifier__ + '.voice',
ITextToSpeechControlPanel.__identifier__ + '.css_class_blacklist',
]

for r in records:
Expand Down
23 changes: 22 additions & 1 deletion src/collective/texttospeech/tests/test_upgrades.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_profile_version(self):

def test_registered_steps(self):
steps = len(self.setup.listUpgrades(self.profile_id)[0])
self.assertEqual(steps, 3)
self.assertEqual(steps, 4)

@unittest.skipIf(IS_PLONE_5, 'Upgrade step not supported under Plone 5')
def test_update_library_condition(self):
Expand All @@ -97,3 +97,24 @@ def test_update_library_condition(self):
# run the upgrade step to validate the update
self._do_upgrade(step)
self.assertEqual(resource.getExpression(), EXPRESSION)

def test_add_css_class_blacklist_field(self):
    # preconditions use unittest assertions, not bare ``assert``:
    # bare asserts are stripped under ``python -O`` and report no context

    # check if the upgrade step is registered
    title = u'Add CSS class blacklist field to registry'
    step = self._get_upgrade_step_by_title(title)
    self.assertIsNotNone(step)

    # simulate state on previous version
    from collective.texttospeech.config import DEFAULT_CSS_CLASS_BLACKLIST
    from collective.texttospeech.interfaces import ITextToSpeechControlPanel
    from plone.registry.interfaces import IRegistry
    from zope.component import getUtility
    registry = getUtility(IRegistry)
    record = ITextToSpeechControlPanel.__identifier__ + '.css_class_blacklist'
    del registry.records[record]
    self.assertNotIn(record, registry)

    # run the upgrade step to validate the update
    self._do_upgrade(step)
    self.assertIn(record, registry)
    self.assertEqual(registry[record], DEFAULT_CSS_CLASS_BLACKLIST)
7 changes: 7 additions & 0 deletions src/collective/texttospeech/upgrades/v3/configure.zcml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@
handler="..cook_javascript_resources"
/>

<genericsetup:upgradeDepends
title="Add CSS class blacklist field to registry"
description="Reload registration of configlet registry to add new field."
import_steps="plone.app.registry"
run_deps="false"
/>

</genericsetup:upgradeSteps>

</configure>

0 comments on commit b72412b

Please sign in to comment.