Skip to content


Updating CleanPaste to work properly in Safari
Browse files Browse the repository at this point in the history
- Also includes fixes for plaintext pasting, FF, IE
- Tested in IE6,7,8; FF3,4,5; Safari and Chrome
- Run through JSLint
  • Loading branch information
Jo Carter committed Jul 7, 2011
1 parent c184e79 commit 66de0cc
Showing 1 changed file with 199 additions and 126 deletions.
325 changes: 199 additions & 126 deletions Source/MooEditable/MooEditable.CleanPaste.js
Expand Up @@ -5,13 +5,16 @@ name: MooEditable.CleanPaste
description: Extends MooEditable to insert text copied from other editors like word without all that messy style-information.
updates in this version: Improved Internet Explorer handling to break text on to new lines. Improved handling of some styles from newer versions of MS Word to remove extra style tags that were remaining.
updates in previous version: Improved Internet Explorer handling to break text on to new lines. Improved handling of some styles from newer versions of MS Word to remove extra style tags that were remaining. (David)
updates in this version: Fixed CleanPaste in Safari (Jo)
license: MIT-style license
- André Fiedler <>
- André Fiedler <>
- David Bennett <>
- Jo Carter <>
- MooEditable
Expand All @@ -26,7 +29,7 @@ usage:
<script src="MooEditable.CleanPaste.js"></script>
window.addEvent('domready', function(){
window.addEvent('domready', function (){
var mooeditable = $('textarea-1').mooEditable();
Expand All @@ -36,28 +39,40 @@ provides: [MooEditable.CleanPaste]

(function () {

MooEditable = Class.refactor(MooEditable, {

Extends: MooEditable,

// @FIXED: Removed because it is already implied by Class.refactor above, and declaring it explicitly breaks MooEditable completely with MooTools 1.3.
// Extends: MooEditable,

attach: function(){
attach: function () {
var ret = this.previous();
this.doc.body.addListener('paste', this.cleanPaste.bind(this));
return ret;

cleanPaste: function(e){
cleanPaste: function (e) {
var txtPastet = e.clipboardData && e.clipboardData.getData ?
e.clipboardData.getData('text/html') : // Standard
window.clipboardData && window.clipboardData.getData ?
window.clipboardData.getData('Text') : // MS
if(!!txtPastet) { // IE and Safari
if(window.clipboardData) this.selection.insertContent(this.cleanHtml(txtPastet, 1)); // IE
else this.selection.insertContent(this.cleanHtml(txtPastet)); // Safari
new Event(e).stop();

// @FIXED: If the standard (non-MS) clipboard API returned no HTML (i.e. plain text is being pasted), fall back to the plain-text data
if ((!txtPastet || '' === txtPastet.trim()) && e.clipboardData && e.clipboardData.getData) {
txtPastet = e.clipboardData.getData('Text');

if (!!txtPastet) { // IE and Safari
if (window.clipboardData) {
this.selection.insertContent(this.cleanHtml(txtPastet, 1)); // IE
else {
this.selection.insertContent(this.cleanHtml(txtPastet)); // Safari

new Event(e).stop();
else { // no clipboard data available
this.selection.insertContent('<span id="INSERTION_MARKER">&nbsp;</span>');
Expand All @@ -68,127 +83,185 @@ provides: [MooEditable.CleanPaste]
return this;

replaceMarkerWithPastedText: function(){
var txtPastet = this.doc.body.get('html');
var txtPastetClean = this.cleanHtml(txtPastet);
replaceMarkerWithPastedText: function () {
var txtPastetClean = this.cleanHtml(this.doc.body.get('html'));
this.doc.body.set('html', this.txtMarked);
var node = this.doc.body.getElementById('INSERTION_MARKER');
return this;

cleanHtml: function(html, isie){
if(isie) {
html = "<p>" + html + "<\/p>";
html = html.replace(/\n/g, "<\/p><p>");
else {

html = html.replace(/<o:p>\s*<\/o:p>/g, '');
html = html.replace(/<o:p>[\s\S]*?<\/o:p>/g, '&nbsp;');

// remove mso-xxx styles.
html = html.replace(/\s*mso-[^:]+:[^;'"]+;?/gi, '');

// remove margin styles.
html = html.replace(/\s*MARGIN: 0cm 0cm 0pt\s*;/gi, '');
html = html.replace(/\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"");

html = html.replace(/\s*TEXT-INDENT: 0cm\s*;/gi, '');
html = html.replace(/\s*TEXT-INDENT: 0cm\s*"/gi, "\"");

html = html.replace(/\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"");

html = html.replace(/\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"");

html = html.replace(/\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"");

html = html.replace(/\s*tab-stops:[^;"]*;?/gi, '');
html = html.replace(/\s*tab-stops:[^"]*/gi, '');

// remove FONT face attributes.
html = html.replace(/\s*face="[^"]*"/gi, '');
html = html.replace(/\s*face=[^ >]*/gi, '');

html = html.replace(/\s*FONT-FAMILY:[^;"]*;?/gi, '');

// remove class attributes
html = html.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3");

// remove styles.
html = html.replace(/<(\w[^>]*) style="([^\"]*)"([^>]*)/gi, "<$1$3");
html = html.replace(/<(\w[^>]*) style='([^\']*)'([^>]*)/gi, "<$1$3");

// remove style, meta and link tags
html = html.replace(/<STYLE[^>]*>[\s\S]*?<\/STYLE[^>]*>/gi, '');
html = html.replace(/<(?:META|LINK)[^>]*>\s*/gi, '');

// remove empty styles.
html = html.replace(/\s*style="\s*"/gi, '');

html = html.replace(/<SPAN\s*[^>]*>\s*&nbsp;\s*<\/SPAN>/gi, '&nbsp;');

html = html.replace(/<SPAN\s*[^>]*><\/SPAN>/gi, '');

// remove lang attributes
html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3");

html = html.replace(/<SPAN\s*>([\s\S]*?)<\/SPAN>/gi, '$1');

html = html.replace(/<FONT\s*>([\s\S]*?)<\/FONT>/gi, '$1');

// remove XML elements and declarations
html = html.replace(/<\\?\?xml[^>]*>/gi, '');

// remove w: tags with contents.
html = html.replace(/<w:[^>]*>[\s\S]*?<\/w:[^>]*>/gi, '');

// remove tags with XML namespace declarations: <o:p><\/o:p>
html = html.replace(/<\/?\w+:[^>]*>/gi, '');

// remove comments [SF BUG-1481861].
html = html.replace(/<\!--[\s\S]*?-->/g, '');

html = html.replace(/<(U|I|STRIKE)>&nbsp;<\/\1>/g, '&nbsp;');

html = html.replace(/<H\d>\s*<\/H\d>/gi, '');

// remove "display:none" tags.
html = html.replace(/<(\w+)[^>]*\sstyle="[^"]*DISPLAY\s?:\s?none[\s \S]*?<\/\1>/ig, '');

// remove language tags
html = html.replace(/<(\w[^>]*) language=([^ |>]*)([^>]*)/gi, "<$1$3");

// remove onmouseover and onmouseout events (from MS word comments effect)
html = html.replace(/<(\w[^>]*) onmouseover="([^\"]*)"([^>]*)/gi, "<$1$3");
html = html.replace(/<(\w[^>]*) onmouseout="([^\"]*)"([^>]*)/gi, "<$1$3");

// the original <Hn> tag send from word is something like this: <Hn style="margin-top:0px;margin-bottom:0px">
html = html.replace(/<H(\d)([^>]*)>/gi, '<h$1>');

// word likes to insert extra <font> tags, when using IE. (Wierd).
html = html.replace(/<(H\d)><FONT[^>]*>([\s\S]*?)<\/FONT><\/\1>/gi, '<$1>$2<\/$1>');
html = html.replace(/<(H\d)><EM>([\s\S]*?)<\/EM><\/\1>/gi, '<$1>$2<\/$1>');

// remove "bad" tags
html = html.replace(/<\s+[^>]*>/gi, '');

// remove empty tags (three times, just to be sure).
// This also removes any empty anchor
html = html.replace(/<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '');
html = html.replace(/<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '');
html = html.replace(/<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '');

// Convert <p> to <br />
if (!this.options.paragraphise) {
html.replace(/<p>/gi, '<br />');
html.replace(/<\\p>/gi, '');
cleanHtml: function (html, isie) {
if (isie) {
if (!this.options.paragraphise) {
html = html.replace(/\n/g, "<br />");
else {
html = "<p>" + html + "<\/p>";
html = html.replace(/\n/g, "<\/p><p>");
html = html.replace(/<p>\s<\/p>/gi, '');

return html;
else {
// @FIXED: Safari pastes in styles with ' not " - fixed to not be broken in safari
// @FIXED: Word pastes in Safari

// remove body and html tag
html = html.replace(/<html[^>]*?>(.*)/gim, "$1");
html = html.replace(/<\/html>/gi, '');
html = html.replace(/<body[^>]*?>(.*)/gi, "$1");
html = html.replace(/<\/body>/gi, '');

// remove style, meta and link tags
html = html.replace(/<style[^>]*?>[\s\S]*?<\/style[^>]*>/gi, '');
html = html.replace(/<(?:meta|link)[^>]*>\s*/gi, '');

// remove XML elements and declarations
html = html.replace(/<\\?\?xml[^>]*>/gi, '');

// remove w: tags with contents.
html = html.replace(/<w:[^>]*>[\s\S]*?<\/w:[^>]*>/gi, '');

// remove tags with XML namespace declarations: <o:p><\/o:p>
html = html.replace(/<o:p>\s*<\/o:p>/g, '');
html = html.replace(/<o:p>[\s\S]*?<\/o:p>/g, '&nbsp;');
html = html.replace(/<\/?\w+:[^>]*>/gi, '');

// remove comments [SF BUG-1481861].
html = html.replace(/<\!--[\s\S]*?-->/g, '');
html = html.replace(/<\!\[[\s\S]*?\]>/g, '');

// remove mso-xxx styles.
html = html.replace(/\s*mso-[^:]+:[^;"']+;?/gi, '');

// remove styles.
html = html.replace(/<(\w[^>]*) style='([^\']*)'([^>]*)/gim, "<$1$3");
html = html.replace(/<(\w[^>]*) style="([^\"]*)"([^>]*)/gim, "<$1$3");

// remove margin styles.
html = html.replace(/\s*margin: 0cm 0cm 0pt\s*;/gi, '');
html = html.replace(/\s*margin: 0cm 0cm 0pt\s*"/gi, "\"");

html = html.replace(/\s*text-indent: 0cm\s*;/gi, '');
html = html.replace(/\s*text-indent: 0cm\s*"/gi, "\"");

html = html.replace(/\s*text-align: [^\s;]+;?"/gi, "\"");

html = html.replace(/\s*page-break-before: [^\s;]+;?"/gi, "\"");

html = html.replace(/\s*font-variant: [^\s;]+;?"/gi, "\"");

html = html.replace(/\s*tab-stops:[^;"']*;?/gi, '');
html = html.replace(/\s*tab-stops:[^"']*/gi, '');

// remove font face attributes.
html = html.replace(/\s*face="[^"']*"/gi, '');
html = html.replace(/\s*face=[^ >]*/gi, '');

html = html.replace(/\s*font-family:[^;"']*;?/gi, '');
html = html.replace(/\s*font-size:[^;"']*;?/gi, '');

// remove class attributes
html = html.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3");

// remove elements styled "display:none" (the whole element, including its contents).
html = html.replace(/<(\w+)[^>]*\sstyle="[^"']*display\s?:\s?none[\s \S]*?<\/\1>/ig, '');

// remove empty styles.
html = html.replace(/\s*style='\s*'/gi, '');
html = html.replace(/\s*style="\s*"/gi, '');

html = html.replace(/<span\s*[^>]*>\s*&nbsp;\s*<\/span>/gi, '&nbsp;');

html = html.replace(/<span\s*[^>]*><\/span>/gi, '');

// remove align attributes
html = html.replace(/<(\w[^>]*) align=([^ |>]*)([^>]*)/gi, "<$1$3");

// remove lang attributes
html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3");

html = html.replace(/<span([^>]*)>([\s\S]*?)<\/span>/gi, '$2');

html = html.replace(/<font\s*>([\s\S]*?)<\/font>/gi, '$1');

html = html.replace(/<(u|i|strike)>&nbsp;<\/\1>/gi, '&nbsp;');

html = html.replace(/<h\d>\s*<\/h\d>/gi, '');

// remove language attributes
html = html.replace(/<(\w[^>]*) language=([^ |>]*)([^>]*)/gi, "<$1$3");

// remove onmouseover and onmouseout events (from MS word comments effect)
html = html.replace(/<(\w[^>]*) onmouseover="([^\"']*)"([^>]*)/gi, "<$1$3");
html = html.replace(/<(\w[^>]*) onmouseout="([^\"']*)"([^>]*)/gi, "<$1$3");

// the original <Hn> tag sent from word is something like this: <Hn style="margin-top:0px;margin-bottom:0px">
html = html.replace(/<h(\d)([^>]*)>/gi, '<h$1>');

// word likes to insert extra <font> tags, when using IE. (Weird).
html = html.replace(/<(h\d)><font[^>]*>([\s\S]*?)<\/font><\/\1>/gi, '<$1>$2<\/$1>');
html = html.replace(/<(h\d)><em>([\s\S]*?)<\/em><\/\1>/gi, '<$1>$2<\/$1>');

// i -> em, b -> strong - doesn't match nested tags, e.g. <b><i>some text</i></b> - not possible in regexp
// @see - etc.
html = html.replace(/<b\b[^>]*>(.*?)<\/b[^>]*>/gi, '<strong>$1</strong>');
html = html.replace(/<i\b[^>]*>(.*?)<\/i[^>]*>/gi, '<em>$1</em>');

// remove "bad" tags
html = html.replace(/<\s+[^>]*>/gi, '');

// unwrap attribute-less <span>s, keeping their contents (i.e. no attributes, so no reason for a span in pasted text)
// done twice for nested spans
html = html.replace(/<span>([\s\S]*?)<\/span>/gi, '$1');
html = html.replace(/<span>([\s\S]*?)<\/span>/gi, '$1');

// unwrap attribute-less <div>s, keeping their contents (see span)
html = html.replace(/<div>([\s\S]*?)<\/div>/gi, '$1');
html = html.replace(/<div>([\s\S]*?)<\/div>/gi, '$1');

// remove empty tags (three times, just to be sure - for nested empty tags).
// This also removes any empty anchors
html = html.replace(/<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '');
html = html.replace(/<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '');
html = html.replace(/<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '');

html = html.trim();

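// Convert <p> to <br />
// NOTE(review): the two replace() calls below discard their return values, so html is left unchanged here - confirm whether the results should be assigned back to html.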
if (!this.options.paragraphise) {
html.replace(/<p>/gi, '<br />');
html.replace(/<\/p>/gi, '');
// Check if in paragraph - this fixes FF3.6 and its <br id=""> issue
else {
var check = html.substr(0,2);
if ('<p' !== check) {
html = '<p>' + html + '</p>';
// Replace breaks with paragraphs
html = html.replace(/\n/g, "<\/p><p>");
html = html.replace(/<br[^>]*>/gi, '<\/p><p>');

// Make it valid xhtml
html = html.replace(/<br>/gi, '<br />');

// remove <br>'s that end a paragraph here.
html = html.replace(/<br[^>]*><\/p>/gim, '</p>');

// remove empty paragraphs - with just a &nbsp; (or whitespace) in (and tags again for good measure)
html = html.replace(/<p>&nbsp;<\/p>/gi,'');
html = html.replace(/<p>\s<\/p>/gi, '');
html = html.replace(/<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '');

html = html.trim();

return html;


0 comments on commit 66de0cc

Please sign in to comment.