Skip to content

Commit

Permalink
basic implementation of #'clean
Browse files Browse the repository at this point in the history
  • Loading branch information
archimag committed Mar 24, 2011
1 parent 43dd8df commit 1d3afcd
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 43 deletions.
4 changes: 3 additions & 1 deletion sanitize.asd
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
;;;; sanitize.asd

(defsystem sanitize
:depends-on (#:cl-libxml2)
:components
((:module "src"
:components
((:file "packages")
(:file "mode" :depends-on ("packages"))))))
(:file "mode" :depends-on ("packages"))
(:file "clean" :depends-on ("mode"))))))
69 changes: 69 additions & 0 deletions src/clean.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
;;;; clean

(in-package #:sanitize)

(defgeneric clean (html mode)
(:documentation "Return sanitize copy of HTML"))

(defmethod clean ((html string) mode)
(html:with-parse-html-fragment (fragment html)
(clean fragment mode)
(html:serialize-html fragment :to-string)))

(defmethod clean ((node xtree:node) mode)
(case (xtree:node-type node)
(:xml-document-fragment-node
(dolist (item (xtree:all-childs node))
(clean item mode)))
(:xml-text-node node)
(:xml-comment-node
(cond
((mode-allow-comments mode) node)
(t (xtree:remove-child node))))
(:xml-element-node
(clean-element node mode))
(otherwise
(xtree:remove-child node))))

(defun clean-element (element mode
&aux (tagname (xtree:local-name element)))
(dolist (node (xtree:all-childs element))
(clean node mode))

(unless (element-allowed-p mode tagname)
(let ((fragment (xtree:make-document-fragment)))
(dolist (node (xtree:all-childs element))
(xtree:append-child fragment
(xtree:detach node)))

(when (whitespace-element-p mode tagname)
(xtree:prepend-child fragment
(xtree:make-text " "))
(xtree:append-child fragment
(xtree:make-text " ")))

(xtree:replace-child element fragment)
(return-from clean-element fragment)))

(dolist (attr (xtree:all-attribute-nodes element))
(unless (attribute-allowed-p mode tagname (xtree:local-name attr))
(xtree:remove-child attr)))

(dolist (attr/value (element-additional-attributes mode tagname))
(setf (xtree:attribute-value element (car attr/value))
(cdr attr/value))))















116 changes: 75 additions & 41 deletions src/mode.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,71 @@
(in-package #:sanitize)

(defclass sanitize-mode ()
((allow-comments :initarg :allow-comments :reader mode-allow-comments)
(add-attributes :initarg :add-attributes :reader mode-add-attributes)
(attributes :initarg :attributes :reader mode-attributes)
(elemetns :initarg :elements :reader mode-elements)
(output-format :initarg :output-format :reader mode-output-format)
(protocols :initarg :protocols :reader mode-protocols)
(whitespace-elements :initarg :whitespace-elements :reader mode-whitespace-elements)))

(defmacro define-sanitize-mode (name &key inherit
allow-comments
add-attributes
attributes
elements
output-format
protocols
whitespace-elements)
(declare (ignore inherit))
((allow-comments :initform nil
:initarg :allow-comments
:reader mode-allow-comments)

(add-attributes :initform nil
:initarg :add-attributes
:reader mode-add-attributes)

(attributes :initform nil
:initarg :attributes
:reader mode-attributes)

(elemetns :initform nil
:initarg :elements
:reader mode-elements)

(output-format :initform :html
:initarg :output-format
:reader mode-output-format)

(protocols :initform nil
:initarg :protocols
:reader mode-protocols)

(whitespace-elements :initarg :whitespace-elements
:reader mode-whitespace-elements
:initform nil)))

(defmethod shared-initialize :after ((mode sanitize-mode) slot-names &key &allow-other-keys)
(unless (mode-output-format mode)
(setf (slot-value mode 'output-format)
:html))

(unless (mode-whitespace-elements mode)
(setf (slot-value mode 'whitespace-elements)
(list "address" "article" "aside" "blockquote" "br" "dd" "div" "dl"
"dt" "footer" "h1" "h2" "h3" "h4" "h5" "h6" "header" "hgroup"
"hr" "li" "nav" "ol" "p" "pre" "section" "ul"))))

(defun element-allowed-p (mode tagname)
(member tagname
(mode-elements mode)
:test #'string-equal))

(defun attribute-allowed-p (mode tagname attrname)
(member attrname
(cdr (assoc tagname
(mode-attributes mode)
:test #'string-equal))
:test #'string-equal))

(defun whitespace-element-p (mode tagname)
(member tagname
(mode-whitespace-elements mode)
:test #'string-equal))

(defun element-additional-attributes (mode tagname)
(cdr (assoc tagname
(mode-add-attributes mode)
:test #'string-equal)))


(defmacro define-sanitize-mode (name &key
allow-comments add-attributes attributes elements
output-format protocols whitespace-elements)
`(defparameter ,name
(make-instance 'sanitize-mode
:allow-comments ',allow-comments
Expand All @@ -30,23 +78,12 @@
:protocols ',protocols
:whitespace-elements ',whitespace-elements)))

(define-sanitize-mode +default+
:allow-comments nil
:add-attributes nil
:attributes nil
:elements #()
:output-format :html
:protocols nil
:whitespace-elements #(("address" "article" "aside" "blockquote" "br" "dd" "div" "dl" "dt" "footer"
"h1" "h2" "h3" "h4" "h5" "h6" "header" "hgroup" "hr" "li" "nav" "ol" "p" "pre"
"section" "ul")))
(define-sanitize-mode +default+)

(define-sanitize-mode +basic+
:inherit +default+

:elements #("a" "abbr" "b" "blockquote" "br" "cite" "code" "dd" "dfn" "dl" "dt" "em" "i"
"kbd" "li" "mark" "ol" "p" "pre" "q" "s" "samp" "small" "strike" "strong"
"sub" "sup" "time" "u" "ul" "var")
:elements ("a" "abbr" "b" "blockquote" "br" "cite" "code" "dd" "dfn" "dl" "dt" "em" "i"
"kbd" "li" "mark" "ol" "p" "pre" "q" "s" "samp" "small" "strike" "strong"
"sub" "sup" "time" "u" "ul" "var")

:attributes (("a" . ("href"))
("abbr" . ("title"))
Expand All @@ -63,13 +100,11 @@


(define-sanitize-mode +relaxed+
:inherit +default+

:elements #("a" "abbr" "b" "bdo" "blockquote" "br" "caption" "cite" "code" "col"
"colgroup" "dd" "del" "dfn" "dl" "dt" "em" "figcaption" "figure" "h1" "h2"
"h3" "h4" "h5" "h6" "hgroup" "i" "img" "ins" "kbd" "li" "mark" "ol" "p" "pre"
"q" "rp" "rt" "ruby" "s" "samp" "small" "strike" "strong" "sub" "sup" "table"
"tbody" "td" "tfoot" "th" "thead" "time" "tr" "u" "ul" "var" "wbr")
:elements ("a" "abbr" "b" "bdo" "blockquote" "br" "caption" "cite" "code" "col"
"colgroup" "dd" "del" "dfn" "dl" "dt" "em" "figcaption" "figure" "h1" "h2"
"h3" "h4" "h5" "h6" "hgroup" "i" "img" "ins" "kbd" "li" "mark" "ol" "p" "pre"
"q" "rp" "rt" "ruby" "s" "samp" "small" "strike" "strong" "sub" "sup" "table"
"tbody" "td" "tfoot" "th" "thead" "time" "tr" "u" "ul" "var" "wbr")

:attributes ((:all . ("dir" "lang" "title"))
("a" . ("href"))
Expand All @@ -95,6 +130,5 @@
("q" . (("cite" . ("http" "https" :relative))))))

(define-sanitize-mode +restricted+
:inherit +default+
:elements #("b" "em" "i" "strong" "u"))
:elements ("b" "em" "i" "strong" "u"))

4 changes: 3 additions & 1 deletion src/packages.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@
#:+default+
#:+basic+
#:+relaxed+
#:+restricted+))
#:+restricted+

#:clean))

0 comments on commit 1d3afcd

Please sign in to comment.