diff --git a/zip-filter.clj b/zip-filter.clj new file mode 100644 index 00000000..361aa474 --- /dev/null +++ b/zip-filter.clj @@ -0,0 +1,212 @@ +; Copyright (c) Chris Houser, April 2008. All rights reserved. +; The use and distribution terms for this software are covered by the +; Common Public License 1.0 (http://opensource.org/licenses/cpl.php) +; which can be found in the file CPL.TXT at the root of this distribution. +; By using this software in any fashion, you are agreeing to be bound by +; the terms of this license. +; You must not remove this notice, or any other, from this software. + +; System for filtering trees and nodes generated by zip.clj in +; general, and xml trees in particular. + +(in-ns 'zip-filter) +(clojure/refer 'clojure) + +(defn coll? + "Returns true if x implements IPersistentCollection." + [x] (instance? clojure.lang.IPersistentCollection x)) + +(defn right-locs + "Returns a lazy sequence of locations to the right of loc." + [loc] (when loc (lazy-cons loc (right-locs (zip/right loc))))) + +(defn leftmost? + "Returns true if there are no more nodes to the left of location loc." + [loc] (nil? (zip/left loc))) + +(defn flatten + "Returns a lazy sequence of all descencents of location loc, in + depth-first order, left-to-right." + [loc] + (if (zip/branch? loc) + (lazy-cons loc (mapcat flatten (right-locs (zip/down loc)))) + (list loc))) + +(defn fixup-apply + "Calls (func loc), and then converts the result to the 'appropriate' + sequence." + #^{:private true} + [func loc] + (try + ;(prn :PRE (zip/node loc)) + (let [rtn (func loc)] + (cond (= rtn true) (list loc) + (contains? (meta rtn) :zip-filter/is-node?) (list rtn) + (= rtn false) nil + (= rtn nil) nil + (coll? rtn) rtn + :else (list rtn))) + (catch java.lang.NullPointerException e (prn :CAUGHT e)))) + +(defn seq-filter-expr + #^{:private true} + [func s] (mapcat (fn [loc] (fixup-apply func loc)) s)) + +(defn mapcat-chain + "Used in building query macros. See xml->" + #^{:private true} + [loc exprs func] + (let [prevseq (gensym 'prevseq_)] + `(let [initloc# ~loc + ~prevseq + (list (with-meta initloc# + (assoc ^initloc# :zip-filter/is-node? true))) + ~@(mapcat #(list prevseq + (list 'zip-filter/seq-filter-expr + (let [usercode (func %)] + (cond usercode usercode + :else %)) + prevseq)) + exprs)] + ~prevseq))) + + +; === xml-zipper query specialization === + +(in-ns 'zip-filter-xml) +(clojure/refer 'clojure) +(refer 'zip-filter) + +(defn attr + "Returns the xml attribute named attrname, of the xml node at location loc." + [loc attrname] + (let [n (zip/node loc) a (n :attrs)] + (and a (a attrname)))) + +(defn attr= + "Returns a query predicate that matches a node when it has an + attribute named attrname whose value is attrval." + [attrname attrval] + (fn [loc] (= (attr loc attrname) attrval))) + +(defn tag= + "Returns a query predicate that matches a node when its is a tag + named tagname." + [tagname] + (fn [loc] + (if (zip/branch? loc) + (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag))) + (right-locs (zip/down loc))) + nil))) + +(defn content + "Returns the contents of the xml node at location loc." + [loc] ((zip/node loc) :content)) + +(defn content= + "Returns a query predicate that matches a node when its xml content equals s." + [s] (fn [loc] (= ((zip/node loc) :content) [s]))) + +(defmacro seq-test + "Returns a query predicate that matches a node when its xml content + matches the query expresions given." + #^{:private true} + [& preds] `(fn [loc#] (and (xml-> loc# ~@preds) (list loc#)))) + +(defmacro xml-> + "The loc is passed to the first predicate. If the predicate returns + a collection, each value of the collection is passed to the next + predicate. If it returns a location, the location is passed to the + next predicate. If it returns true, the input location is passed to + the next predicate. If it returns false or nil, the next predicate + is not called. + + This process is repeated, passing the processed results of each + predicate to the next predicate. xml-> returns the final sequence. + The entire chain is evaluated lazily. + + There are also special predicates: keywords are converted to + xml-tag=, strings to xml-content=, and vectors to sub-queries that + return true if they match. + + See the footer of zip-query.clj for examples." + [loc & preds] + (mapcat-chain loc preds + #(cond (keyword? %) (list 'tag= %) + (string? %) (list 'content= %) + (vector? %) (list* 'seq-test %)))) + +(defmacro xml1-> + "Returns the first item from loc based on the query predicates + given. See xml->" + [loc & preds] `(first (xml-> ~loc ~@preds))) + +(defn clean-str + "Returns the textual contents of the given sequence of xml + locations, similar to xpaths's value-of" + [locseq] + (. #^String (apply str (mapcat #(xml-> % flatten zip/node string?) locseq)) + (replaceAll (str "[\\s" (char 160) "]+") " "))) + + +; === examples === + +(comment + +(defn parse-str [s] + (zip/xml-zip (xml/parse (new org.xml.sax.InputSource + (new java.io.StringReader s))))) + +(def atom1 (parse-str " + + tag:blogger.com,1999:blog-28403206 + 2008-02-14T08:00:58.567-08:00 + n01senet + + + 1 + 2008-02-13 + clojure is the best lisp yet + Chouser + + + 2 + 2008-02-07 + experimenting with vnc + agriffis + + +")) + +; simple single-function filter +(assert (= (xml-> atom1 #((zip/node %) :tag)) + '(:feed))) + +; two-stage filter using helpful query prediates +(assert (= (xml-> atom1 (tag= :title) content) + '("n01senet"))) + +; same filter as above, this time using keyword shortcut +(assert (= (xml-> atom1 :title content) + '("n01senet"))) + +; multi-stage filter +(assert (= (xml-> atom1 :entry :author :name content) + '("Chouser" "agriffis"))) + +; multi-stage filter with subquery specified using a vector +(assert (= (xml-> atom1 :entry [:author :name (content= "agriffis")] + :id content) + '("2"))) + +; same filter as above, this time using a string shortcut +(assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id content) + '("2"))) + +; attribute access +(assert (= (xml-> atom1 :title #(attr % :type)) + '("text"))) + +; attribute filtering +(assert (= (xml-> atom1 :link [(attr= :rel "alternate")] #(attr % :type)) + '("text/html")))