Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 116 lines (104 sloc) 4.378 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
; Copyright (c) Rich Hickey. All rights reserved.
; The use and distribution terms for this software are covered by the
; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
; which can be found in the file epl-v10.html at the root of this distribution.
; By using this software in any fashion, you are agreeing to be bound by
; the terms of this license.
; You must not remove this notice, or any other, from this software.

(ns clojure.xml
    (:import (org.xml.sax ContentHandler Attributes SAXException)
             (javax.xml.parsers SAXParser SAXParserFactory)))

(def *stack*)
(def *current*)
(def *state*) ; :element :chars :between
(def *sb*)

(defstruct element :tag :attrs :content)

(def tag (accessor element :tag))
(def attrs (accessor element :attrs))
(def content (accessor element :content))

(def content-handler
  (let [push-content (fn [e c]
                       (assoc e :content (conj (or (:content e) []) c)))
        push-chars (fn []
                     (when (and (= *state* :chars)
                                (some (complement #(. Character (isWhitespace %))) (str *sb*)))
                       (set! *current* (push-content *current* (str *sb*)))))]
    (new clojure.lang.XMLHandler
         (proxy [ContentHandler] []
           (startElement [uri local-name q-name #^Attributes atts]
             (let [attrs (fn [ret i]
                           (if (neg? i)
                             ret
                             (recur (assoc ret
                                           (. clojure.lang.Keyword (intern (symbol (. atts (getQName i)))))
                                           (. atts (getValue i)))
                                    (dec i))))
                   e (struct element
                             (. clojure.lang.Keyword (intern (symbol q-name)))
                             (when (pos? (. atts (getLength)))
                               (attrs {} (dec (. atts (getLength))))))]
               (push-chars)
               (set! *stack* (conj *stack* *current*))
               (set! *current* e)
               (set! *state* :element))
             nil)
           (endElement [uri local-name q-name]
             (push-chars)
             (set! *current* (push-content (peek *stack*) *current*))
             (set! *stack* (pop *stack*))
             (set! *state* :between)
             nil)
           (characters [ch start length]
             (when-not (= *state* :chars)
               (set! *sb* (new StringBuilder)))
             (let [#^StringBuilder sb *sb*]
               (. sb (append ch start length))
               (set! *state* :chars))
             nil)
           (setDocumentLocator [locator])
           (startDocument [])
           (endDocument [])
           (startPrefixMapping [prefix uri])
           (endPrefixMapping [prefix])
           (ignorableWhitespace [ch start length])
           (processingInstruction [target data])
           (skippedEntity [name])
           ))))

(defn startparse-sax [s ch]
  (.. SAXParserFactory (newInstance) (newSAXParser) (parse s ch)))

(defn parse
  "Parses and loads the source s, which can be a File, InputStream or
String naming a URI. Returns a tree of the xml/element struct-map,
which has the keys :tag, :attrs, and :content. and accessor fns tag,
attrs, and content. Other parsers can be supplied by passing
startparse, a fn taking a source and a ContentHandler and returning
a parser"
  ([s] (parse s startparse-sax))
  ([s startparse]
    (binding [*stack* nil
              *current* (struct element)
              *state* :between
              *sb* nil]
      (startparse s content-handler)
      ((:content *current*) 0))))

(defn emit-element [e]
  (if (instance? String e)
    (println e)
    (do
      (print (str "<" (name (:tag e))))
      (when (:attrs e)
(doseq [attr (:attrs e)]
(print (str " " (name (key attr)) "='" (val attr)"'"))))
      (if (:content e)
(do
(println ">")
(doseq [c (:content e)]
(emit-element c))
(println (str "</" (name (:tag e)) ">")))
(println "/>")))))

(defn emit [x]
  (println "<?xml version='1.0' encoding='UTF-8'?>")
  (emit-element x))

;(export '(tag attrs content parse element emit emit-element))

;(load-file "/Users/rich/dev/clojure/src/xml.clj")
;(def x (xml/parse "http://arstechnica.com/journals.rssx"))
Something went wrong with that request. Please try again.