Skip to content
Browse files

working version of parse & detect-mime-type functions

  • Loading branch information...
1 parent c7a2924 commit 229fec1c889077ea6c820f03ff654e366aee5d2c @alexott committed Mar 25, 2010
Showing with 27 additions and 14 deletions.
  1. +27 −14 src/tika.clj
View
41 src/tika.clj
@@ -1,34 +1,47 @@
;; Clojure Interface to Apache Tika library
-
+;; TODO: add functions for java.net.URL class
(ns tika
(:import (java.io InputStream File FileInputStream))
+ (:import (java.net URL))
(:import (org.apache.tika.parser Parser AutoDetectParser ParseContext))
(:import (org.apache.tika.metadata Metadata))
+ (:import (org.apache.tika Tika))
(:import (org.apache.tika.sax BodyContentHandler))
)
+(def #^{:private true} tika-class (Tika.)) ; single private Tika instance, reused by every detect-mime-type method
+
(defn- conv-metadata
  "Turns a Tika Metadata object into a Clojure map: each metadata name
  becomes a lower-cased keyword, mapped to a seq of that name's values."
  [#^Metadata mdata]
  (into {}
        (for [field (.names mdata)]
          [(keyword (.toLowerCase field))
           (seq (.getValues mdata field))])))
-(defn parse-stream
- "Parses Tika-supported stream"
- [#^InputStream ifile]
+(defmulti parse class) ; dispatches on the class of its single argument
+
+(defmethod parse InputStream [ifile] ; parses ifile with AutoDetectParser; returns the metadata map plus :text
(let [parser (new AutoDetectParser)
context (new ParseContext)
metadata (new Metadata)
handler (new BodyContentHandler)
]
(.set context Parser parser) ; registers the parser in the context under the Parser class key
(.parse parser ifile handler metadata context) ; NOTE: the new version no longer closes ifile (the .close call below is removed by this commit)
- (.close ifile)
- (let [mdata (conv-metadata metadata)
- txt (.toString handler)]
- (assoc mdata :text txt)
- )))
-
-(defn parse-file
- "Parses Tika-supported file"
- [#^String filename]
- (parse-stream (new FileInputStream (File. filename))))
+ (assoc (conv-metadata metadata) :text (.toString handler)))) ; converted metadata with the handler's string output under :text
+
+;; Parses the file at path `filename` and returns the same map as the
+;; InputStream method (metadata keywords plus :text).
+;; The stream is opened here, so it is also closed here: with-open releases
+;; the file handle even when parsing throws, because the InputStream method
+;; does not close its argument.
+(defmethod parse String [filename]
+ (with-open [in (FileInputStream. (File. filename))]
+   (parse in)))
+
+;; Parses the given java.io.File and returns the same map as the
+;; InputStream method (metadata keywords plus :text).
+;; The stream is opened here, so it is also closed here: with-open releases
+;; the file handle even when parsing throws, because the InputStream method
+;; does not close its argument.
+(defmethod parse File [file]
+ (with-open [in (FileInputStream. file)]
+   (parse in)))
+
+;; MIME type detection; dispatches on the class of its single argument
+(defmulti detect-mime-type class)
+;; Delegates detection for a stream to the shared Tika instance's `detect`.
+(defmethod detect-mime-type InputStream [stream]
+ (. tika-class detect stream))
+
+;; Delegates detection for a string argument to the shared Tika instance's
+;; `detect`.
+;; NOTE(review): Tika's detect(String) overload appears to work from the
+;; name alone rather than opening the file, unlike `parse` on a String,
+;; which opens it as a path -- confirm against the Tika Javadoc.
+(defmethod detect-mime-type String [path]
+ (. tika-class detect path))
+
+;; Delegates detection for a java.io.File to the shared Tika instance's
+;; `detect`.
+(defmethod detect-mime-type File [f]
+ (. tika-class detect f))
+

0 comments on commit 229fec1

Please sign in to comment.
Something went wrong with that request. Please try again.