Permalink
Browse files

Adding generic reader API

  • Loading branch information...
2 parents 2a021ee + 9170f9d commit e2b8725b1cda051a9700c697ad3ec24bddff065f @asmyczek committed Jul 19, 2011
Showing with 118 additions and 17 deletions.
  1. +0 −1 .gitignore
  2. +7 −7 README.md
  3. +1 −1 project.clj
  4. +97 −5 src/simple_avro/utils.clj
  5. +13 −3 test/simple_avro/utils_tests.clj
View
1 .gitignore
@@ -2,7 +2,6 @@ pom.xml
lib
classes
simple-avro-*.jar
-edit.sh
bin
clojure*.jar
.project
View
14 README.md
@@ -1,5 +1,5 @@
# simple-avro
-Clojure wrapper around Avro schema and serialization.
+Clojure wrapper for Avro schema and serialization.
## Quick Start
@@ -45,12 +45,12 @@ Use equivalent decoder to de-serialize objects using _unpack_.
### Custom types API
_simple-avro.core_ supports only basic Avro types. For custom types import _simple-avro.api_ instead of _core_.
-To add support for a custom type first add a schema best matching the type. For example a Date object can be represented as:
+To add support for a new custom type first add a schema best matching the type. For example a Date object can be represented as:
(defavro-type avro-date
:time avro-long)
-Second, register convert functions from the object type to Avro record and back using _pack-avro-instance_ and _unpack-avro-instance_:
+Second, register mapping functions from the custom object to Avro record and back using _pack-avro-instance_ and _unpack-avro-instance_:
(pack-avro-instance Date
(fn [date]
@@ -60,25 +60,25 @@ Second, register convert functions from the object type to Avro record and back
(fn [rec]
(Date. (rec "time"))))
-You can user default pack/unpack method to serialize the Date object now:
+Now you can use default pack/unpack methods to serialize Date objects:
(unpack avro-date (pack avro-date (Date.)))
-_simple-avro.api_ adds serialization support for Date, UUID and a _avro-maybe_ helper for optional values.
+_simple-avro.api_ adds serialization support for Date, UUID and an _avro-maybe_ helper for optional values.
For more details see examples and unit tests.
## Installation
### Leiningen
- [simple-avro/simple-avro "0.0.2"]
+ [simple-avro/simple-avro "0.0.3"]
### Maven
<dependency>
<groupId>simple-avro</groupId>
<artifactId>simple-avro</artifactId>
- <version>0.0.2</version>
+ <version>0.0.3</version>
</dependency>
View
2 project.clj
@@ -1,4 +1,4 @@
-(defproject simple-avro "0.0.3-1.4-SNAPSHOT"
+(defproject simple-avro "0.0.4-1.4-SNAPSHOT"
:description "Clojure wrapper around Avro schema and serialization."
:url "http://github.com/asmyczek/simple-avro"
:dependencies [[org.clojure/clojure "1.2.0"]
View
102 src/simple_avro/utils.clj
@@ -4,13 +4,20 @@
(:require (clojure.contrib [json :as json]))
(:import (org.apache.avro Schema Schema$Type)
(org.apache.avro.file CodecFactory
+ DataFileStream
DataFileWriter
DataFileReader)
(org.apache.avro.generic GenericDatumWriter
GenericDatumReader)
- (java.io File)
+ (java.io InputStream OutputStream File
+ FileOutputStream FileInputStream
+ BufferedOutputStream BufferedInputStream)
(java.net URI URL)))
+;
+; Basic read/write avro file functions
+;
+
(defmulti #^{:private true}
file class)
@@ -26,7 +33,7 @@
(defmethod file URI [#^URI f]
(file (.toURL f)))
-(defn write-file
+(defn avro-spit
"Write to Avro data file."
[f schema objs & [meta]]
(let [schema (avro-schema schema)
@@ -41,7 +48,7 @@
(finally
(.close writer)))))
-(defn read-file
+(defn avro-slurp
"Read data from Avro data file."
[f]
(let [reader (DataFileReader. (file f) (GenericDatumReader.))
@@ -55,12 +62,97 @@
nil))))]
(read-next reader)))
-(defn read-meta
- "Read meta keys from Avro data file."
+(defn avro-slurp-meta
+ "Read meta from Avro data file."
[f & keys]
(let [reader (DataFileReader. (file f) (GenericDatumReader.))]
(loop [[k & ks] keys mta {}]
(if (nil? k)
mta
(recur ks (assoc mta k (String. (.getMeta reader k) "UTF-8")))))))
+;
+; Custom file handling
+;
+
+; output stream
+
+(defmulti #^{:private true}
+ output-stream class)
+
+(defmethod output-stream OutputStream [#^OutputStream os]
+ os)
+
+(defmethod output-stream File [#^File f]
+ (BufferedOutputStream. (FileOutputStream. f)))
+
+(defmethod output-stream String [#^String f]
+ (output-stream (File. f)))
+
+(defmethod output-stream URL [#^URL f]
+ (output-stream (.getPath f)))
+
+(defmethod output-stream URI [#^URI f]
+ (output-stream (.toURL f)))
+
+; Writer
+
+(defprotocol Closable
+ "Closable protocol."
+ (close [this] "Close underlying closable."))
+
+(defprotocol Writer
+ "General writer protocol."
+ (write [this obj] "Write object."))
+
+(defn avro-writer
+ [os schema]
+ (let [schema (avro-schema schema)
+ os (output-stream os)
+ writer (doto (DataFileWriter. (GenericDatumWriter. schema))
+ (.create schema os))]
+ (reify
+ Writer
+ (write [this obj]
+ (.append writer (pack schema obj)))
+ Closable
+ (close [this]
+ (.flush writer)
+ (.close writer)))))
+
+; Input stream
+
+(defmulti #^{:private true}
+ input-stream class)
+
+(defmethod input-stream InputStream [#^OutputStream os]
+ os)
+
+(defmethod input-stream File [#^File f]
+ (BufferedInputStream. (FileInputStream. f)))
+
+(defmethod input-stream String [#^String f]
+ (input-stream (File. f)))
+
+(defmethod input-stream URL [#^URL f]
+ (input-stream (.getPath f)))
+
+(defmethod input-stream URI [#^URI f]
+ (input-stream (.toURL f)))
+
+(defprotocol Reader
+ "General reader protocol."
+ (read-next [this] "Read next element.")
+ (has-next [this] "Checks if more element exists."))
+
+(defn avro-reader
+ [is]
+ (let [is (input-stream is)
+ dfs (DataFileStream. is (GenericDatumReader.))
+ schema (.getSchema dfs)]
+ (reify
+ Reader
+ (read-next [this] (unpack schema (.next dfs)))
+ (has-next [this] (.hasNext dfs))
+ Closable
+ (close [this] (.close dfs)))))
View
16 test/simple_avro/utils_tests.clj
@@ -14,10 +14,20 @@
(deftest read-write-test
(let [file (java.io.File/createTempFile "avro-test-data", ".tmp")
- _ (write-file file Test test-records {"m1" "test1" "m2" "test2"})
- content (read-file file)
- meta (read-meta file "m1" "m2")]
+ _ (avro-spit file Test test-records {"m1" "test1" "m2" "test2"})
+ content (avro-slurp file)
+ meta (avro-slurp-meta file "m1" "m2")]
(is (= content test-records))
(is (= (meta "m1") "test1"))
(is (= (meta "m2") "test2"))))
+
+(doto (avro-writer "/Users/asmyczek/temp/test.avro" avro-string)
+ (write "Just a test")
+ (write "Second entry")
+ (close))
+
+(let [r (avro-reader "/Users/asmyczek/temp/test.avro")]
+ (while (has-next r)
+ (println (read-next r))))
+

0 comments on commit e2b8725

Please sign in to comment.