Permalink
Browse files

update README

  • Loading branch information...
1 parent 240ada2 commit 8823b1d69c82af69cb03ea0a660b7773c581c356 @cowtowncoder committed Dec 11, 2011
View
@@ -50,6 +50,12 @@ where sorted output gets printed to `stdout`; and argument is optional (if missi
The format is assumed to be basic text lines, similar to unix `sort`, and the sort order is plain byte-wise ordering (which works for most common encodings).
+## More documentation
+
+Here are some external links:
+
+* [Sorting large data sets](http://www.cowtowncoder.com/blog/archives/2011/12/entry_465.html) (includes example for sorting JSON files)
+
# Getting involved
To access source, just clone [project](https://github.com/cowtowncoder/java-merge-sort)
View
@@ -50,7 +50,14 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.2</version>
- <!-- <scope>test</scope> -->
+ <scope>test</scope>
+ </dependency>
+ <!-- Let's also test JSON-based sorting -->
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ <version>1.9.2</version>
+ <scope>test</scope>
</dependency>
</dependencies>
@@ -32,14 +32,17 @@
/**
* Factory used for reading intermediate sorted files.
*/
- protected final DataReaderFactory<T> _readerFactory;
+ protected DataReaderFactory<T> _readerFactory;
/**
* Factory used for writing intermediate sorted files.
*/
- protected final DataWriterFactory<T> _writerFactory;
+ protected DataWriterFactory<T> _writerFactory;
- protected final Comparator<T> _comparator;
+ /**
+ * Comparator to use for sorting entries (the default named by the original comment is cut off after "defaults to 'C"; TODO confirm)
+ */
+ protected Comparator<T> _comparator;
/*
/**********************************************************************
@@ -85,6 +88,26 @@ public Sorter(SortConfig config,
_phase = null;
}
+ /**
+ * Creates a sorter that uses a default-constructed {@link SortConfig};
+ * simply delegates to the single-argument constructor.
+ */
+ protected Sorter() {
+ this(new SortConfig());
+ }
+
+ /**
+ * Creates a sorter with the given configuration, passing {@code null} for
+ * the three remaining arguments of the full constructor.
+ * NOTE(review): presumably those are the reader factory, writer factory and
+ * comparator, in that order -- confirm against the full constructor.
+ *
+ * @param config configuration handed on to the full constructor
+ */
+ protected Sorter(SortConfig config) {
+ this(config, null, null, null);
+ }
+
+    /**
+     * Builds a new sorter that uses the given factory for reading
+     * intermediate sorted files. The configuration, writer factory and
+     * comparator of this instance are carried over; this instance itself
+     * is left untouched.
+     *
+     * @param f reader factory the new sorter should use
+     * @return a newly constructed {@code Sorter} (always the base type)
+     */
+    protected Sorter<T> withReaderFactory(DataReaderFactory<T> f) {
+        final Sorter<T> derived = new Sorter<T>(_config, f, _writerFactory, _comparator);
+        return derived;
+    }
+
+    /**
+     * Builds a new sorter that uses the given factory for writing
+     * intermediate sorted files. The configuration, reader factory and
+     * comparator of this instance are carried over; this instance itself
+     * is left untouched.
+     *
+     * @param f writer factory the new sorter should use
+     * @return a newly constructed {@code Sorter} (always the base type)
+     */
+    protected Sorter<T> withWriterFactory(DataWriterFactory<T> f) {
+        final Sorter<T> derived = new Sorter<T>(_config, _readerFactory, f, _comparator);
+        return derived;
+    }
+
+    /**
+     * Builds a new sorter that orders entries with the given comparator.
+     * The configuration and both data factories of this instance are
+     * carried over; this instance itself is left untouched.
+     *
+     * @param cmp comparator the new sorter should use
+     * @return a newly constructed {@code Sorter} (always the base type)
+     */
+    protected Sorter<T> withComparator(Comparator<T> cmp) {
+        final Sorter<T> derived = new Sorter<T>(_config, _readerFactory, _writerFactory, cmp);
+        return derived;
+    }
+
/*
/**********************************************************************
/* SortingState implementation
@@ -0,0 +1,18 @@
+package com.fasterxml.sort.std;
+
+import java.util.Comparator;
+
+/**
+ * Basic comparator implementation that works on types that implement
+ * {@link Comparable}, delegating to their natural ordering.
+ * <p>
+ * Nulls are accepted on either side and sort before any non-null value;
+ * two nulls (or the same instance passed twice) compare as equal.
+ */
+public class StdComparator<T extends Comparable<T>> implements Comparator<T>
+{
+    /**
+     * Compares two values by natural ordering, placing nulls first.
+     *
+     * @param object1 first value, may be null
+     * @param object2 second value, may be null
+     * @return 0 if both arguments are the same reference (including both
+     *   null); a negative value if only {@code object1} is null; a positive
+     *   value if only {@code object2} is null; otherwise the result of
+     *   {@code object1.compareTo(object2)}
+     */
+    @Override
+    public int compare(T object1, T object2) {
+        // identity check also covers the "both null" case
+        if (object1 == object2) {
+            return 0;
+        }
+        if (object1 == null) {
+            return -1;
+        }
+        // mirror of the check above: without it a null second argument would
+        // be handed to compareTo(), making the ordering asymmetric
+        if (object2 == null) {
+            return 1;
+        }
+        return object1.compareTo(object2);
+    }
+
+}
@@ -0,0 +1,108 @@
+package com.fasterxml.sort;
+
+import java.io.*;
+
+import org.codehaus.jackson.JsonGenerator;
+import org.codehaus.jackson.map.*;
+import org.codehaus.jackson.type.JavaType;
+
+import com.fasterxml.sort.std.StdComparator;
+
+/**
+ * {@link Sorter} specialization for streams of JSON values: entries are bound
+ * to type {@code T} with Jackson and compared with a {@link StdComparator}.
+ */
+public class JsonFileSorter<T extends Comparable<T>> extends Sorter<T>
+{
+    /**
+     * Creates a sorter for the given entry class, using a fresh
+     * {@link SortConfig} and a fresh {@link ObjectMapper}.
+     */
+    public JsonFileSorter(Class<T> entryType) throws IOException {
+        this(entryType, new SortConfig(), new ObjectMapper());
+    }
+
+    /**
+     * Creates a sorter for the given entry class; the class is turned into a
+     * {@link JavaType} through the supplied mapper.
+     */
+    public JsonFileSorter(Class<T> entryType, SortConfig config, ObjectMapper mapper) throws IOException {
+        this(mapper.constructType(entryType), config, mapper);
+    }
+
+    /**
+     * Main constructor: wires a reader factory (built from the mapper's
+     * reader for {@code entryType}), a writer factory (built from the mapper
+     * itself) and a {@link StdComparator} into the base sorter.
+     */
+    public JsonFileSorter(JavaType entryType, SortConfig config, ObjectMapper mapper)
+        throws IOException
+    {
+        super(config,
+                new ReaderFactory<T>(mapper.reader(entryType)),
+                new WriterFactory<T>(mapper),
+                new StdComparator<T>());
+    }
+
+    /**
+     * Factory producing {@link Reader} instances on top of a shared
+     * {@link ObjectReader}.
+     */
+    static class ReaderFactory<R> extends DataReaderFactory<R>
+    {
+        private final ObjectReader _reader;
+
+        public ReaderFactory(ObjectReader objectReader) {
+            _reader = objectReader;
+        }
+
+        @Override
+        public DataReader<R> constructReader(InputStream in) throws IOException {
+            MappingIterator<R> values = _reader.readValues(in);
+            return new Reader<R>(values);
+        }
+    }
+
+    /**
+     * Data reader that pulls entries one at a time from a
+     * {@link MappingIterator}.
+     */
+    static class Reader<E> extends DataReader<E>
+    {
+        protected final MappingIterator<E> _iterator;
+
+        public Reader(MappingIterator<E> iterator) {
+            _iterator = iterator;
+        }
+
+        /**
+         * @return the next entry, or {@code null} once the iterator reports
+         *   that no more values are available
+         */
+        @Override
+        public E readNext() throws IOException {
+            if (!_iterator.hasNext()) {
+                return null;
+            }
+            return _iterator.nextValue();
+        }
+
+        /**
+         * @return a fixed estimate of 24 bytes, regardless of the item
+         */
+        @Override
+        public int estimateSizeInBytes(E item) {
+            // rough approximation from the original author: an object with 2 int fields
+            final int roughEstimate = 24;
+            return roughEstimate;
+        }
+
+        @Override
+        public void close() throws IOException {
+            // intentionally empty: per the original note, auto-closes when we reach end
+        }
+    }
+
+    /**
+     * Factory producing {@link Writer} instances that share one
+     * {@link ObjectMapper}.
+     */
+    static class WriterFactory<W> extends DataWriterFactory<W>
+    {
+        protected final ObjectMapper _mapper;
+
+        public WriterFactory(ObjectMapper mapper) {
+            _mapper = mapper;
+        }
+
+        @Override
+        public DataWriter<W> constructWriter(OutputStream out) throws IOException {
+            return new Writer<W>(_mapper, out);
+        }
+    }
+
+    /**
+     * Data writer that serializes each entry through the mapper into a single
+     * {@link JsonGenerator}, following every entry with a linefeed.
+     */
+    static class Writer<E> extends DataWriter<E>
+    {
+        protected final ObjectMapper _mapper;
+        protected final JsonGenerator _generator;
+
+        public Writer(ObjectMapper mapper, OutputStream out) throws IOException {
+            _mapper = mapper;
+            _generator = mapper.getJsonFactory().createJsonGenerator(out);
+        }
+
+        @Override
+        public void writeEntry(E item) throws IOException {
+            _mapper.writeValue(_generator, item);
+            // not strictly necessary, but linefeeds make the output easier to read
+            _generator.writeRaw('\n');
+        }
+
+        @Override
+        public void close() throws IOException {
+            _generator.close();
+        }
+    }
+}
@@ -0,0 +1,54 @@
+package com.fasterxml.sort;
+
+import java.io.*;
+
+/**
+ * Tests sorting of JSON-encoded entries with {@link JsonFileSorter}.
+ */
+public class TestJsonSort extends SortTestBase
+{
+    /**
+     * Simple value type bound from JSON; ordered by {@code y} first and by
+     * {@code x} when the {@code y} values are equal.
+     */
+    static class Point implements Comparable<Point>
+    {
+        public int x, y;
+
+        /**
+         * Compares by {@code y}, then by {@code x}.
+         *
+         * @return -1, 0 or 1
+         */
+        @Override
+        public int compareTo(Point o) {
+            // explicit comparisons rather than subtraction: a difference of
+            // two ints can overflow and report the wrong sign
+            if (y != o.y) {
+                return (y < o.y) ? -1 : 1;
+            }
+            if (x != o.x) {
+                return (x < o.x) ? -1 : 1;
+            }
+            return 0;
+        }
+    }
+
+    /*
+    /**********************************************************************
+    /* Unit tests
+    /**********************************************************************
+     */
+
+    /**
+     * Sorts eight points given as linefeed-separated JSON objects and checks
+     * that they come out in ascending {@code y} order.
+     */
+    public void testSimple() throws IOException
+    {
+        final String input =
+                 "{\"x\":1, \"y\":1}\n"
+                +"{\"x\":2, \"y\":8}\n"
+                +"{\"x\":3, \"y\":2}\n"
+                +"{\"x\":4, \"y\":4}\n"
+                +"{\"x\":5, \"y\":5}\n"
+                +"{\"x\":6, \"y\":0}\n"
+                +"{\"x\":7, \"y\":10}\n"
+                +"{\"x\":8, \"y\":-4}\n"
+                ;
+        JsonFileSorter<Point> sorter = new JsonFileSorter<Point>(Point.class);
+        ByteArrayOutputStream out = new ByteArrayOutputStream(1000);
+        sorter.sort(new ByteArrayInputStream(input.getBytes("UTF-8")), out);
+        final String output = out.toString("UTF-8");
+        // NOTE(review): every entry after the first is expected with a leading
+        // space -- presumably the generator's separator between root-level
+        // values; confirm against the Jackson version in use
+        assertEquals(""
+                +"{\"x\":8,\"y\":-4}\n"
+                +" {\"x\":6,\"y\":0}\n"
+                +" {\"x\":1,\"y\":1}\n"
+                +" {\"x\":3,\"y\":2}\n"
+                +" {\"x\":4,\"y\":4}\n"
+                +" {\"x\":5,\"y\":5}\n"
+                +" {\"x\":2,\"y\":8}\n"
+                +" {\"x\":7,\"y\":10}\n"
+                ,output);
+    }
+}

0 comments on commit 8823b1d

Please sign in to comment.