Issue #106: Fixing tests, code

frizbog · Jul 3, 2016 · e6ce913 · e6ce913
1 parent 3a74a0c
commit e6ce913
Show file tree

Hide file tree

Showing 6 changed files with 55 additions and 81 deletions.
diff --git a/src/main/java/org/gedcom4j/io/reader/AnselReader.java b/src/main/java/org/gedcom4j/io/reader/AnselReader.java
@@ -64,7 +64,7 @@ class AnselReader extends AbstractEncodingSpecificReader {
     /**
      * Are we at the end of file yet?
      */
-    private final boolean eof = false;
+    private boolean eof = false;
 
     /**
      * Constructor
@@ -92,24 +92,18 @@ public String nextLine() throws IOException, GedcomParserException {
             // Check for EOF
             if (currChar < 0) {
                 result = getThisLine();
+                eof = true;
                 break;
             }
 
-            // Check for carriage returns - signify EOL
-            if (currChar == 0x0D) {
-                result = getThisLine();
-                lineBufferIdx = 0;
-                break;
-            }
-
-            // Check for line feeds - signify EOL (unless prev char was a
-            // CR)
-            if (currChar == 0x0A) {
-                if (lastChar != 0x0D) {
+            // Check for carriage returns or line feeds - signify EOL
+            if (currChar == 0x0D || currChar == 0x0A) {
+                if (lineBufferIdx > 0) {
                     result = getThisLine();
                     lineBufferIdx = 0;
+                    break;
                 }
-                break;
+                continue;
             }
 
             // All other characters are treated the same at this point,
@@ -118,10 +112,11 @@ public String nextLine() throws IOException, GedcomParserException {
 
             if (lineBufferIdx >= 255) {
                 result = getThisLine();
+                lineBufferIdx = 0;
                 insertSyntheticConcTag();
+                break;
             }
 
-            continue;
         }
         return result;
     }
@@ -146,8 +141,8 @@ private int getCurrentLevelFromLineBuffer() throws GedcomParserException {
                      * Line is too long and doesn't begin with a 1 or 2 digit number followed by a space, so we can't
                      * put in CONC's on the fly (because we don't know what level we're at)
                      */
-                    throw new GedcomParserException(
-                            "Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. " + "Can't split automatically.");
+                    throw new GedcomParserException("Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. "
+                            + "Can't split automatically.");
                 }
             } else {
                 if (lineBuffer[1] == ' ') {
@@ -157,17 +152,17 @@ private int getCurrentLevelFromLineBuffer() throws GedcomParserException {
                      * Line is too long and doesn't begin with a 1 or 2 digit number followed by a space, so we can't
                      * put in CONC's on the fly (because we don't know what level we're at)
                      */
-                    throw new GedcomParserException(
-                            "Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. " + "Can't split automatically.");
+                    throw new GedcomParserException("Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. "
+                            + "Can't split automatically.");
                 }
             }
         } else {
             /*
              * Line is too long and doesn't begin with a 1 or 2 digit number followed by a space, so we can't put in
              * CONC's on the fly (because we don't know what level we're at)
              */
-            throw new GedcomParserException(
-                    "Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. Can't split automatically.");
+            throw new GedcomParserException("Line " + linesRead
+                    + " exceeds 255 characters and does not begin with a 1 or 2 digit number. Can't split automatically.");
         }
         return level;
     }

diff --git a/src/main/java/org/gedcom4j/io/reader/GedcomFileReader.java b/src/main/java/org/gedcom4j/io/reader/GedcomFileReader.java
@@ -155,23 +155,6 @@ long firstNBytes(int n) {
         return result;
     }
 
-    /**
-     * Save off a chunk of the beginning of the input stream to memory for easy inspection. The data is loaded into the
-     * field
-     * 
-     * @throws IOException
-     *             if the stream of bytes cannot be read.
-     */
-    void saveFirstChunk() throws IOException {
-        byteStream.mark(FIRST_CHUNK_SIZE);
-        int read = byteStream.read(firstChunk);
-        if (read < 0) {
-            throw new IOException("Unable to read bytes off stream");
-        }
-        byteStream.reset();
-
-    }
-
     /**
      * Tries to determined from examining the first 1000 lines/2k of the file if the file is ASCII, ANSEL, or UTF-8
      * encoded using a variety of means.
@@ -279,4 +262,21 @@ private AbstractEncodingSpecificReader getEncodingSpecificReader() throws IOExce
         }
 
     }
+
+    /**
+     * Save off a chunk of the beginning of the input stream to memory for easy inspection. The data is loaded into the
+     * {@code firstChunk} field, and the stream is then reset to the position it was marked at.
+     * 
+     * @throws IOException
+     *             if the stream of bytes cannot be read.
+     */
+    private void saveFirstChunk() throws IOException {
+        byteStream.mark(FIRST_CHUNK_SIZE);
+        int read = byteStream.read(firstChunk);
+        if (read < 0) {
+            throw new IOException("Unable to read bytes off stream");
+        }
+        byteStream.reset();
+
+    }
 }
diff --git a/src/main/java/org/gedcom4j/io/reader/UnicodeBigEndianReader.java b/src/main/java/org/gedcom4j/io/reader/UnicodeBigEndianReader.java
@@ -105,23 +105,17 @@ public String nextLine() throws IOException, GedcomParserException {
 
             beginningOfFile = false;
 
-            // Check for carriage returns - signify EOL
-            if (currChar1 == 0x00 && currChar2 == 0x0D) {
-                result = lineBuffer.toString();
-                lineBuffer.setLength(0);
-                break;
-            }
-
-            // Check for line feeds - signify EOL (unless prev char was a
-            // CR)
-            if (currChar1 == 0x00 && currChar2 == 0x0A) {
-                if (lastChar1 != 0x00 || lastChar2 != 0x0D) {
+            // Check for carriage returns or line feeds - signify EOL
+            if ((currChar1 == 0x00 && currChar2 == 0x0D) || (currChar1 == 0x00 && currChar2 == 0x0A)) {
+                if (lineBuffer.length() > 0) {
                     result = lineBuffer.toString();
                     lineBuffer.setLength(0);
+                    break;
                 }
-                break;
+                continue;
             }
 
+            // Combine the two bytes (high-order byte first) into a single 16-bit character
             int unicodeChar = currChar1 << 8 | currChar2;
             lineBuffer.append(Character.valueOf((char) unicodeChar));
         }

diff --git a/src/main/java/org/gedcom4j/io/reader/UnicodeLittleEndianReader.java b/src/main/java/org/gedcom4j/io/reader/UnicodeLittleEndianReader.java
@@ -107,21 +107,14 @@ public String nextLine() throws IOException, GedcomParserException {
 
             beginningOfFile = false;
 
-            // Check for carriage returns - signify EOL
-            if (currChar1 == 0x0D && currChar2 == 0x00) {
-                result = lineBuffer.toString();
-                lineBuffer.setLength(0);
-                break;
-            }
-
-            // Check for line feeds - signify EOL (unless prev char was a
-            // CR)
-            if (currChar1 == 0x0A && currChar2 == 0x00) {
-                if (lastChar1 != 0x0D || lastChar2 != 0x00) {
+            // Check for carriage returns or line feeds - signify EOL
+            if ((currChar1 == 0x0D && currChar2 == 0x00) || (currChar1 == 0x0A && currChar2 == 0x00)) {
+                if (lineBuffer.length() > 0) {
                     result = lineBuffer.toString();
                     lineBuffer.setLength(0);
+                    break;
                 }
-                break;
+                continue;
             }
 
             int unicodeChar = currChar2 << 8 | currChar1;

diff --git a/src/test/java/org/gedcom4j/io/reader/GedcomFileReaderTest.java b/src/test/java/org/gedcom4j/io/reader/GedcomFileReaderTest.java
@@ -127,11 +127,9 @@ public void testAnselDecodingSingleLine() throws IOException, GedcomParserExcept
             s = new BufferedInputStream(new ByteArrayInputStream(anselData));
 
             GedcomFileReader gr = new GedcomFileReader(new GedcomParser(), s);
-            ;
-            List<String> lines = gr.getLines();
-            assertNotNull(lines);
-            assertFalse(lines.isEmpty());
-            assertEquals("0 He\u0141\u0141o", lines.get(0));
+            String l = gr.nextLine();
+            assertNotNull(l);
+            assertEquals("0 He\u0141\u0141o", l);
         } finally {
             if (s != null) {
                 s.close();
@@ -183,19 +181,14 @@ public void testAnselLfOnly() throws IOException, GedcomParserException {
      */
     @Test
     public void testFirstNBytes() throws IOException, UnsupportedGedcomCharsetException {
-        GedcomFileReader gfr = new GedcomFileReader(new GedcomParser(),
-                new BufferedInputStream(new ByteArrayInputStream(new byte[] { 0x12, 0x34, 0x56, 0x78 })));
+        byte[] bytes = new byte[] { '0', ' ', 0x12, 0x34 };
+        GedcomFileReader gfr = new GedcomFileReader(new GedcomParser(), new BufferedInputStream(new ByteArrayInputStream(bytes)));
         // Haven't save the first chunk yet
         assertNotNull(gfr.firstChunk);
-        assertEquals(0x0, gfr.firstNBytes(1));
-        assertEquals(0x0, gfr.firstNBytes(2));
-        assertEquals(0x0, gfr.firstNBytes(3));
-
-        gfr.saveFirstChunk();
-        assertEquals(0x12, gfr.firstNBytes(1));
-        assertEquals(0x1234, gfr.firstNBytes(2));
-        assertEquals(0x123456, gfr.firstNBytes(3));
-        assertEquals(0x12345678, gfr.firstNBytes(4));
+        assertEquals(0x30, gfr.firstNBytes(1));
+        assertEquals(0x3020, gfr.firstNBytes(2));
+        assertEquals(0x302012, gfr.firstNBytes(3));
+        assertEquals(0x30201234, gfr.firstNBytes(4));
     }
 
     /**
@@ -460,7 +453,7 @@ public void testUtf8LfNoBOM() throws IOException, GedcomParserException {
      * @throws GedcomParserException
      *             if the file load was cancelled or had malformed data
      */
-    void testUtf8File(String fileName) throws IOException, FileNotFoundException, GedcomParserException {
+    private void testUtf8File(String fileName) throws IOException, FileNotFoundException, GedcomParserException {
         FileInputStream fileInputStream = null;
         BufferedInputStream bufferedInputStream = null;
         try {

diff --git a/src/test/java/org/gedcom4j/io/reader/LongLineReaderTest.java b/src/test/java/org/gedcom4j/io/reader/LongLineReaderTest.java
@@ -66,14 +66,13 @@ public void testAnsel() throws IOException, GedcomParserException {
                 strings.add(s);
                 s = ar.nextLine();
             }
-
             assertNotNull(strings);
             assertEquals("Should say there were 12 lines even though the file only has 11", 12, strings.size());
             assertEquals("0 @N1@ NOTE This is an ridiculously long line that exceeds the GEDCOM maximum line length of 255 characters "
                     + "so that we can test whether the readers can properly introduce CONC tags on the fly and keep going as if "
                     + "everything was ok when the file has lines ", strings.get(9));
-            assertEquals("1 CONC that are way too long like this one is, even though there are lots of programs that write non-standard GEDCOM files.",
-                    strings.get(10));
+            assertEquals("1 CONC that are way too long like this one is, even though there are lots of programs that write non-standard GEDCOM files.", strings
+                    .get(10));
 
             gp = new GedcomParser();
             gp.load("sample/superlongline-ansel.ged");