Skip to content

Commit

Permalink
Issue #106: Fixing tests, code
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt Harrah (frizbog) committed Jul 3, 2016
1 parent 3a74a0c commit e6ce913
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 81 deletions.
35 changes: 15 additions & 20 deletions src/main/java/org/gedcom4j/io/reader/AnselReader.java
Expand Up @@ -64,7 +64,7 @@ class AnselReader extends AbstractEncodingSpecificReader {
/**
* Are we at the end of file yet?
*/
private final boolean eof = false;
private boolean eof = false;

/**
* Constructor
Expand Down Expand Up @@ -92,24 +92,18 @@ public String nextLine() throws IOException, GedcomParserException {
// Check for EOF
if (currChar < 0) {
result = getThisLine();
eof = true;
break;
}

// Check for carriage returns - signify EOL
if (currChar == 0x0D) {
result = getThisLine();
lineBufferIdx = 0;
break;
}

// Check for line feeds - signify EOL (unless prev char was a
// CR)
if (currChar == 0x0A) {
if (lastChar != 0x0D) {
// Check for carriage returns or line feeds - signify EOL
if (currChar == 0x0D || currChar == 0x0A) {
if (lineBufferIdx > 0) {
result = getThisLine();
lineBufferIdx = 0;
break;
}
break;
continue;
}

// All other characters are treated the same at this point,
Expand All @@ -118,10 +112,11 @@ public String nextLine() throws IOException, GedcomParserException {

if (lineBufferIdx >= 255) {
result = getThisLine();
lineBufferIdx = 0;
insertSyntheticConcTag();
break;
}

continue;
}
return result;
}
Expand All @@ -146,8 +141,8 @@ private int getCurrentLevelFromLineBuffer() throws GedcomParserException {
* Line is too long and doesn't begin with a 1 or 2 digit number followed by a space, so we can't
* put in CONC's on the fly (because we don't know what level we're at)
*/
throw new GedcomParserException(
"Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. " + "Can't split automatically.");
throw new GedcomParserException("Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. "
+ "Can't split automatically.");
}
} else {
if (lineBuffer[1] == ' ') {
Expand All @@ -157,17 +152,17 @@ private int getCurrentLevelFromLineBuffer() throws GedcomParserException {
* Line is too long and doesn't begin with a 1 or 2 digit number followed by a space, so we can't
* put in CONC's on the fly (because we don't know what level we're at)
*/
throw new GedcomParserException(
"Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. " + "Can't split automatically.");
throw new GedcomParserException("Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. "
+ "Can't split automatically.");
}
}
} else {
/*
* Line is too long and doesn't begin with a 1 or 2 digit number followed by a space, so we can't put in
* CONC's on the fly (because we don't know what level we're at)
*/
throw new GedcomParserException(
"Line " + linesRead + " exceeds 255 characters and does not begin with a 1 or 2 digit number. Can't split automatically.");
throw new GedcomParserException("Line " + linesRead
+ " exceeds 255 characters and does not begin with a 1 or 2 digit number. Can't split automatically.");
}
return level;
}
Expand Down
34 changes: 17 additions & 17 deletions src/main/java/org/gedcom4j/io/reader/GedcomFileReader.java
Expand Up @@ -155,23 +155,6 @@ long firstNBytes(int n) {
return result;
}

/**
* Save off a chunk of the beginning of the input stream to memory for easy inspection. The data is loaded into the
* {@code firstChunk} field, after which the stream is reset to the position it was marked at before the read.
*
* @throws IOException
* if no bytes could be read off the stream (the read returned a negative count), or if one of the
* underlying stream calls throws
*/
void saveFirstChunk() throws IOException {
// Mark the current position (read-ahead limit FIRST_CHUNK_SIZE) so it can be restored after peeking
byteStream.mark(FIRST_CHUNK_SIZE);
// NOTE(review): this is a single read call; presumably it can return fewer bytes than firstChunk holds - confirm
int read = byteStream.read(firstChunk);
if (read < 0) {
// A negative count means nothing could be read at all
throw new IOException("Unable to read bytes off stream");
}
// Return to the marked position so the peeked bytes are still available to later reads
byteStream.reset();

}

/**
* Tries to determine from examining the first 1000 lines/2k of the file if the file is ASCII, ANSEL, or UTF-8
* encoded using a variety of means.
Expand Down Expand Up @@ -279,4 +262,21 @@ private AbstractEncodingSpecificReader getEncodingSpecificReader() throws IOExce
}

}

/**
* Save off a chunk of the beginning of the input stream to memory for easy inspection. The data is loaded into the
* {@code firstChunk} field, after which the stream is reset to the position it was marked at before the read.
*
* @throws IOException
* if no bytes could be read off the stream (the read returned a negative count), or if one of the
* underlying stream calls throws
*/
private void saveFirstChunk() throws IOException {
// Mark the current position (read-ahead limit FIRST_CHUNK_SIZE) so it can be restored after peeking
byteStream.mark(FIRST_CHUNK_SIZE);
// NOTE(review): this is a single read call; presumably it can return fewer bytes than firstChunk holds - confirm
int read = byteStream.read(firstChunk);
if (read < 0) {
// A negative count means nothing could be read at all
throw new IOException("Unable to read bytes off stream");
}
// Return to the marked position so the peeked bytes are still available to later reads
byteStream.reset();

}
}
18 changes: 6 additions & 12 deletions src/main/java/org/gedcom4j/io/reader/UnicodeBigEndianReader.java
Expand Up @@ -105,23 +105,17 @@ public String nextLine() throws IOException, GedcomParserException {

beginningOfFile = false;

// Check for carriage returns - signify EOL
if (currChar1 == 0x00 && currChar2 == 0x0D) {
result = lineBuffer.toString();
lineBuffer.setLength(0);
break;
}

// Check for line feeds - signify EOL (unless prev char was a
// CR)
if (currChar1 == 0x00 && currChar2 == 0x0A) {
if (lastChar1 != 0x00 || lastChar2 != 0x0D) {
// Check for carriage returns or line feeds - signify EOL
if ((currChar1 == 0x00 && currChar2 == 0x0D) || (currChar1 == 0x00 && currChar2 == 0x0A)) {
if (lineBuffer.length() > 0) {
result = lineBuffer.toString();
lineBuffer.setLength(0);
break;
}
break;
continue;
}

// Do bit shifting stuff to make the character from the bytes
int unicodeChar = currChar1 << 8 | currChar2;
lineBuffer.append(Character.valueOf((char) unicodeChar));
}
Expand Down
Expand Up @@ -107,21 +107,14 @@ public String nextLine() throws IOException, GedcomParserException {

beginningOfFile = false;

// Check for carriage returns - signify EOL
if (currChar1 == 0x0D && currChar2 == 0x00) {
result = lineBuffer.toString();
lineBuffer.setLength(0);
break;
}

// Check for line feeds - signify EOL (unless prev char was a
// CR)
if (currChar1 == 0x0A && currChar2 == 0x00) {
if (lastChar1 != 0x0D || lastChar2 != 0x00) {
// Check for carriage returns or line feeds - signify EOL
if ((currChar1 == 0x0D && currChar2 == 0x00) || (currChar1 == 0x0A && currChar2 == 0x00)) {
if (lineBuffer.length() > 0) {
result = lineBuffer.toString();
lineBuffer.setLength(0);
break;
}
break;
continue;
}

int unicodeChar = currChar2 << 8 | currChar1;
Expand Down
27 changes: 10 additions & 17 deletions src/test/java/org/gedcom4j/io/reader/GedcomFileReaderTest.java
Expand Up @@ -127,11 +127,9 @@ public void testAnselDecodingSingleLine() throws IOException, GedcomParserExcept
s = new BufferedInputStream(new ByteArrayInputStream(anselData));

GedcomFileReader gr = new GedcomFileReader(new GedcomParser(), s);
;
List<String> lines = gr.getLines();
assertNotNull(lines);
assertFalse(lines.isEmpty());
assertEquals("0 He\u0141\u0141o", lines.get(0));
String l = gr.nextLine();
assertNotNull(l);
assertEquals("0 He\u0141\u0141o", l);
} finally {
if (s != null) {
s.close();
Expand Down Expand Up @@ -183,19 +181,14 @@ public void testAnselLfOnly() throws IOException, GedcomParserException {
*/
@Test
public void testFirstNBytes() throws IOException, UnsupportedGedcomCharsetException {
GedcomFileReader gfr = new GedcomFileReader(new GedcomParser(),
new BufferedInputStream(new ByteArrayInputStream(new byte[] { 0x12, 0x34, 0x56, 0x78 })));
byte[] bytes = new byte[] { '0', ' ', 0x12, 0x34 };
GedcomFileReader gfr = new GedcomFileReader(new GedcomParser(), new BufferedInputStream(new ByteArrayInputStream(bytes)));
// Haven't saved the first chunk yet
assertNotNull(gfr.firstChunk);
assertEquals(0x0, gfr.firstNBytes(1));
assertEquals(0x0, gfr.firstNBytes(2));
assertEquals(0x0, gfr.firstNBytes(3));

gfr.saveFirstChunk();
assertEquals(0x12, gfr.firstNBytes(1));
assertEquals(0x1234, gfr.firstNBytes(2));
assertEquals(0x123456, gfr.firstNBytes(3));
assertEquals(0x12345678, gfr.firstNBytes(4));
assertEquals(0x30, gfr.firstNBytes(1));
assertEquals(0x3020, gfr.firstNBytes(2));
assertEquals(0x302012, gfr.firstNBytes(3));
assertEquals(0x30201234, gfr.firstNBytes(4));
}

/**
Expand Down Expand Up @@ -460,7 +453,7 @@ public void testUtf8LfNoBOM() throws IOException, GedcomParserException {
* @throws GedcomParserException
* if the file load was cancelled or had malformed data
*/
void testUtf8File(String fileName) throws IOException, FileNotFoundException, GedcomParserException {
private void testUtf8File(String fileName) throws IOException, FileNotFoundException, GedcomParserException {
FileInputStream fileInputStream = null;
BufferedInputStream bufferedInputStream = null;
try {
Expand Down
5 changes: 2 additions & 3 deletions src/test/java/org/gedcom4j/io/reader/LongLineReaderTest.java
Expand Up @@ -66,14 +66,13 @@ public void testAnsel() throws IOException, GedcomParserException {
strings.add(s);
s = ar.nextLine();
}

assertNotNull(strings);
assertEquals("Should say there were 12 lines even though the file only has 11", 12, strings.size());
assertEquals("0 @N1@ NOTE This is an ridiculously long line that exceeds the GEDCOM maximum line length of 255 characters "
+ "so that we can test whether the readers can properly introduce CONC tags on the fly and keep going as if "
+ "everything was ok when the file has lines ", strings.get(9));
assertEquals("1 CONC that are way too long like this one is, even though there are lots of programs that write non-standard GEDCOM files.",
strings.get(10));
assertEquals("1 CONC that are way too long like this one is, even though there are lots of programs that write non-standard GEDCOM files.", strings
.get(10));

gp = new GedcomParser();
gp.load("sample/superlongline-ansel.ged");
Expand Down

0 comments on commit e6ce913

Please sign in to comment.