Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
initial implementation of an Excel file LineReader class
- Loading branch information
1 parent
c9479a1
commit 5aea68a
Showing
5 changed files
with
135 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
82 changes: 82 additions & 0 deletions
82
src/main/java/edu/ucdenver/ccp/common/file/reader/ExcelFileLineReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
package edu.ucdenver.ccp.common.file.reader; | ||
|
||
import java.io.File; | ||
import java.io.FileInputStream; | ||
import java.io.FileNotFoundException; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
|
||
import org.apache.poi.EncryptedDocumentException; | ||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | ||
import org.apache.poi.ss.usermodel.Cell; | ||
import org.apache.poi.ss.usermodel.Row; | ||
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy; | ||
import org.apache.poi.ss.usermodel.Sheet; | ||
import org.apache.poi.ss.usermodel.Workbook; | ||
import org.apache.poi.ss.usermodel.WorkbookFactory; | ||
|
||
import edu.ucdenver.ccp.common.file.reader.Line.LineTerminator; | ||
|
||
/** | ||
* Simple class to read lines from an Excel file. This class assumes that the | ||
* first line in a header (and uses the header to determine the number of | ||
* columns in the file). | ||
*/ | ||
public class ExcelFileLineReader extends LineReader<Line> { | ||
|
||
private final int columnCount; | ||
private final int rowCount; | ||
private int currentRow = 1; | ||
private final Workbook wb; | ||
private final Sheet sheet; | ||
private long byteOffset = -1; | ||
|
||
|
||
public ExcelFileLineReader(File file, String skipLinePrefix) throws IOException { | ||
this(new FileInputStream(file), skipLinePrefix); | ||
} | ||
|
||
public ExcelFileLineReader(InputStream inputStream, String skipLinePrefix) throws IOException { | ||
super(skipLinePrefix); | ||
try { | ||
wb = WorkbookFactory.create(inputStream); | ||
} catch (EncryptedDocumentException | InvalidFormatException e) { | ||
throw new IOException(e); | ||
} | ||
sheet = wb.getSheetAt(0); | ||
|
||
// use the header row to define the number of columns | ||
Row headerRow = sheet.getRow(0); | ||
columnCount = headerRow.getLastCellNum(); | ||
rowCount = sheet.getLastRowNum(); | ||
|
||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
wb.close(); | ||
|
||
} | ||
|
||
@Override | ||
protected Line getNextLine() throws IOException { | ||
StringBuffer lineText = new StringBuffer(); | ||
String delimiter = "\t"; | ||
if (currentRow <= rowCount) { | ||
Row r = sheet.getRow(currentRow++); | ||
for (int col = 0; col < columnCount; col++) { | ||
Cell cell = r.getCell(col, MissingCellPolicy.RETURN_BLANK_AS_NULL); | ||
if (col > 0) { | ||
lineText.append(delimiter); | ||
} | ||
if (cell != null) { | ||
lineText.append(cell.getStringCellValue()); | ||
} | ||
} | ||
return new Line(lineText.toString(), LineTerminator.CR, getCharacterOffset(), getCodePointOffset(), | ||
currentRow, byteOffset); | ||
} | ||
return null; | ||
} | ||
|
||
} |
44 changes: 44 additions & 0 deletions
44
src/test/java/edu/ucdenver/ccp/common/file/reader/ExcelFileLineReaderTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package edu.ucdenver.ccp.common.file.reader; | ||
|
||
import static org.junit.Assert.*; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
|
||
import org.junit.Test; | ||
|
||
import edu.ucdenver.ccp.common.io.ClassPathUtil; | ||
|
||
public class ExcelFileLineReaderTest { | ||
|
||
@Test | ||
public void testReadXlsxFile() throws IOException { | ||
InputStream xlsxStream = ClassPathUtil.getResourceStreamFromClasspath(getClass(), "sample.xlsx"); | ||
ExcelFileLineReader lineReader = new ExcelFileLineReader(xlsxStream, null); | ||
Line line = lineReader.readLine(); | ||
assertEquals("a\tb\tc\td", line.getText()); | ||
line = lineReader.readLine(); | ||
assertEquals("a\t\tc\td", line.getText()); | ||
line = lineReader.readLine(); | ||
assertEquals("\tb\tc\td", line.getText()); | ||
line = lineReader.readLine(); | ||
assertEquals("\t\tc\t", line.getText()); | ||
lineReader.close(); | ||
} | ||
|
||
@Test | ||
public void testReadXlsFile() throws IOException { | ||
InputStream xlsxStream = ClassPathUtil.getResourceStreamFromClasspath(getClass(), "sample.xls"); | ||
ExcelFileLineReader lineReader = new ExcelFileLineReader(xlsxStream, null); | ||
Line line = lineReader.readLine(); | ||
assertEquals("a\tb\tc\td", line.getText()); | ||
line = lineReader.readLine(); | ||
assertEquals("a\t\tc\td", line.getText()); | ||
line = lineReader.readLine(); | ||
assertEquals("\tb\tc\td", line.getText()); | ||
line = lineReader.readLine(); | ||
assertEquals("\t\tc\t", line.getText()); | ||
lineReader.close(); | ||
} | ||
|
||
} |
Binary file not shown.
Binary file not shown.