Skip to content

Commit

Permalink
initial implementation of an Excel file LineReader class
Browse files Browse the repository at this point in the history
  • Loading branch information
bill-baumgartner committed Dec 13, 2016
1 parent c9479a1 commit 5aea68a
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 1 deletion.
10 changes: 9 additions & 1 deletion pom.xml
@@ -1,5 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
Expand Down Expand Up @@ -74,6 +75,13 @@
<version>1.16.10</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.15</version>
</dependency>

</dependencies>


Expand Down
@@ -0,0 +1,82 @@
package edu.ucdenver.ccp.common.file.reader;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;

import edu.ucdenver.ccp.common.file.reader.Line.LineTerminator;

/**
* Simple class to read lines from an Excel file. This class assumes that the
* first line in a header (and uses the header to determine the number of
* columns in the file).
*/
public class ExcelFileLineReader extends LineReader<Line> {

private final int columnCount;
private final int rowCount;
private int currentRow = 1;
private final Workbook wb;
private final Sheet sheet;
private long byteOffset = -1;


public ExcelFileLineReader(File file, String skipLinePrefix) throws IOException {
this(new FileInputStream(file), skipLinePrefix);
}

public ExcelFileLineReader(InputStream inputStream, String skipLinePrefix) throws IOException {
super(skipLinePrefix);
try {
wb = WorkbookFactory.create(inputStream);
} catch (EncryptedDocumentException | InvalidFormatException e) {
throw new IOException(e);
}
sheet = wb.getSheetAt(0);

// use the header row to define the number of columns
Row headerRow = sheet.getRow(0);
columnCount = headerRow.getLastCellNum();
rowCount = sheet.getLastRowNum();

}

@Override
public void close() throws IOException {
wb.close();

}

@Override
protected Line getNextLine() throws IOException {
StringBuffer lineText = new StringBuffer();
String delimiter = "\t";
if (currentRow <= rowCount) {
Row r = sheet.getRow(currentRow++);
for (int col = 0; col < columnCount; col++) {
Cell cell = r.getCell(col, MissingCellPolicy.RETURN_BLANK_AS_NULL);
if (col > 0) {
lineText.append(delimiter);
}
if (cell != null) {
lineText.append(cell.getStringCellValue());
}
}
return new Line(lineText.toString(), LineTerminator.CR, getCharacterOffset(), getCodePointOffset(),
currentRow, byteOffset);
}
return null;
}

}
@@ -0,0 +1,44 @@
package edu.ucdenver.ccp.common.file.reader;

import static org.junit.Assert.*;

import java.io.IOException;
import java.io.InputStream;

import org.junit.Test;

import edu.ucdenver.ccp.common.io.ClassPathUtil;

public class ExcelFileLineReaderTest {

@Test
public void testReadXlsxFile() throws IOException {
InputStream xlsxStream = ClassPathUtil.getResourceStreamFromClasspath(getClass(), "sample.xlsx");
ExcelFileLineReader lineReader = new ExcelFileLineReader(xlsxStream, null);
Line line = lineReader.readLine();
assertEquals("a\tb\tc\td", line.getText());
line = lineReader.readLine();
assertEquals("a\t\tc\td", line.getText());
line = lineReader.readLine();
assertEquals("\tb\tc\td", line.getText());
line = lineReader.readLine();
assertEquals("\t\tc\t", line.getText());
lineReader.close();
}

@Test
public void testReadXlsFile() throws IOException {
InputStream xlsxStream = ClassPathUtil.getResourceStreamFromClasspath(getClass(), "sample.xls");
ExcelFileLineReader lineReader = new ExcelFileLineReader(xlsxStream, null);
Line line = lineReader.readLine();
assertEquals("a\tb\tc\td", line.getText());
line = lineReader.readLine();
assertEquals("a\t\tc\td", line.getText());
line = lineReader.readLine();
assertEquals("\tb\tc\td", line.getText());
line = lineReader.readLine();
assertEquals("\t\tc\t", line.getText());
lineReader.close();
}

}
Binary file not shown.
Binary file not shown.

0 comments on commit 5aea68a

Please sign in to comment.