Skip to content

Commit

Permalink
NIFI-12960 Support reading password-protected files in ExcelReader
Browse files Browse the repository at this point in the history
This closes #8658

Signed-off-by: David Handermann <exceptionfactory@apache.org>
(cherry picked from commit d54e85f)
  • Loading branch information
dan-s1 authored and exceptionfactory committed Apr 25, 2024
1 parent e8d1496 commit bf06ff4
Show file tree
Hide file tree
Showing 6 changed files with 185 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.annotation.lifecycle.OnEnabled;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.DescribedValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.context.PropertyContext;
import org.apache.nifi.controller.ConfigurationContext;
Expand Down Expand Up @@ -61,6 +62,34 @@
+ "(XSSF 2007 OOXML file format) Excel documents and not older .xls (HSSF '97(-2007) file format) documents.")
public class ExcelReader extends SchemaRegistryService implements RecordReaderFactory {

public enum ProtectionType implements DescribedValue {
UNPROTECTED("Unprotected", "An Excel spreadsheet not protected by a password"),
PASSWORD("Password Protected", "An Excel spreadsheet protected by a password");

ProtectionType(String displayName, String description) {
this.displayName = displayName;
this.description = description;
}

private final String displayName;
private final String description;

@Override
public String getValue() {
return name();
}

@Override
public String getDisplayName() {
return displayName;
}

@Override
public String getDescription() {
return description;
}
}

public static final PropertyDescriptor REQUIRED_SHEETS = new PropertyDescriptor
.Builder().name("Required Sheets")
.displayName("Required Sheets")
Expand All @@ -83,6 +112,25 @@ public class ExcelReader extends SchemaRegistryService implements RecordReaderFa
.addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
.build();

public static final PropertyDescriptor PROTECTION_TYPE = new PropertyDescriptor
.Builder().name("Protection Type")
.displayName("Protection Type")
.description("Specifies whether an Excel spreadsheet is protected by a password or not.")
.required(true)
.allowableValues(ProtectionType.class)
.defaultValue(ProtectionType.UNPROTECTED)
.build();

public static final PropertyDescriptor PASSWORD = new PropertyDescriptor
.Builder().name("Password")
.displayName("Password")
.description("The password for a password protected Excel spreadsheet")
.required(true)
.sensitive(true)
.addValidator(StandardValidators.NON_BLANK_VALIDATOR)
.dependsOn(PROTECTION_TYPE, ProtectionType.PASSWORD)
.build();

private volatile ConfigurationContext configurationContext;
private volatile String dateFormat;
private volatile String timeFormat;
Expand All @@ -106,13 +154,15 @@ public RecordReader createRecordReader(final Map<String, String> variables, fina

final List<String> requiredSheets = getRequiredSheets(variables);
final int firstRow = getStartingRow(variables);
final String password = configurationContext.getProperty(PASSWORD).getValue();
final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
.withDateFormat(dateFormat)
.withRequiredSheets(requiredSheets)
.withFirstRow(firstRow)
.withSchema(schema)
.withTimeFormat(timeFormat)
.withTimestampFormat(timestampFormat)
.withPassword(password)
.build();

return new ExcelRecordReader(configuration, in, logger);
Expand All @@ -123,6 +173,8 @@ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
final List<PropertyDescriptor> properties = new ArrayList<>(super.getSupportedPropertyDescriptors());
properties.add(STARTING_ROW);
properties.add(REQUIRED_SHEETS);
properties.add(PROTECTION_TYPE);
properties.add(PASSWORD);
properties.add(DateTimeUtils.DATE_FORMAT);
properties.add(DateTimeUtils.TIME_FORMAT);
properties.add(DateTimeUtils.TIMESTAMP_FORMAT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public ExcelRecordReader(ExcelRecordReaderConfiguration configuration, InputStre
}

try {
this.rowIterator = new RowIterator(inputStream, configuration.getRequiredSheets(), configuration.getFirstRow(), logger);
this.rowIterator = new RowIterator(inputStream, configuration, logger);
} catch (RuntimeException e) {
throw new MalformedRecordException("Read initial Record from Excel XLSX failed", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public class ExcelRecordReaderConfiguration {
private String dateFormat;
private String timeFormat;
private String timestampFormat;
private String password;
private boolean avoidTempFiles;

private ExcelRecordReaderConfiguration() {
}
Expand Down Expand Up @@ -56,13 +58,23 @@ public String getTimestampFormat() {
return timestampFormat;
}

public String getPassword() {
return password;
}

public boolean isAvoidTempFiles() {
return avoidTempFiles;
}

public static final class Builder {
private RecordSchema schema;
private List<String> requiredSheets;
private int firstRow;
private String dateFormat;
private String timeFormat;
private String timestampFormat;
private String password;
private boolean avoidTempFiles;

public Builder withSchema(RecordSchema schema) {
this.schema = schema;
Expand Down Expand Up @@ -94,6 +106,16 @@ public Builder withTimestampFormat(String timestampFormat) {
return this;
}

public Builder withPassword(String password) {
this.password = password;
return this;
}

public Builder withAvoidTempFiles(boolean avoidTempFiles) {
this.avoidTempFiles = avoidTempFiles;
return this;
}

public ExcelRecordReaderConfiguration build() {
ExcelRecordReaderConfiguration excelRecordReaderConfiguration = new ExcelRecordReaderConfiguration();
excelRecordReaderConfiguration.schema = this.schema;
Expand All @@ -102,6 +124,9 @@ public ExcelRecordReaderConfiguration build() {
excelRecordReaderConfiguration.requiredSheets = this.requiredSheets == null ? Collections.emptyList() : this.requiredSheets;
excelRecordReaderConfiguration.dateFormat = this.dateFormat;
excelRecordReaderConfiguration.firstRow = this.firstRow;
excelRecordReaderConfiguration.password = password;
excelRecordReaderConfiguration.avoidTempFiles = avoidTempFiles;

return excelRecordReaderConfiguration;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,13 @@ public ExcelRecordSource(final InputStream in, final PropertyContext context, fi
final Integer rawFirstRow = context.getProperty(ExcelReader.STARTING_ROW).evaluateAttributeExpressions(variables).asInteger();
final int firstRow = rawFirstRow == null ? NumberUtils.toInt(ExcelReader.STARTING_ROW.getDefaultValue()) : rawFirstRow;
final int zeroBasedFirstRow = ExcelReader.getZeroBasedIndex(firstRow);
this.rowIterator = new RowIterator(in, requiredSheets, zeroBasedFirstRow, logger);
final String password = context.getProperty(ExcelReader.PASSWORD).getValue();
final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
.withRequiredSheets(requiredSheets)
.withFirstRow(zeroBasedFirstRow)
.withPassword(password)
.build();
this.rowIterator = new RowIterator(in, configuration, logger);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,15 @@ class RowIterator implements Iterator<Row>, Closeable {
private Iterator<Row> currentRows;
private Row currentRow;

RowIterator(final InputStream in, final List<String> requiredSheets, final int firstRow, final ComponentLog logger) {
RowIterator(final InputStream in, final ExcelRecordReaderConfiguration configuration, final ComponentLog logger) {
this.workbook = StreamingReader.builder()
.rowCacheSize(100)
.bufferSize(4096)
.password(configuration.getPassword())
.setAvoidTempFiles(configuration.isAvoidTempFiles())
.open(in);

final List<String> requiredSheets = configuration.getRequiredSheets();
if (requiredSheets == null || requiredSheets.isEmpty()) {
this.sheets = this.workbook.iterator();
} else {
Expand All @@ -65,7 +68,7 @@ class RowIterator implements Iterator<Row>, Closeable {
.collect(Collectors.toList()).iterator();
}

this.firstRow = firstRow;
this.firstRow = configuration.getFirstRow();
this.logger = logger;
setCurrent();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,27 @@
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.crypt.EncryptionMode;
import org.apache.poi.poifs.crypt.Encryptor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -50,10 +63,60 @@ public class TestExcelRecordReader {

private static final String DATA_FORMATTING_FILE = "dataformatting.xlsx";
private static final String MULTI_SHEET_FILE = "twoSheets.xlsx";
private static final String PASSWORD = "nifi";
private static final ByteArrayOutputStream PASSWORD_PROTECTED = new ByteArrayOutputStream();
private static final Object[][] DATA = {
{"ID", "Name"},
{1, "Manny"},
{2, "Moe"},
{3, "Jack"},
};

@Mock
ComponentLog logger;

@BeforeAll
static void setUpBeforeAll() throws Exception {
//Generate an Excel file and populate it with data
final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
try (XSSFWorkbook workbook = new XSSFWorkbook()) {
final XSSFSheet sheet = workbook.createSheet("User Info");
populateSheet(sheet);
workbook.write(outputStream);
}

//Protect the Excel file with a password
try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) {
EncryptionInfo encryptionInfo = new EncryptionInfo(EncryptionMode.agile);
Encryptor encryptor = encryptionInfo.getEncryptor();
encryptor.confirmPassword(PASSWORD);

try (OPCPackage opc = OPCPackage.open(new ByteArrayInputStream(outputStream.toByteArray()));
OutputStream os = encryptor.getDataStream(poifsFileSystem)) {
opc.save(os);
}
poifsFileSystem.writeFilesystem(PASSWORD_PROTECTED);
}
}

private static void populateSheet(XSSFSheet sheet) {
//Adding the data to the Excel worksheet
int rowCount = 0;
for (Object[] dataRow : DATA) {
Row row = sheet.createRow(rowCount++);
int columnCount = 0;

for (Object field : dataRow) {
Cell cell = row.createCell(columnCount++);
if (field instanceof String) {
cell.setCellValue((String) field);
} else if (field instanceof Integer) {
cell.setCellValue((Integer) field);
}
}
}
}

@Test
public void testNonExcelFile() {
ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
Expand Down Expand Up @@ -220,4 +283,36 @@ public void testSelectAllSheets() throws MalformedRecordException {

assertEquals(7, records.size());
}

@Test
void testPasswordProtected() throws Exception {
RecordSchema schema = getPasswordProtectedSchema();
ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
.withSchema(schema)
.withPassword(PASSWORD)
.withAvoidTempFiles(true)
.build();

InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray());
ExcelRecordReader recordReader = new ExcelRecordReader(configuration, inputStream, logger);
List<Record> records = getRecords(recordReader, false, false);

assertEquals(DATA.length, records.size());
}

@Test
void testPasswordProtectedWithoutPassword() {
RecordSchema schema = getPasswordProtectedSchema();
ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
.withSchema(schema)
.build();

InputStream inputStream = new ByteArrayInputStream(PASSWORD_PROTECTED.toByteArray());
assertThrows(Exception.class, () -> new ExcelRecordReader(configuration, inputStream, logger));
}

private RecordSchema getPasswordProtectedSchema() {
return new SimpleRecordSchema(Arrays.asList(new RecordField("id", RecordFieldType.INT.getDataType()),
new RecordField("name", RecordFieldType.STRING.getDataType())));
}
}

0 comments on commit bf06ff4

Please sign in to comment.