Skip to content

Commit

Permalink
IMPALA-2477: Parquet metadata randomly 'appears stale'
Browse files Browse the repository at this point in the history
Stream::ReadBytes() can fail for reasons other than
'stale metadata'. This adds an error-code check to make sure
Impala returns the proper error message.

It also fixes IMPALA-2488: metadata.test_stale_metadata
fails on non-HDFS filesystems.

Change-Id: I9a25df3fb49f721bf68d1b07f42a96ce170abbaa
Reviewed-on: http://gerrit.cloudera.org:8080/1166
Reviewed-by: Juan Yu <jyu@cloudera.com>
Tested-by: Internal Jenkins
  • Loading branch information
yjwater authored and tbobrovytsky committed Oct 7, 2015
1 parent e7dbc79 commit 711e759
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 22 deletions.
18 changes: 11 additions & 7 deletions be/src/exec/hdfs-parquet-scanner.cc
Expand Up @@ -1702,13 +1702,17 @@ Status HdfsParquetScanner::ProcessFooter(bool* eosr) {
uint8_t* buffer;
bool success = stream_->ReadBytes(len, &buffer, &parse_status_);
if (!success) {
VLOG_QUERY << "Metadata for file '" << stream_->filename() << "' appears stale: "
<< "metadata states file size to be "
<< PrettyPrinter::Print(stream_->file_desc()->file_length, TUnit::BYTES)
<< ", but could only read "
<< PrettyPrinter::Print(stream_->total_bytes_returned(), TUnit::BYTES);
return Status(TErrorCode::STALE_METADATA_FILE_TOO_SHORT, stream_->filename(),
scan_node_->hdfs_table()->fully_qualified_name());
DCHECK(!parse_status_.ok());
if (parse_status_.code() == TErrorCode::SCANNER_INCOMPLETE_READ) {
VLOG_QUERY << "Metadata for file '" << stream_->filename() << "' appears stale: "
<< "metadata states file size to be "
<< PrettyPrinter::Print(stream_->file_desc()->file_length, TUnit::BYTES)
<< ", but could only read "
<< PrettyPrinter::Print(stream_->total_bytes_returned(), TUnit::BYTES);
return Status(TErrorCode::STALE_METADATA_FILE_TOO_SHORT, stream_->filename(),
scan_node_->hdfs_table()->fully_qualified_name());
}
return parse_status_;
}
DCHECK(stream_->eosr());

Expand Down
12 changes: 3 additions & 9 deletions be/src/exec/scanner-context.cc
Expand Up @@ -296,16 +296,10 @@ bool ScannerContext::cancelled() const {
}

/// Builds the error Status for a read that returned fewer bytes than requested.
/// Uses the typed error code SCANNER_INCOMPLETE_READ (rather than a free-form
/// message string) so that callers — e.g. HdfsParquetScanner::ProcessFooter —
/// can inspect Status::code() and distinguish an incomplete read from other
/// read failures.
///   length:     number of bytes the caller asked for.
///   bytes_read: number of bytes actually returned.
/// The filename and current file offset are included for diagnostics.
Status ScannerContext::Stream::ReportIncompleteRead(int64_t length, int64_t bytes_read) {
  return Status(TErrorCode::SCANNER_INCOMPLETE_READ, length, bytes_read,
      filename(), file_offset());
}

/// Builds the error Status for a read request that was invalid (e.g. a
/// negative or otherwise impossible length). Uses the typed error code
/// SCANNER_INVALID_READ, consistent with ReportIncompleteRead(), so callers
/// can match on Status::code() instead of parsing message text.
///   length: the invalid requested byte count.
/// The filename and current file offset are included for diagnostics.
Status ScannerContext::Stream::ReportInvalidRead(int64_t length) {
  return Status(TErrorCode::SCANNER_INVALID_READ, length, filename(), file_offset());
}
6 changes: 6 additions & 0 deletions common/thrift/generate_error_codes.py
Expand Up @@ -191,6 +191,12 @@

("PARQUET_BAD_VERSION_NUMBER", 61, "File '$0' has an invalid version number: $1\\n"
"This could be due to stale metadata. Try running \\\"refresh $2\\\"."),

("SCANNER_INCOMPLETE_READ", 62, "Tried to read $0 bytes but could only read $1 bytes. "
"This may indicate data file corruption. (file $2, byte offset: $3)"),

("SCANNER_INVALID_READ", 63, "Invalid read of $0 bytes. This may indicate data file "
"corruption. (file $1, byte offset: $2)"),
)

import sys
Expand Down
Expand Up @@ -5,9 +5,9 @@ select count(*) from functional_seq_snap.bad_seq_snap
Bad synchronization marker
Expected: '6e 91 6 ec be 78 a0 ac 72 10 7e 41 b4 da 93 3c '
Actual: '6e 91 6 78 78 78 a0 ac 72 10 7e 41 b4 da 93 3c '
Tried to read 896782 bytes but could only read 896777 bytes. This may indicate data file corruption. (file: hdfs://regex:.$
Problem parsing file: hdfs://regex:.$
Decompressor: invalid compressed length. Data is likely corrupt. (1 of 3 similar)
Tried to read 896782 bytes but could only read 896777 bytes. This may indicate data file corruption. (file: hdfs://regex:.$

---- RESULTS
9434
Expand Down
12 changes: 7 additions & 5 deletions tests/metadata/test_stale_metadata.py
Expand Up @@ -5,6 +5,7 @@
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.impala_test_suite import create_single_exec_option_dimension
from tests.util.filesystem_utils import get_fs_path

class TestRewrittenFile(ImpalaTestSuite):
"""Tests that we gracefully handle when a file in HDFS is rewritten outside of Impala
Expand All @@ -15,14 +16,15 @@ class TestRewrittenFile(ImpalaTestSuite):
DATABASE = "test_written_file_" + str(random.randint(0, 10**10))

TABLE_NAME = "alltypes_rewritten_file"
TABLE_LOCATION = "/test-warehouse/%s" % DATABASE
TABLE_LOCATION = get_fs_path("/test-warehouse/%s" % DATABASE)
FILE_NAME = "alltypes.parq"
# file size = 17.8 KB
SHORT_FILE = "/test-warehouse/alltypesagg_parquet/year=2010/month=1/" \
"day=__HIVE_DEFAULT_PARTITION__/*.parq"
SHORT_FILE = get_fs_path("/test-warehouse/alltypesagg_parquet/year=2010/month=1/" \
"day=__HIVE_DEFAULT_PARTITION__/*.parq")
SHORT_FILE_NUM_ROWS = 1000
# file size = 43.3 KB
LONG_FILE = "/test-warehouse/alltypesagg_parquet/year=2010/month=1/day=9/*.parq"
LONG_FILE = get_fs_path("/test-warehouse/alltypesagg_parquet/year=2010/month=1/" \
"day=9/*.parq")
LONG_FILE_NUM_ROWS = 1000

@classmethod
Expand Down Expand Up @@ -110,7 +112,7 @@ def test_delete_file(self, vector):
result = self.client.execute("select * from %s" % self.__full_table_name())
assert False, "Query was expected to fail"
except ImpalaBeeswaxException as e:
assert 'appears stale.' in str(e)
assert 'No such file or directory' in str(e)

# Refresh the table and make sure we get results
self.client.execute("refresh %s" % self.__full_table_name())
Expand Down

0 comments on commit 711e759

Please sign in to comment.