Skip to content

Commit

Permalink
remember BOM when reading file to set pos correctly ref #13932
Browse files Browse the repository at this point in the history
Signed-off-by: m-kro <m.barthauer@t-online.de>
  • Loading branch information
m-kro committed Oct 16, 2023
1 parent 6233b0d commit 8aa3668
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
9 changes: 6 additions & 3 deletions src/utils/importio/LineReader.cpp
Expand Up @@ -15,6 +15,7 @@
/// @author Daniel Krajzewicz
/// @author Laura Bieker
/// @author Michael Behrisch
/// @author Mirko Barthauer
/// @date Fri, 19 Jul 2002
///
// Retrieves a file linewise and reports the lines to a handler.
Expand Down Expand Up @@ -119,7 +120,7 @@ std::string
LineReader::readLine() {
std::string toReport;
while (toReport.length() == 0 && myStrm.good()) {
const std::string::size_type idx = myStrBuffer.find('\n');
const std::string::size_type idx = myStrBuffer.find('\n');
if (idx == 0) {
myStrBuffer = myStrBuffer.substr(1);
myRread++;
Expand All @@ -137,6 +138,7 @@ LineReader::readLine() {
? myAvailable - myRead
: 1024);
int noBytes = myAvailable - myRead;
bool bomAtStart = myBuffer[0] == '\xef' && myBuffer[1] == '\xbb' && myBuffer[2] == '\xbf';
noBytes = noBytes > 1024 ? 1024 : noBytes;
myStrBuffer += std::string(myBuffer, noBytes);
myRead += 1024;
Expand Down Expand Up @@ -204,7 +206,8 @@ LineReader::reinit() {
// check for BOM
myStrm.read(myBuffer, 3);
if (myBuffer[0] == '\xef' && myBuffer[1] == '\xbb' && myBuffer[2] == '\xbf') {
myAvailable -= 3;
mySkipBOM = 3;
myAvailable -= mySkipBOM;
} else {
myStrm.seekg(0, std::ios::beg);
}
Expand All @@ -218,7 +221,7 @@ LineReader::reinit() {

void
LineReader::setPos(unsigned long pos) {
myStrm.seekg(pos, std::ios::beg);
myStrm.seekg(pos + mySkipBOM, std::ios::beg);
myRead = pos;
myRread = pos;
myStrBuffer = "";
Expand Down
2 changes: 2 additions & 0 deletions src/utils/importio/LineReader.h
Expand Up @@ -169,4 +169,6 @@ class LineReader {
/// @brief Number of lines read so far, used to produce meaningful error messages
int myLinesRead;

/// @brief Number of bytes skipped at the beginning of the file (UTF-8 BOM)
int mySkipBOM;
};

0 comments on commit 8aa3668

Please sign in to comment.