Skip to content
Permalink
Browse files

Handle big entries in bgl

  • Loading branch information...
Abs62 committed Jan 18, 2013
1 parent 77ded2c commit 160db20a043942c083d0b133e68b0c60d131fe03
Showing with 77 additions and 6 deletions.
  1. +1 −1 bgl.cc
  2. +76 −5 bgl_babylon.cc
2 bgl.cc
@@ -50,7 +50,7 @@ namespace
enum
{
Signature = 0x584c4742, // BGLX on little-endian, XLGB on big-endian
CurrentFormatVersion = 18 + BtreeIndexing::FormatVersion
CurrentFormatVersion = 19 + BtreeIndexing::FormatVersion
};

struct IdxHeader
@@ -208,6 +208,7 @@ bool Babylon::read(std::string &source_charset, std::string &target_charset)
case 1:
case 7:
case 10:
case 11:
// Only count entries
m_numEntries++;
break;
@@ -323,6 +324,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )

bgl_block block;
unsigned int len, pos;
unsigned int alts_num;
std::string headword, displayedHeadword;
std::string definition;
std::string temp;
@@ -359,6 +361,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
case 1:
case 7:
case 10:
case 11:
alternate.clear();
headword.clear();
displayedHeadword.clear();
@@ -368,8 +371,23 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
pos = 0;

// Headword
len = 0;
len = (unsigned char)block.data[pos++];
if( block.type == 11 )
{
pos = 1;
len = 0;
if( pos + 4 > block.length )
break;
for( int i = 0; i < 4; i++ )
{
len = len << 8;
len |= (unsigned char)block.data[ pos++ ];
}
}
else
{
len = (unsigned char)block.data[pos++];
}

if( pos + len > block.length )
break;

@@ -383,10 +401,63 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
if( headword.find( "&#" ) != string::npos )
headword = Html::unescapeUtf8( headword );

if( block.type == 11 )
{
// Alternate forms
if( pos + 4 >= block.length )
break;

alts_num = 0;
for( int i = 0; i < 4; i++ )
{
alts_num = alts_num << 8;
alts_num |= (unsigned char)block.data[ pos++ ];
}

for( unsigned j = 0; j < alts_num; j++ )
{
len = 0;
if( pos + 4 > block.length )
break;
for( int i = 0; i < 4; i++ )
{
len = len << 8;
len |= (unsigned char)block.data[ pos++ ];
}
if( pos + len >= block.length )
break;
alternate.reserve( len );
for(unsigned int a=0;a<len;a++) alternate += block.data[pos++];
convertToUtf8( alternate, SOURCE_CHARSET );

// Try to repair malformed forms
if( alternate.find( "&#" ) != string::npos )
alternate = Html::unescapeUtf8( alternate );

alternates.push_back( alternate );
alternate.clear();
}
}

// Definition
len = 0;
len = (unsigned char)block.data[pos++] << 8;
len |= (unsigned char)block.data[pos++];

if( block.type == 11 )
{
len = 0;
if( pos + 4 > block.length )
break;
for( int i = 0; i < 4; i++ )
{
len = len << 8;
len |= (unsigned char)block.data[ pos++ ];
}
}
else
{
len = (unsigned char)block.data[pos++] << 8;
len |= (unsigned char)block.data[pos++];
}

if( pos + len > block.length )
break;

0 comments on commit 160db20

Please sign in to comment.
You can’t perform that action at this time.