Skip to content

Commit

Permalink
Fix dictionary parse errors:
Browse files Browse the repository at this point in the history
1. MDX dictionary load error on Windows.
2. DSL dictionary load error on Windows.
  • Loading branch information
xiaoyifang committed Oct 18, 2021
1 parent c1eef3a commit ded545e
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 107 deletions.
88 changes: 21 additions & 67 deletions dsl_details.cc
Original file line number Diff line number Diff line change
Expand Up @@ -891,6 +891,7 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
}

iconv.reinit( encoding );
codec=QTextCodec::codecForName(iconv.getEncodingNameFor(encoding));

// We now can use our own readNextLine() function

Expand Down Expand Up @@ -1009,7 +1010,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
for( ; ; )
{
// Check that we have bytes to read
if ( readBufferLeft < 4 ) // To convert one char, we need at most 4 bytes
if ( readBufferLeft < 1000 ) // To convert one char, we need at most 4 bytes
{
if ( !gzeof( f ) )
{
Expand All @@ -1026,76 +1027,25 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,

readBufferPtr = readBuffer;
readBufferLeft += (size_t) result;
frag = QByteArray::fromRawData(readBuffer, readBufferLeft);
}
}

if ( readBufferLeft < readMultiple )
{
// No more data. Return what we've got so far, forget the last byte if
// it was a 16-bit Unicode and a file had an odd number of bytes.
readBufferLeft = 0;

if ( outPtr != &wcharBuffer.front() )
{
// If there was a stray \r, remove it
if ( outPtr[ -1 ] == L'\r' )
--outPtr;

out = wstring( &wcharBuffer.front(), outPtr - &wcharBuffer.front() );

++linesRead;

return true;
}
else
//QByteArray frag=QByteArray::fromRawData(readBuffer,readBufferLeft);
QTextStream in(frag);
if(in.atEnd())
return false;
}

// Check that we have chars to write
if ( leftInOut < 2 ) // With 16-bit wchars, 2 is needed for a surrogate pair
{
wcharBuffer.resize( wcharBuffer.size() + 64 );
outPtr = &wcharBuffer.front() + wcharBuffer.size() - 64 - leftInOut;
leftInOut += 64;
}

// Ok, now convert one char
size_t outBytesLeft = sizeof( wchar );

Iconv::Result r =
iconv.convert( (void const *&)readBufferPtr, readBufferLeft,
(void *&)outPtr, outBytesLeft );

if ( r == Iconv::NeedMoreOut && outBytesLeft == sizeof( wchar ) )
{
// Seems to be a surrogate pair with a 16-bit target wchar

outBytesLeft *= 2;
r = iconv.convert( (void const *&)readBufferPtr, readBufferLeft,
(void *&)outPtr, outBytesLeft );
--leftInOut; // Complements the next decremention
}

if ( outBytesLeft )
throw exEncodingError();

--leftInOut;

// Have we got \n?
if ( outPtr[ -1 ] == L'\n' )
{
--outPtr;

// Now kill a \r if there is one, and return the result.
if ( outPtr != &wcharBuffer.front() && outPtr[ -1 ] == L'\r' )
--outPtr;

out = wstring( &wcharBuffer.front(), outPtr - &wcharBuffer.front() );

++linesRead;
in.setCodec(codec);
QString line=in.readLine();
qint64 pos=in.pos();
readBufferPtr+=pos;
readBufferLeft-=pos;
linesRead++;
out=line.toStdU32String();
frag.remove(0, pos);
return true;

return true;
}
}
}

Expand Down Expand Up @@ -1330,8 +1280,12 @@ void expandOptionalParts( wstring & str, list< wstring > * result,
// Limit the amount of results to avoid excessive resource consumption
if ( headwords->size() < 32 )
headwords->push_back( str );
if( !inside_recurse )
result->merge( expanded );
if (!inside_recurse)
{
result->sort();
expanded.sort();
result->merge(expanded);
}
}

static const wstring openBraces( GD_NATIVE_TO_WS( L"{{" ) );
Expand Down
6 changes: 5 additions & 1 deletion dsl_details.hh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <zlib.h>
#include "dictionary.hh"
#include "iconv.hh"
#include <QTextCodec>
#include <QByteArray>

// Implementation details for Dsl, not part of its interface
namespace Dsl {
Expand Down Expand Up @@ -119,11 +121,13 @@ class DslScanner
{
gzFile f;
DslEncoding encoding;
QTextCodec* codec;
DslIconv iconv;
wstring dictionaryName;
wstring langFrom, langTo;
wstring soundDictionary;
char readBuffer[ 65536 ];
char readBuffer[ 3000 ];
QByteArray frag;
char * readBufferPtr;
size_t readBufferLeft;
vector< wchar > wcharBuffer;
Expand Down
2 changes: 1 addition & 1 deletion goldendict.pro
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ QT += core \

greaterThan(QT_MAJOR_VERSION, 4) {
QT += widgets \
webenginewidgets\
webenginewidgets\
printsupport \
help

Expand Down
37 changes: 3 additions & 34 deletions mdictparser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,11 @@
#include <QStringList>
#include <QByteArray>
#include <QFileInfo>
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
#include <QRegularExpression>
#else
#include <QRegExp>
#endif
#include <QDomDocument>
#include <QTextDocumentFragment>
#include <QDataStream>
#include <QTextCodec>

#include "decompress.hh"
#include "gddebug.hh"
Expand Down Expand Up @@ -184,38 +181,10 @@ QString MdictParser::toUtf16( const char * fromCode, const char * from, size_t f
if ( !fromCode || !from )
return QString();

iconv_t conv = iconv_open( "UTF-16//IGNORE", fromCode );
if ( conv == ( iconv_t ) - 1 )
return QString();

vector<char> result;
const static int CHUNK_SIZE = 512;
char buf[CHUNK_SIZE];
char ** inBuf = ( char ** )&from;

while ( fromSize )
{
char * outBuf = buf;
size_t outBytesLeft = CHUNK_SIZE;
size_t ret = iconv( conv, inBuf, &fromSize, &outBuf, &outBytesLeft );

if ( ret == ( size_t ) - 1 )
{
if ( errno != E2BIG )
{
// Real problem
result.clear();
break;
}
}

result.insert( result.end(), buf, buf + CHUNK_SIZE - outBytesLeft );
}

iconv_close( conv );
if ( result.size() <= 2 )
return QString();
return QString::fromUtf16( ( const ushort * )&result.front() );
QTextCodec *codec =QTextCodec::codecForName(fromCode);
return codec->toUnicode(from,fromSize);
}

bool MdictParser::decryptHeadWordIndex(char * buffer, qint64 len)
Expand Down
8 changes: 4 additions & 4 deletions wstring.hh
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ namespace gd
{
#ifdef __WIN32

typedef unsigned int wchar;
typedef std::basic_string< wchar > wstring;
typedef char32_t wchar;
typedef std::u32string wstring;

// GD_NATIVE_TO_WS is used to convert L"" strings to a const pointer to
// wchar.
Expand All @@ -55,8 +55,8 @@ namespace gd

#else

typedef wchar_t wchar;
using std::wstring;
typedef char32_t wchar;
typedef std::u32string wstring;
#define GD_NATIVE_TO_WS( str ) ( str )
#endif
}
Expand Down

0 comments on commit ded545e

Please sign in to comment.