20
20
#include < string>
21
21
#include < algorithm>
22
22
#include < sstream>
23
+ #include < fstream>
24
+ #include < iterator>
25
+ #include < regex>
23
26
24
- // Qtools includes
25
- #include < qregexp.h>
26
- #include < qxml.h>
27
- #include < qfile.h>
28
- #include < qfileinfo.h>
27
+ #include < sys/stat.h>
29
28
30
29
// Xapian include
31
30
#include < xapian.h>
32
31
33
32
#include " version.h"
33
+ #include " xml.h"
34
34
35
35
#define MAX_TERM_LENGTH 245
36
36
@@ -106,13 +106,14 @@ static void addWords(const std::string &s,Xapian::Document &doc,int wfd)
106
106
/* * Adds all identifiers in \a s to document \a doc with weight \a wfd */
107
107
static void addIdentifiers (const std::string &s,Xapian::Document &doc,int wfd)
108
108
{
109
- QRegExp re (" [A-Z_a-z][A-Z_a-z0-9]*" );
110
- int i,l,p=0 ;
111
- QCString qs = s.c_str ();
112
- while ((i=re.match (qs,p,&l))!=-1 )
109
+ std::regex id_re (" [A-Z_a-z][A-Z_a-z0-9]*" );
110
+ auto id_begin = std::sregex_iterator (s.begin (), s.end (), id_re);
111
+ auto id_end = std::sregex_iterator ();
112
+
113
+ for (auto i = id_begin; i!=id_end; ++i)
113
114
{
114
- safeAddTerm (qs. mid (p,i-p). data (),doc,wfd) ;
115
- p=i+l ;
115
+ std::smatch match = *i ;
116
+ safeAddTerm (match. str (),doc,wfd) ;
116
117
}
117
118
}
118
119
@@ -142,12 +143,12 @@ static std::string unescapeXmlEntities(const std::string &s)
142
143
/* * This class is a wrapper around SAX style XML parser, which
143
144
* parses the file without first building a DOM tree in memory.
144
145
*/
145
- class XMLContentHandler : public QXmlDefaultHandler
146
+ class XMLContentHandler
146
147
{
147
148
public:
148
149
/* * Handler for parsing XML data */
149
- XMLContentHandler (const QString &path)
150
- : m_db(( path+" doxysearch.db" ).utf8().data() ,Xapian::DB_CREATE_OR_OVERWRITE),
150
+ XMLContentHandler (const std::string &path)
151
+ : m_db(path+" doxysearch.db" ,Xapian::DB_CREATE_OR_OVERWRITE),
151
152
m_stemmer (" english" )
152
153
{
153
154
m_curFieldName = UnknownField;
@@ -161,7 +162,6 @@ class XMLContentHandler : public QXmlDefaultHandler
161
162
m_db.commit ();
162
163
}
163
164
164
- private:
165
165
enum FieldNames
166
166
{
167
167
UnknownField = 0 ,
@@ -175,13 +175,12 @@ class XMLContentHandler : public QXmlDefaultHandler
175
175
};
176
176
177
177
/* * Handler for a start tag. Called for <doc> and <field> tags */
178
- bool startElement (const QString &, const QString &,
179
- const QString &name, const QXmlAttributes &attrib)
178
+ void startElement (const std::string &name, const XMLHandlers::Attributes &attrib)
180
179
{
181
180
m_data=" " ;
182
181
if (name==" field" )
183
182
{
184
- QString fieldName = attrib. value (" name" );
183
+ std::string fieldName = XMLHandlers:: value (attrib, " name" );
185
184
if (fieldName==" type" ) m_curFieldName=TypeField;
186
185
else if (fieldName==" name" ) m_curFieldName=NameField;
187
186
else if (fieldName==" args" ) m_curFieldName=ArgsField;
@@ -191,11 +190,10 @@ class XMLContentHandler : public QXmlDefaultHandler
191
190
else if (fieldName==" text" ) m_curFieldName=TextField;
192
191
else m_curFieldName=UnknownField;
193
192
}
194
- return TRUE ;
195
193
}
196
194
197
195
/* * Handler for an end tag. Called for </doc> and </field> tags */
198
- bool endElement (const QString &, const QString &, const QString &name)
196
+ void endElement (const std::string &name)
199
197
{
200
198
if (name==" doc" ) // </doc>
201
199
{
@@ -260,16 +258,21 @@ class XMLContentHandler : public QXmlDefaultHandler
260
258
m_curFieldName=UnknownField;
261
259
}
262
260
// reset m_data
263
- return TRUE ;
264
261
}
265
262
266
263
/* * Handler for inline text */
267
- bool characters (const QString & ch)
264
+ void characters (const std::string & ch)
268
265
{
269
- m_data += std::string (ch.utf8 ());
270
- return TRUE ;
266
+ m_data += ch;
271
267
}
272
268
269
+ void error (const std::string &fileName,int lineNr,const std::string &msg)
270
+ {
271
+ std::cerr << " Fatal error at " << fileName << " :" << lineNr << " : " << msg << std::endl;
272
+ }
273
+
274
+ private:
275
+
273
276
// internal state
274
277
Xapian::WritableDatabase m_db;
275
278
Xapian::Document m_doc;
@@ -279,46 +282,39 @@ class XMLContentHandler : public QXmlDefaultHandler
279
282
FieldNames m_curFieldName;
280
283
};
281
284
282
- /* * Class for handling error during XML parsing */
283
- class XMLErrorHandler : public QXmlErrorHandler
284
- {
285
- public:
286
- virtual ~XMLErrorHandler () {}
287
- bool warning ( const QXmlParseException & )
288
- {
289
- return FALSE ;
290
- }
291
- bool error ( const QXmlParseException & )
292
- {
293
- return FALSE ;
294
- }
295
- bool fatalError ( const QXmlParseException &exception )
296
- {
297
- std::cerr << " Fatal error at line " << exception.lineNumber ()
298
- << " column " << exception.columnNumber () << " : "
299
- << exception.message ().utf8 () << std::endl;
300
- return FALSE ;
301
- }
302
- QString errorString () { return " " ; }
303
-
304
- private:
305
- QString errorMsg;
306
- };
307
-
308
285
/** Writes the command line synopsis for program \a name to standard error
 *  and then terminates the process with exit status \a exitVal.
 *
 *  @param name     program name shown in the synopsis
 *  @param exitVal  status passed to exit(); defaults to 1
 */
static void usage(const char *name, int exitVal = 1)
{
  std::ostream &err = std::cerr;
  err << " Usage: ";
  err << name;
  err << " [-o output_dir] searchdata.xml [searchdata2.xml ...]";
  err << std::endl;
  exit(exitVal);
}
313
290
291
/** Returns the contents of file \a fileName as a string.
 *
 *  @param fileName  path of the file to read
 *  @return the characters read from the file; an empty string when the
 *          file cannot be opened.
 */
inline std::string fileToString(const std::string &fileName)
{
  std::string result;
  std::ifstream in(fileName);
  if (!in.is_open())
  {
    // Without this guard tellg() below reports -1, which turns into a huge
    // unsigned value when handed to reserve() and makes it throw.
    return result;
  }

  // The file size is only a capacity hint; skip it when it is unknown.
  in.seekg(0, std::ios::end);
  const std::streamoff size = in.tellg();
  if (size>0)
  {
    result.reserve(static_cast<std::string::size_type>(size));
  }
  in.clear(); // drop any failure state left by the size probe
  in.seekg(0, std::ios::beg);

  result.assign(std::istreambuf_iterator<char>(in),
                std::istreambuf_iterator<char>());
  return result;
}
303
+
304
/** Checks whether \a path refers to an existing directory.
 *
 *  @param path  file system path to examine; may be null
 *  @return true when stat() succeeds on \a path and reports a directory,
 *          false otherwise (including for a null \a path).
 */
bool dirExists(const char *path)
{
  if (path==nullptr) return false;
  struct stat info = {};
  if (stat(path,&info)!=0) return false;
  // Compare the complete file-type field. S_IFDIR is a value of that field,
  // not an independent flag: on common POSIX layouts other file types
  // (e.g. block devices, sockets) have the same bit set, so a plain
  // `st_mode & S_IFDIR` test would accept them as directories.
  return (info.st_mode & S_IFMT)==S_IFDIR;
}
309
+
314
310
/* * main function to index data */
315
311
int main (int argc,const char **argv)
316
312
{
317
313
if (argc<2 )
318
314
{
319
315
usage (argv[0 ]);
320
316
}
321
- QString outputDir;
317
+ std::string outputDir;
322
318
for (int i=1 ;i<argc;i++)
323
319
{
324
320
if (std::string (argv[i])==" -o" )
@@ -332,8 +328,7 @@ int main(int argc,const char **argv)
332
328
{
333
329
i++;
334
330
outputDir=argv[i];
335
- QFileInfo fi (outputDir);
336
- if (!fi.exists () || !fi.isDir ())
331
+ if (!dirExists (outputDir.c_str ()))
337
332
{
338
333
std::cerr << " Error: specified output directory does not exist!" << std::endl;
339
334
usage (argv[0 ]);
@@ -353,12 +348,16 @@ int main(int argc,const char **argv)
353
348
354
349
try
355
350
{
356
- if (!outputDir.isEmpty () && outputDir.at (outputDir.length ()-1 )!=pathSep)
351
+ if (!outputDir.empty () && outputDir.at (outputDir.length ()-1 )!=pathSep)
357
352
{
358
353
outputDir+=pathSep;
359
354
}
360
- XMLContentHandler handler (outputDir);
361
- XMLErrorHandler errorHandler;
355
+ XMLContentHandler contentHandler (outputDir);
356
+ XMLHandlers handlers;
357
+ handlers.startElement = [&contentHandler](const std::string &name,const XMLHandlers::Attributes &attrs) { contentHandler.startElement (name,attrs); };
358
+ handlers.endElement = [&contentHandler](const std::string &name) { contentHandler.endElement (name); };
359
+ handlers.characters = [&contentHandler](const std::string &chars) { contentHandler.characters (chars); };
360
+ handlers.error = [&contentHandler](const std::string &fileName,int lineNr,const std::string &msg) { contentHandler.error (fileName,lineNr,msg); };
362
361
for (int i=1 ;i<argc;i++)
363
362
{
364
363
if (std::string (argv[i])==" -o" )
@@ -367,14 +366,10 @@ int main(int argc,const char **argv)
367
366
}
368
367
else
369
368
{
370
- QString xmlFileName = argv[i];
371
- std::cout << " Processing " << xmlFileName.utf8 () << " ..." << std::endl;
372
- QFile xmlFile (xmlFileName);
373
- QXmlInputSource source (xmlFile);
374
- QXmlSimpleReader reader;
375
- reader.setContentHandler (&handler);
376
- reader.setErrorHandler (&errorHandler);
377
- reader.parse (source);
369
+ std::cout << " Processing " << argv[i] << " ..." << std::endl;
370
+ std::string inputStr = fileToString (argv[i]);
371
+ XMLParser parser (handlers);
372
+ parser.parse (argv[i],inputStr.c_str (),false );
378
373
}
379
374
}
380
375
}
0 commit comments