Skip to content

Commit

Permalink
added 4 more diffbot errors so hopefully
Browse files Browse the repository at this point in the history
no more 'unknown diffbot error' error codes
in crawlbot.
  • Loading branch information
gigablast committed Jan 12, 2016
1 parent 032f597 commit 1e24821
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 2 deletions.
4 changes: 4 additions & 0 deletions Errno.cpp
Expand Up @@ -198,6 +198,10 @@ case EMALFORMEDQUERY: return "Malformed query";
case ESHARDDOWN: return "One or more shards are down";
case EDOCWARC: return "Doc is WARC or ARC and support is disabled";
case EDIFFBOTREQUESTTIMEDOUTTHIRDPARTY: return "Diffbot request of third-party content timed out";
case EDIFFBOTTOOMANYTEXTNODES: return "The selected pages contains too many TextNodes (>50000) for Diffbot";
case EDIFFBOTCURLYREPLY: return "Diffbot reply was {}";
case EDIFFBOTTOKENUNAUTHORIZED: return "Diffbot token was unauthorized";
case EDIFFBOTPLAINERROR: return "Diffbot error code was 500";
}
// if the remote error bit is clear it must be a regular errno
//if ( ! ( errnum & REMOTE_ERROR_BIT ) ) return strerror ( errnum );
Expand Down
6 changes: 5 additions & 1 deletion Errno.h
Expand Up @@ -203,6 +203,10 @@ enum {
ESHARDDOWN,
EDOCWARC,
EWRONGSHARD,
EDIFFBOTREQUESTTIMEDOUTTHIRDPARTY
EDIFFBOTREQUESTTIMEDOUTTHIRDPARTY,
EDIFFBOTTOOMANYTEXTNODES,
EDIFFBOTCURLYREPLY,
EDIFFBOTTOKENUNAUTHORIZED,
EDIFFBOTPLAINERROR
};
#endif
12 changes: 11 additions & 1 deletion XmlDoc.cpp
Expand Up @@ -15856,6 +15856,10 @@ void gotDiffbotReplyWrapper ( void *state , TcpSocket *s ) {
char *err = strstr(page,"\"error\":\"");
if ( err ) err += 9;
int32_t code = EDIFFBOTUNKNOWNERROR;
if ( ! err &&
page[0]=='{' &&
page[1]=='}' )
code = EDIFFBOTCURLYREPLY;
if ( err && !strncmp(err,"Unable to apply rules",21))
code = EDIFFBOTUNABLETOAPPLYRULES;
// like .pdf pages get this error
Expand All @@ -15871,17 +15875,23 @@ void gotDiffbotReplyWrapper ( void *state , TcpSocket *s ) {
code = EDIFFBOTVERSIONREQ;
if ( err && !strncmp(err,"Empty content",13))
code = EDIFFBOTEMPTYCONTENT;
if ( err && !strncmp(err,"The selected pages contains too many TextNodes",46))
code = EDIFFBOTTOOMANYTEXTNODES;
if ( err && !strncmp(err,"No content received",19))
code = EDIFFBOTEMPTYCONTENT;
if ( err && !strncmp(err,"Request timed",13))
code = EDIFFBOTREQUESTTIMEDOUT;
if ( err &&!strncmp(err,"Request of third-party c",13))
if ( err &&!strncmp(err,"Request of third-party c",24))
code = EDIFFBOTREQUESTTIMEDOUTTHIRDPARTY;
// error processing url
if ( err && !strncmp(err,"Error processing",16))
code = EDIFFBOTURLPROCESSERROR;
if ( err && !strncmp(err,"Your token has exp",18))
code = EDIFFBOTTOKENEXPIRED;
if ( err && !strncmp(err,"Not authorized API tok",22))
code = EDIFFBOTTOKENUNAUTHORIZED;
if ( err && !strncmp(err,"Error.",6) )
code = EDIFFBOTPLAINERROR;
THIS->m_diffbotReplyError = code;
}
// a hack for detecting if token is expired
Expand Down

0 comments on commit 1e24821

Please sign in to comment.