diff --git a/Errno.cpp b/Errno.cpp index 6f65c9339..32e81c70f 100644 --- a/Errno.cpp +++ b/Errno.cpp @@ -198,6 +198,10 @@ case EMALFORMEDQUERY: return "Malformed query"; case ESHARDDOWN: return "One or more shards are down"; case EDOCWARC: return "Doc is WARC or ARC and support is disabled"; case EDIFFBOTREQUESTTIMEDOUTTHIRDPARTY: return "Diffbot request of third-party content timed out"; +case EDIFFBOTTOOMANYTEXTNODES: return "The selected pages contains too many TextNodes (>50000) for Diffbot"; +case EDIFFBOTCURLYREPLY: return "Diffbot reply was {}"; +case EDIFFBOTTOKENUNAUTHORIZED: return "Diffbot token was unauthorized"; +case EDIFFBOTPLAINERROR: return "Diffbot error code was 500"; } // if the remote error bit is clear it must be a regulare errno //if ( ! ( errnum & REMOTE_ERROR_BIT ) ) return strerror ( errnum ); diff --git a/Errno.h b/Errno.h index 3b79be398..d9f4e3291 100644 --- a/Errno.h +++ b/Errno.h @@ -203,6 +203,10 @@ enum { ESHARDDOWN, EDOCWARC, EWRONGSHARD, - EDIFFBOTREQUESTTIMEDOUTTHIRDPARTY + EDIFFBOTREQUESTTIMEDOUTTHIRDPARTY, + EDIFFBOTTOOMANYTEXTNODES, + EDIFFBOTCURLYREPLY, + EDIFFBOTTOKENUNAUTHORIZED, + EDIFFBOTPLAINERROR }; #endif diff --git a/XmlDoc.cpp b/XmlDoc.cpp index c87121447..b488896ae 100644 --- a/XmlDoc.cpp +++ b/XmlDoc.cpp @@ -15856,6 +15856,10 @@ void gotDiffbotReplyWrapper ( void *state , TcpSocket *s ) { char *err = strstr(page,"\"error\":\""); if ( err ) err += 9; int32_t code = EDIFFBOTUNKNOWNERROR; + if ( ! err && + page[0]=='{' && + page[1]=='}' ) + code = EDIFFBOTCURLYREPLY; if ( err && !strncmp(err,"Unable to apply rules",21)) code = EDIFFBOTUNABLETOAPPLYRULES; // like .pdf pages get this error @@ -15871,17 +15875,23 @@ void gotDiffbotReplyWrapper ( void *state , TcpSocket *s ) { code = EDIFFBOTVERSIONREQ; if ( err && !strncmp(err,"Empty content",13)) code = EDIFFBOTEMPTYCONTENT; + if ( err && !strncmp(err,"The selected pages contains too many TextNodes",46)) + code = EDIFFBOTTOOMANYTEXTNODES; if ( err && !strncmp(err,"No content received",19)) code = EDIFFBOTEMPTYCONTENT; if ( err && !strncmp(err,"Request timed",13)) code = EDIFFBOTREQUESTTIMEDOUT; - if ( err &&!strncmp(err,"Request of third-party c",13)) + if ( err &&!strncmp(err,"Request of third-party c",24)) code = EDIFFBOTREQUESTTIMEDOUTTHIRDPARTY; // error processing url if ( err && !strncmp(err,"Error processing",16)) code = EDIFFBOTURLPROCESSERROR; if ( err && !strncmp(err,"Your token has exp",18)) code = EDIFFBOTTOKENEXPIRED; + if ( err && !strncmp(err,"Not authorized API tok",22)) + code = EDIFFBOTTOKENUNAUTHORIZED; + if ( err && !strncmp(err,"Error.",6) ) + code = EDIFFBOTPLAINERROR; THIS->m_diffbotReplyError = code; } // a hack for detecting if token is expired