Skip to content

Commit

Permalink
Roll libxml from 44ecefc8 to 1061537e
Browse files Browse the repository at this point in the history
2023-03-26 wellnhofer@aevum.de malloc-fail: Fix buffer overread with HTML doctype declarations
2023-03-26 wellnhofer@aevum.de encoding: Fix error code in asciiToUTF8
2023-03-26 wellnhofer@aevum.de parser: Fix buffer overread in xmlDetectEBCDIC
2023-03-21 wellnhofer@aevum.de parser: Grow input buffer earlier when reading characters
2023-03-21 wellnhofer@aevum.de parser: Rework EBCDIC code page detection
2023-03-21 wellnhofer@aevum.de parser: Limit name length in xmlParseEncName
2023-03-21 wellnhofer@aevum.de parser: Rework shrinking of input buffers

Fixed: 1427882
Bug: 934413
Change-Id: Ic7b377f95a256e2ba48a2be638a9d664bc0bab4e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4374701
Commit-Queue: Joey Arhar <jarhar@chromium.org>
Reviewed-by: David Baron <dbaron@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1122546}
  • Loading branch information
josepharhar authored and Chromium LUCI CQ committed Mar 27, 2023
1 parent 44d14da commit 4a791b0
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 371 deletions.
2 changes: 1 addition & 1 deletion third_party/libxml/README.chromium
@@ -1,6 +1,6 @@
Name: libxml
URL: http://xmlsoft.org
Version: 44ecefc8cc299a66ac21ffec141eb261e92638da
Version: 1061537efdf3874c91fd50d18f98c4b8a3518e52
CPEPrefix: cpe:/a:xmlsoft:libxml2:2.11.0
License: MIT
License File: src/Copyright
Expand Down
13 changes: 5 additions & 8 deletions third_party/libxml/src/HTMLparser.c
Expand Up @@ -411,7 +411,7 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
return(ctxt->token);
}

if ((ctxt->input->end - ctxt->input->cur < 4) &&
if ((ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) &&
(xmlParserGrow(ctxt) < 0))
return(0);

Expand Down Expand Up @@ -3010,9 +3010,9 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
"Unfinished SystemLiteral\n", NULL, NULL);
} else {
NEXT;
if (err == 0)
ret = xmlStrndup((BASE_PTR+startPosition), len);
NEXT;
}

return(ret);
Expand Down Expand Up @@ -3065,9 +3065,9 @@ htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
"Unfinished PubidLiteral\n", NULL, NULL);
} else {
NEXT;
if (err == 0)
ret = xmlStrndup((BASE_PTR + startPosition), len);
NEXT;
}

return(ret);
Expand Down Expand Up @@ -3100,7 +3100,6 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
int nbchar = 0;
int cur,l;

SHRINK;
cur = CUR_CHAR(l);
while (cur != 0) {
if ((cur == '<') && (NXT(1) == '/')) {
Expand Down Expand Up @@ -3358,7 +3357,6 @@ htmlParsePI(htmlParserCtxtPtr ctxt) {
* this is a Processing Instruction.
*/
SKIP(2);
SHRINK;

/*
* Parse the target name and check for special support like
Expand Down Expand Up @@ -3481,7 +3479,6 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {

state = ctxt->instate;
ctxt->instate = XML_PARSER_COMMENT;
SHRINK;
SKIP(4);
buf = (xmlChar *) xmlMallocAtomic(size);
if (buf == NULL) {
Expand Down Expand Up @@ -4477,8 +4474,8 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
htmlParseCharData(ctxt);
}

GROW;
SHRINK;
GROW;
}
if (currentNode != NULL) xmlFree(currentNode);
}
Expand Down Expand Up @@ -4920,8 +4917,8 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
htmlParseCharData(ctxt);
}

GROW;
SHRINK;
GROW;
}
if (currentNode != NULL) xmlFree(currentNode);
}
Expand Down
185 changes: 4 additions & 181 deletions third_party/libxml/src/encoding.c
Expand Up @@ -197,7 +197,7 @@ asciiToUTF8(unsigned char* out, int *outlen,
} else {
*outlen = out - outstart;
*inlen = processed - base;
return(-1);
return(-2);
}

processed = (const unsigned char*) in;
Expand Down Expand Up @@ -2037,7 +2037,7 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
* as the return value is 0, else unpredictable.
* The value of @outlen after return is the number of octets produced.
*/
static int
int
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
int *outlen, const unsigned char *in, int *inlen, int flush) {
int ret;
Expand Down Expand Up @@ -2123,189 +2123,12 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
* @out: an xmlBuffer for the output.
* @in: an xmlBuffer for the input
*
* Front-end for the encoding handler input function, but handle only
* the very first line, i.e. limit itself to 45 chars.
*
* Returns the number of bytes written on success, or
* -1 general error
* -2 if the transcoding fails (for *in is not valid utf8 string or
* the result of transformation can't fit into the encoding we want), or
DEPRECATED: Don't use.
*/
int
xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
xmlBufferPtr in) {
int ret;
int written;
int toconv;

if (handler == NULL) return(-1);
if (out == NULL) return(-1);
if (in == NULL) return(-1);

/* calculate space available */
written = out->size - out->use - 1; /* count '\0' */
toconv = in->use;
/*
* echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
* 45 chars should be sufficient to reach the end of the encoding
* declaration without going too far inside the document content.
* on UTF-16 this means 90bytes, on UCS4 this means 180
* The actual value depending on guessed encoding is passed as @len
* if provided
*/
if (toconv > 180)
toconv = 180;
if (toconv * 2 >= written) {
xmlBufferGrow(out, toconv * 2);
written = out->size - out->use - 1;
}

ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
in->content, &toconv, 0);
xmlBufferShrink(in, toconv);
out->use += written;
out->content[out->use] = 0;
if (ret == -1) ret = -3;

#ifdef DEBUG_ENCODING
switch (ret) {
case 0:
xmlGenericError(xmlGenericErrorContext,
"converted %d bytes to %d bytes of input\n",
toconv, written);
break;
case -1:
xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
toconv, written, in->use);
break;
case -2:
xmlGenericError(xmlGenericErrorContext,
"input conversion failed due to input error\n");
break;
case -3:
xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
toconv, written, in->use);
break;
default:
xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
}
#endif /* DEBUG_ENCODING */
/*
* Ignore when input buffer is not on a boundary
*/
if (ret == -3) ret = 0;
if (ret == -1) ret = 0;
return(written ? written : ret);
}

/**
* xmlCharEncFirstLineInput:
* @input: a parser input buffer
* @len: number of bytes to convert for the first line, or -1
*
* Front-end for the encoding handler input function, but handle only
* the very first line. Point is that this is based on autodetection
* of the encoding and once that first line is converted we may find
* out that a different decoder is needed to process the input.
*
* Returns the number of bytes written on success, or
* -1 general error
* -2 if the transcoding fails (for *in is not valid utf8 string or
* the result of transformation can't fit into the encoding we want), or
*/
int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
{
int ret;
size_t written;
size_t toconv;
int c_in;
int c_out;
xmlBufPtr in;
xmlBufPtr out;

if ((input == NULL) || (input->encoder == NULL) ||
(input->buffer == NULL) || (input->raw == NULL))
return (-1);
out = input->buffer;
in = input->raw;

toconv = xmlBufUse(in);
if (toconv == 0)
return (0);
written = xmlBufAvail(out);
/*
* echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
* 45 chars should be sufficient to reach the end of the encoding
* declaration without going too far inside the document content.
* on UTF-16 this means 90bytes, on UCS4 this means 180
* The actual value depending on guessed encoding is passed as @len
* if provided
*/
if (len >= 0) {
if (toconv > (unsigned int) len)
toconv = len;
} else {
if (toconv > 180)
toconv = 180;
}
if (toconv * 2 >= written) {
xmlBufGrow(out, toconv * 2);
written = xmlBufAvail(out);
}
if (written > 360)
written = 360;

c_in = toconv;
c_out = written;
ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
xmlBufContent(in), &c_in, 0);
xmlBufShrink(in, c_in);
xmlBufAddLen(out, c_out);
if (ret == -1)
ret = -3;

switch (ret) {
case 0:
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"converted %d bytes to %d bytes of input\n",
c_in, c_out);
#endif
break;
case -1:
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"converted %d bytes to %d bytes of input, %d left\n",
c_in, c_out, (int)xmlBufUse(in));
#endif
break;
case -3:
#ifdef DEBUG_ENCODING
xmlGenericError(xmlGenericErrorContext,
"converted %d bytes to %d bytes of input, %d left\n",
c_in, c_out, (int)xmlBufUse(in));
#endif
break;
case -2: {
char buf[50];
const xmlChar *content = xmlBufContent(in);

snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
content[0], content[1],
content[2], content[3]);
buf[49] = 0;
xmlEncodingErr(XML_I18N_CONV_FAILED,
"input conversion failed due to input error, bytes %s\n",
buf);
}
}
/*
* Ignore when input buffer is not on a boundary
*/
if (ret == -3) ret = 0;
if (ret == -1) ret = 0;
return(c_out ? c_out : ret);
return(xmlCharEncInFunc(handler, out, in));
}

/**
Expand Down
1 change: 1 addition & 0 deletions third_party/libxml/src/include/libxml/encoding.h
Expand Up @@ -203,6 +203,7 @@ XMLPUBFUN int
xmlCharEncInFunc (xmlCharEncodingHandler *handler,
xmlBufferPtr out,
xmlBufferPtr in);
XML_DEPRECATED
XMLPUBFUN int
xmlCharEncFirstLine (xmlCharEncodingHandler *handler,
xmlBufferPtr out,
Expand Down
3 changes: 2 additions & 1 deletion third_party/libxml/src/include/private/enc.h
Expand Up @@ -8,7 +8,8 @@ XML_HIDDEN void
xmlInitEncodingInternal(void);

XML_HIDDEN int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len);
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
int *outlen, const unsigned char *in, int *inlen, int flush);
XML_HIDDEN int
xmlCharEncInput(xmlParserInputBufferPtr input, int flush);
XML_HIDDEN int
Expand Down
2 changes: 1 addition & 1 deletion third_party/libxml/src/include/private/parser.h
Expand Up @@ -27,7 +27,7 @@ XML_HIDDEN void
xmlHaltParser(xmlParserCtxtPtr ctxt);
XML_HIDDEN int
xmlParserGrow(xmlParserCtxtPtr ctxt);
XML_HIDDEN int
XML_HIDDEN void
xmlParserShrink(xmlParserCtxtPtr ctxt);

#endif /* XML_PARSER_H_PRIVATE__ */

0 comments on commit 4a791b0

Please sign in to comment.