From 3ce00a25e6ef31489ed6a74b00e465f1bde52e98 Mon Sep 17 00:00:00 2001 From: jeanlf Date: Fri, 19 Jan 2024 10:31:13 +0100 Subject: [PATCH] reworked b945f30b --- include/gpac/utf.h | 9 +++-- src/filters/load_bt_xmt.c | 79 ++++++++++++++++++++++++--------------- src/filters/load_text.c | 2 +- src/utils/utf.c | 7 ++-- 4 files changed, 59 insertions(+), 38 deletions(-) diff --git a/include/gpac/utf.h b/include/gpac/utf.h index b44de3a5c4..28cb79ff3d 100644 --- a/include/gpac/utf.h +++ b/include/gpac/utf.h @@ -83,17 +83,18 @@ Gets the length in character of a wide-char string u32 gf_utf8_wcslen(const unsigned short *s); /*! -\brief returns a UTF8 string from a string started with BOM +\brief returns a string from a string started with BOM -Returns UTF8 from data +Returns string from data, potentially converting utf16 to utf8 \param data the string or wide-char string \param size of the data buffer size of the data buffer \param out_ptr set to an allocated buffer if needed for conversion, shall be destroyed by caller. Must not be NULL -\param result set to resulting UTF8 string. Must not be NULL +\param result set to resulting string. Must not be NULL +\param res_size set to length of resulting string. May be NULL \return error if any: GF_IO_ERR if UTF decode error or GF_BAD_PARAM */ -GF_Err gf_utf_get_utf8_string_from_bom(const u8 *data, u32 size, char **out_ptr, char **result); +GF_Err gf_utf_get_string_from_bom(const u8 *data, u32 size, char **out_ptr, char **result, u32 *res_size); /*! \brief Checks validity of a UTF8 string diff --git a/src/filters/load_bt_xmt.c b/src/filters/load_bt_xmt.c index 289285d008..f0d299e0ff 100644 --- a/src/filters/load_bt_xmt.c +++ b/src/filters/load_bt_xmt.c @@ -825,12 +825,30 @@ static void ctxload_finalize(GF_Filter *filter) } } +static const char* my_strstr(const char *str, const char *pat, u32 str_len) +{ + u32 len_pat = (u32) strlen(pat); + if (len_pat>str_len) return NULL; + //basically a memmem clone (we don't use for portability reasons) + while (1) { + char *next = memchr(str, pat[0], str_len); + if (!next) return NULL; + u32 left = str_len - (u32) (next-str); + if (left static const char *ctxload_probe_data(const u8 *probe_data, u32 size, GF_FilterProbeScore *score) { const char *mime_type = NULL; char *dst = NULL; GF_Err e; + u32 probe_size=size; char *res=NULL; /* check gzip magic header */ @@ -839,7 +857,7 @@ static const char *ctxload_probe_data(const u8 *probe_data, u32 size, GF_FilterP return "btz|bt.gz|xmt.gz|xmtz|wrl.gz|x3dv.gz|x3dvz|x3d.gz|x3dz"; } - e = gf_utf_get_utf8_string_from_bom(probe_data, size, &dst, &res); + e = gf_utf_get_string_from_bom(probe_data, size, &dst, &res, &probe_size); if (e) return NULL; probe_data = res; @@ -849,53 +867,50 @@ static const char *ctxload_probe_data(const u8 *probe_data, u32 size, GF_FilterP //for XML, strip doctype, '); - if (!probe_data) goto exit; - probe_data++; - while (probe_data[0] && strchr("\n\r\t ", (char) probe_data[0])) - probe_data ++; + search = ">"; } //for XML, strip xml header else if (!strncmp(probe_data, ""); - if (!probe_data) goto exit; - - probe_data += 2; - while (probe_data[0] && strchr("\n\r\t ", (char) probe_data[0])) - probe_data ++; + search = "?>"; } else if (!strncmp(probe_data, ""); - if (!probe_data) goto exit; - probe_data += 3; - while (probe_data[0] && strchr("\n\r\t ", (char) probe_data[0])) - probe_data ++; + search = "-->"; } else { break; } + const char *res = my_strstr(probe_data, search, probe_size); + if (!res) goto exit; + res += strlen(search); + probe_size -= (u32) (res - (char*)probe_data); + probe_data = res; + while (probe_size && probe_data[0] && strchr("\n\r\t ", (char) probe_data[0])) { + probe_data ++; + probe_size--; + } } //probe_data is now the first element of the document, if XML //we should refine by getting the xmlns attribute value rather than searching for its value... - if (strstr(probe_data, "http://www.w3.org/1999/XSL/Transform") + if (my_strstr(probe_data, "http://www.w3.org/1999/XSL/Transform", probe_size) ) { } else if (!strncmp(probe_data, "=5) { - if (!gf_utf8_is_legal(data+4, size-4)) - return GF_IO_ERR; /*0: no unicode, 1: UTF-16BE, 2: UTF-16LE*/ if ((data[0]==0xFF) && (data[1]==0xFE)) { if (!data[2] && !data[3]) { @@ -672,6 +671,7 @@ GF_Err gf_utf_get_utf8_string_from_bom(const u8 *data, u32 size, char **out_ptr, unicode_type = 1; } } else if ((data[0]==0xEF) && (data[1]==0xBB) && (data[2]==0xBF)) { + if (res_size) *res_size = size-4; *result = (char *) (data+4); return GF_OK; } @@ -725,6 +725,7 @@ GF_Err gf_utf_get_utf8_string_from_bom(const u8 *data, u32 size, char **out_ptr, return GF_IO_ERR; } *result = dst; + if (res_size) *res_size = res+1; return GF_OK; }