Skip to content

Commit

Permalink
reworked b945f30
Browse files Browse the repository at this point in the history
  • Loading branch information
jeanlf committed Jan 19, 2024
1 parent 2c02b3b commit 3ce00a2
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 38 deletions.
9 changes: 5 additions & 4 deletions include/gpac/utf.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,18 @@ Gets the length in character of a wide-char string
u32 gf_utf8_wcslen(const unsigned short *s);

/*!
\brief returns a UTF8 string from a string started with BOM
\brief returns a string from a string started with BOM
Returns UTF8 from data
Returns string from data, potentially converting utf16 to utf8
\param data the string or wide-char string
\param size size of the data buffer
\param out_ptr set to an allocated buffer if needed for conversion, shall be destroyed by caller. Must not be NULL
\param result set to resulting UTF8 string. Must not be NULL
\param result set to resulting string. Must not be NULL
\param res_size set to length of resulting string. May be NULL
\return error if any: GF_IO_ERR if UTF decode error or GF_BAD_PARAM
*/
GF_Err gf_utf_get_utf8_string_from_bom(const u8 *data, u32 size, char **out_ptr, char **result);
GF_Err gf_utf_get_string_from_bom(const u8 *data, u32 size, char **out_ptr, char **result, u32 *res_size);

/*!
\brief Checks validity of a UTF8 string
Expand Down
79 changes: 49 additions & 30 deletions src/filters/load_bt_xmt.c
Original file line number Diff line number Diff line change
Expand Up @@ -825,12 +825,30 @@ static void ctxload_finalize(GF_Filter *filter)
}
}

/*
 * Length-bounded substring search, basically a memmem() clone (memmem itself
 * is not used for portability reasons).
 *
 * Looks for the NUL-terminated pattern pat within the first str_len bytes of
 * str. The search relies on memchr()/memcmp() bounded by str_len, so str does
 * not have to be NUL-terminated and NUL bytes inside str are treated as
 * ordinary data.
 *
 * str:     buffer to search
 * pat:     NUL-terminated pattern to look for
 * str_len: number of bytes of str that may be inspected
 *
 * Returns a pointer to the first occurrence of pat inside str, str itself
 * when pat is empty (same convention as strstr()), or NULL when pat does not
 * occur within the first str_len bytes.
 */
static const char* my_strstr(const char *str, const char *pat, u32 str_len)
{
	u32 len_pat = (u32) strlen(pat);
	//an empty pattern matches at the start of the buffer; without this guard
	//the loop below would search for pat[0], i.e. a NUL byte
	if (!len_pat) return str;
	if (len_pat>str_len) return NULL;

	for (;;) {
		//jump to the next candidate position: next occurrence of the pattern's first byte
		const char *next = memchr(str, pat[0], str_len);
		if (!next) return NULL;
		//number of bytes available from the candidate to the end of the buffer
		u32 left = str_len - (u32) (next-str);
		if (left<len_pat) return NULL;
		if (!memcmp(next, pat, len_pat)) return next;
		//no match here: resume just after the candidate
		//left is always at least 1 since next lies inside the buffer
		str_len = left-1;
		str = next+1;
	}
}
#include <gpac/utf.h>
static const char *ctxload_probe_data(const u8 *probe_data, u32 size, GF_FilterProbeScore *score)
{
const char *mime_type = NULL;
char *dst = NULL;
GF_Err e;
u32 probe_size=size;
char *res=NULL;

/* check gzip magic header */
Expand All @@ -839,7 +857,7 @@ static const char *ctxload_probe_data(const u8 *probe_data, u32 size, GF_FilterP
return "btz|bt.gz|xmt.gz|xmtz|wrl.gz|x3dv.gz|x3dvz|x3d.gz|x3dz";
}

e = gf_utf_get_utf8_string_from_bom(probe_data, size, &dst, &res);
e = gf_utf_get_string_from_bom(probe_data, size, &dst, &res, &probe_size);
if (e) return NULL;
probe_data = res;

Expand All @@ -849,53 +867,50 @@ static const char *ctxload_probe_data(const u8 *probe_data, u32 size, GF_FilterP

//for XML, strip doctype, <?xml and comments
while (1) {
char *search=NULL;
if (!strncmp(probe_data, "<!DOCTYPE", 9)) {
probe_data = strchr(probe_data, '>');
if (!probe_data) goto exit;
probe_data++;
while (probe_data[0] && strchr("\n\r\t ", (char) probe_data[0]))
probe_data ++;
search = ">";
}
//for XML, strip xml header
else if (!strncmp(probe_data, "<?xml", 5)) {
probe_data = strstr(probe_data, "?>");
if (!probe_data) goto exit;

probe_data += 2;
while (probe_data[0] && strchr("\n\r\t ", (char) probe_data[0]))
probe_data ++;
search = "?>";
}
else if (!strncmp(probe_data, "<!--", 4)) {
probe_data = strstr(probe_data, "-->");
if (!probe_data) goto exit;
probe_data += 3;
while (probe_data[0] && strchr("\n\r\t ", (char) probe_data[0]))
probe_data ++;
search = "-->";
} else {
break;
}
const char *res = my_strstr(probe_data, search, probe_size);
if (!res) goto exit;
res += strlen(search);
probe_size -= (u32) (res - (char*)probe_data);
probe_data = res;
while (probe_size && probe_data[0] && strchr("\n\r\t ", (char) probe_data[0])) {
probe_data ++;
probe_size--;
}
}
//probe_data is now the first element of the document, if XML
//we should refine by getting the xmlns attribute value rather than searching for its value...

if (strstr(probe_data, "http://www.w3.org/1999/XSL/Transform")
if (my_strstr(probe_data, "http://www.w3.org/1999/XSL/Transform", probe_size)
) {
} else if (!strncmp(probe_data, "<XMT-A", strlen("<XMT-A"))
|| strstr(probe_data, "urn:mpeg:mpeg4:xmta:schema:2002")
|| my_strstr(probe_data, "urn:mpeg:mpeg4:xmta:schema:2002", probe_size)
) {
mime_type = "application/x-xmt";
} else if (strstr(probe_data, "<X3D")
|| strstr(probe_data, "http://www.web3d.org/specifications/x3d-3.0.xsd")
} else if (my_strstr(probe_data, "<X3D", probe_size)
|| my_strstr(probe_data, "http://www.web3d.org/specifications/x3d-3.0.xsd", probe_size)
) {
mime_type = "model/x3d+xml";
} else if (strstr(probe_data, "<saf")
|| strstr(probe_data, "urn:mpeg:mpeg4:SAF:2005")
|| strstr(probe_data, "urn:mpeg:mpeg4:LASeR:2005")
} else if (my_strstr(probe_data, "<saf", probe_size)
|| my_strstr(probe_data, "urn:mpeg:mpeg4:SAF:2005", probe_size)
|| my_strstr(probe_data, "urn:mpeg:mpeg4:LASeR:2005", probe_size)
) {
mime_type = "application/x-LASeR+xml";
} else if (!strncmp(probe_data, "<DIMSStream", strlen("<DIMSStream") ) ) {
mime_type = "application/dims";
} else if (!strncmp(probe_data, "<svg", 4) || strstr(probe_data, "http://www.w3.org/2000/svg") ) {
} else if (!strncmp(probe_data, "<svg", 4) || my_strstr(probe_data, "http://www.w3.org/2000/svg", probe_size) ) {
mime_type = "image/svg+xml";
} else if (!strncmp(probe_data, "<widget", strlen("<widget") ) ) {
mime_type = "application/widget";
Expand All @@ -911,8 +926,11 @@ static const char *ctxload_probe_data(const u8 *probe_data, u32 size, GF_FilterP
//get first keyword
while (1) {
//strip all spaces and \r\n
while (probe_data[0] && strchr("\n\r\t ", (char) probe_data[0]))
while (probe_size && probe_data[0] && strchr("\n\r\t ", (char) probe_data[0])) {
probe_data ++;
probe_size--;
}
if (!probe_size) goto exit;

//VRML / XRDV files
if (!strncmp(probe_data, "#VRML V2.0", strlen("#VRML V2.0"))) {
Expand All @@ -935,8 +953,10 @@ static const char *ctxload_probe_data(const u8 *probe_data, u32 size, GF_FilterP
break;
}
//skip line and go on
probe_data = strchr(probe_data, '\n');
if (!probe_data) goto exit;
const char *next = my_strstr(probe_data, "\n", probe_size);
if (!next) goto exit;
probe_size -= (u32) (next - (char*)probe_data);
probe_data = next;
}

if (!strncmp(probe_data, "InitialObjectDescriptor", strlen("InitialObjectDescriptor"))
Expand All @@ -947,12 +967,11 @@ static const char *ctxload_probe_data(const u8 *probe_data, u32 size, GF_FilterP
|| !strncmp(probe_data, "Layer2D", strlen("Layer2D"))
|| !strncmp(probe_data, "Layer3D", strlen("Layer3D"))
) {
if (strstr(probe_data, "children"))
if (my_strstr(probe_data, "children", probe_size))
mime_type = "application/x-bt";
}
}


exit:

if (dst) gf_free(dst);
Expand Down
2 changes: 1 addition & 1 deletion src/filters/load_text.c
Original file line number Diff line number Diff line change
Expand Up @@ -4341,7 +4341,7 @@ static const char *txtin_probe_data(const u8 *data, u32 data_size, GF_FilterProb
{
char *dst = NULL;
char *res=NULL;
GF_Err e = gf_utf_get_utf8_string_from_bom((char *)data, data_size, &dst, &res);
GF_Err e = gf_utf_get_string_from_bom((char *)data, data_size, &dst, &res, NULL);
if (e) return NULL;

data = res;
Expand Down
7 changes: 4 additions & 3 deletions src/utils/utf.c
Original file line number Diff line number Diff line change
Expand Up @@ -648,16 +648,15 @@ u32 gf_utf8_mbstowcs(unsigned short* dest, size_t len, const char** srcp)


GF_EXPORT
GF_Err gf_utf_get_utf8_string_from_bom(const u8 *data, u32 size, char **out_ptr, char **result)
GF_Err gf_utf_get_string_from_bom(const u8 *data, u32 size, char **out_ptr, char **result, u32 *res_size)
{
u32 unicode_type = 0;
if (!out_ptr || !result || !data) return GF_BAD_PARAM;
*out_ptr = NULL;
*result = (char *) data;
if (res_size) *res_size = size;

if (size>=5) {
if (!gf_utf8_is_legal(data+4, size-4))
return GF_IO_ERR;
/*0: no unicode, 1: UTF-16BE, 2: UTF-16LE*/
if ((data[0]==0xFF) && (data[1]==0xFE)) {
if (!data[2] && !data[3]) {
Expand All @@ -672,6 +671,7 @@ GF_Err gf_utf_get_utf8_string_from_bom(const u8 *data, u32 size, char **out_ptr,
unicode_type = 1;
}
} else if ((data[0]==0xEF) && (data[1]==0xBB) && (data[2]==0xBF)) {
if (res_size) *res_size = size-4;
*result = (char *) (data+4);
return GF_OK;
}
Expand Down Expand Up @@ -725,6 +725,7 @@ GF_Err gf_utf_get_utf8_string_from_bom(const u8 *data, u32 size, char **out_ptr,
return GF_IO_ERR;
}
*result = dst;
if (res_size) *res_size = res+1;
return GF_OK;
}

Expand Down

0 comments on commit 3ce00a2

Please sign in to comment.