Skip to content

Commit

Permalink
Merge branch 'PHP-5.3' into PHP-5.4
Browse files Browse the repository at this point in the history
* PHP-5.3:
  - fix test for 5.11
  Fix bug #61504, potential vuln. in fileinfo. update to 5.11
  • Loading branch information
pierrejoye committed Mar 27, 2012
2 parents 50bdc48 + d0e3289 commit a817465
Show file tree
Hide file tree
Showing 26 changed files with 75,986 additions and 41,256 deletions.
111,381 changes: 72,204 additions & 39,177 deletions ext/fileinfo/data_file.c

Large diffs are not rendered by default.

1,312 changes: 624 additions & 688 deletions ext/fileinfo/libmagic.patch

Large diffs are not rendered by default.

353 changes: 272 additions & 81 deletions ext/fileinfo/libmagic/apprentice.c

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion ext/fileinfo/libmagic/apptype.c
Expand Up @@ -27,7 +27,7 @@
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: apptype.c,v 1.11 2009/02/04 18:24:32 christos Exp $")
FILE_RCSID("@(#)$File: apptype.c,v 1.13 2011/09/07 21:57:15 christos Exp $")
#endif /* lint */

#include <stdlib.h>
Expand Down Expand Up @@ -72,6 +72,7 @@ file_os2_apptype(struct magic_set *ms, const char *fn, const void *buf,
if (fwrite(buf, 1, nb, fp) != nb) {
file_error(ms, errno, "cannot write tmp file `%s'",
path);
(void)fclose(fp);
return -1;
}
(void)fclose(fp);
Expand Down
129 changes: 50 additions & 79 deletions ext/fileinfo/libmagic/ascmagic.c
Expand Up @@ -26,8 +26,7 @@
* SUCH DAMAGE.
*/
/*
* ASCII magic -- file types that we know based on keywords
* that can appear anywhere in the file.
* ASCII magic -- try to detect text encoding.
*
* Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
* to handle character codes other than ASCII on a unified basis.
Expand All @@ -36,7 +35,7 @@
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20:27:51 christos Exp $")
FILE_RCSID("@(#)$File: ascmagic.c,v 1.84 2011/12/08 12:38:24 rrt Exp $")
#endif /* lint */

#include "magic.h"
Expand All @@ -47,13 +46,11 @@ FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20:27:51 christos Exp $")
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "names.h"

#define MAXLINELEN 300 /* longest sane line length */
#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
|| (x) == 0x85 || (x) == '\f')

private int ascmatch(const unsigned char *, const unichar *, size_t);
private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t);
private size_t trim_nuls(const unsigned char *, size_t);

Expand All @@ -71,7 +68,8 @@ trim_nuls(const unsigned char *buf, size_t nbytes)
}

protected int
file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes,
int text)
{
unichar *ubuf = NULL;
size_t ulen;
Expand All @@ -88,29 +86,24 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)

/* If file doesn't look like any sort of text, give up. */
if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime,
&type) == 0) {
&type) == 0)
rv = 0;
goto done;
}
else
rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code,
type, text);

rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code,
type);

done:
if (ubuf)
free(ubuf);
free(ubuf);

return rv;
}

protected int
file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
size_t nbytes, unichar *ubuf, size_t ulen, const char *code,
const char *type)
const char *type, int text)
{
unsigned char *utf8_buf = NULL, *utf8_end;
size_t mlen, i;
const struct names *p;
int rv = -1;
int mime = ms->flags & MAGIC_MIME;

Expand All @@ -125,6 +118,7 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
int n_lf = 0;
int n_cr = 0;
int n_nel = 0;
int executable = 0;

size_t last_line_end = (size_t)-1;
int has_long_lines = 0;
Expand All @@ -140,54 +134,21 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
goto done;
}

/* Convert ubuf to UTF-8 and try text soft magic */
/* malloc size is a conservative overestimate; could be
improved, or at least realloced after conversion. */
mlen = ulen * 6;
utf8_buf = emalloc(mlen);

if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL)
goto done;
if ((rv = file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf),
TEXTTEST)) != 0)
goto done;
else
rv = -1;

/* look for tokens from names.h - this is expensive! */
if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0)
goto subtype_identified;

i = 0;
while (i < ulen) {
size_t end;

/* skip past any leading space */
while (i < ulen && ISSPC(ubuf[i]))
i++;
if (i >= ulen)
break;

/* find the next whitespace */
for (end = i + 1; end < nbytes; end++)
if (ISSPC(ubuf[end]))
break;

/* compare the word thus isolated against the token list */
for (p = names; p < names + NNAMES; p++) {
if (ascmatch((const unsigned char *)p->name, ubuf + i,
end - i)) {
subtype = types[p->type].human;
subtype_mime = types[p->type].mime;
goto subtype_identified;
}
}
if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) {
/* Convert ubuf to UTF-8 and try text soft magic */
/* malloc size is a conservative overestimate; could be
improved, or at least realloced after conversion. */
mlen = ulen * 6;
utf8_buf = emalloc(mlen);

i = end;
if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen))
== NULL)
goto done;
if ((rv = file_softmagic(ms, utf8_buf,
(size_t)(utf8_end - utf8_buf), TEXTTEST, text)) == 0)
rv = -1;
}

subtype_identified:

/* Now try to discover other details about the file. */
for (i = 0; i < ulen; i++) {
if (ubuf[i] == '\n') {
Expand Down Expand Up @@ -230,7 +191,7 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
goto done;
}
if (mime) {
if ((mime & MAGIC_MIME_TYPE) != 0) {
if (!file_printedlen(ms) && (mime & MAGIC_MIME_TYPE) != 0) {
if (subtype_mime) {
if (file_printf(ms, "%s", subtype_mime) == -1)
goto done;
Expand All @@ -240,6 +201,28 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
}
}
} else {
if (file_printedlen(ms)) {
switch (file_replace(ms, " text$", ", ")) {
case 0:
switch (file_replace(ms, " text executable$",
", ")) {
case 0:
if (file_printf(ms, ", ") == -1)
goto done;
case -1:
goto done;
default:
executable = 1;
break;
}
break;
case -1:
goto done;
default:
break;
}
}

if (file_printf(ms, "%s", code) == -1)
goto done;

Expand All @@ -251,6 +234,10 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
if (file_printf(ms, " %s", type) == -1)
goto done;

if (executable)
if (file_printf(ms, " executable") == -1)
goto done;

if (has_long_lines)
if (file_printf(ms, ", with very long lines") == -1)
goto done;
Expand Down Expand Up @@ -313,22 +300,6 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf,
return rv;
}

/*
 * Compare a NUL-terminated byte string against a run of code points.
 *
 * s    - NUL-terminated keyword to match.
 * us   - code points to compare against; need not be terminated.
 * ulen - number of elements of us to compare.
 *
 * Returns 1 when s is exactly ulen characters long and every character
 * equals the corresponding element of us, otherwise 0.
 */
private int
ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
{
	size_t i;

	for (i = 0; i < ulen; i++) {
		/*
		 * Stop at the terminator of s: a zero element in us must not
		 * count as a match, or the comparison would keep reading s
		 * beyond its end.
		 */
		if (s[i] == '\0' || s[i] != us[i])
			return 0;
	}

	/* s has to end exactly where the compared run ends. */
	return s[i] == '\0';
}

/*
* Encode Unicode string as UTF-8, returning pointer to character
* after end of string, or NULL if an invalid character is found.
Expand Down

0 comments on commit a817465

Please sign in to comment.