Skip to content

Commit 4a284c8

Browse files
committed
* Enforce limit of 8K on regex searches that have no limits
* Allow the l modifier for regex to mean line count. Default to byte count. If line count is specified, assume a max of 80 characters per line to limit the byte count. * Don't allow conversions to be used for dates, allowing the mask field to be used as an offset. * Bump the version of the magic format so that regex changes are visible.
1 parent 954e2ff commit 4a284c8

File tree

11 files changed

+105
-63
lines changed

11 files changed

+105
-63
lines changed

Diff for: ChangeLog

+9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
2014-06-02 14:50 Christos Zoulas <christos@zoulas.com>
2+
3+
* Enforce limit of 8K on regex searches that have no limits
4+
* Allow the l modifier for regex to mean line count. Default
5+
to byte count. If line count is specified, assume a max
6+
of 80 characters per line to limit the byte count.
7+
* Don't allow conversions to be used for dates, allowing
8+
the mask field to be used as an offset.
9+
110
2014-05-30 12:51 Christos Zoulas <christos@zoulas.com>
211

312
* Make the range operator limit the length of the

Diff for: doc/magic.man

+14-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.\" $File: magic.man,v 1.82 2014/05/30 16:51:23 christos Exp $
1+
.\" $File: magic.man,v 1.83 2014/06/03 17:36:13 christos Exp $
22
.Dd June 3, 2014
33
.Dt MAGIC __FSECTION__
44
.Os
@@ -232,12 +232,21 @@ The size of the string to search should also be limited by specifying
232232
.Dv /<length> ,
233233
to avoid performance issues scanning long files.
234234
The type specification can also be optionally followed by
235-
.Dv /[c][s] .
235+
.Dv /[c][s][l] .
236236
The
237237
.Dq c
238238
flag makes the match case insensitive, while the
239239
.Dq s
240240
flag updates the offset to the start offset of the match, rather than the end.
241+
The
242+
.Dq l
243+
modifier changes the length limit to mean a number of lines instead of a
244+
byte count.
245+
Lines are delimited by the platform's native line delimiter.
246+
When a line count is specified, an implicit byte count is also computed assuming
247+
each line is 80 characters long.
248+
If neither a byte nor a line count is specified, the search is limited automatically
249+
to 8KiB.
241250
.Dv ^
242251
and
243252
.Dv $
@@ -406,6 +415,9 @@ is octal, and
406415
.Dv 0x13
407416
is hexadecimal.
408417
.Pp
418+
Numeric operations are not performed on date types; instead the numeric
419+
value is interpreted as an offset.
420+
.Pp
409421
For string values, the string from the
410422
file must match the specified string.
411423
The operators

Diff for: magic/Magdir/android

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
#------------------------------------------------------------
3-
# $File: android,v 1.2 2013/11/05 14:00:25 christos Exp $
3+
# $File: android,v 1.3 2013/11/08 01:24:22 christos Exp $
44
# Various android related magic entries
55
#------------------------------------------------------------
66

@@ -89,12 +89,12 @@
8989
>17 string 0\n \b, Not-Compressed
9090
>17 string 1\n \b, Compressed
9191
# any string as long as it's not the word none (which is matched below)
92-
>>19 regex/1 \^([^n\n]|n[^o]|no[^n]|non[^e]|none.+).* \b, Encrypted (%s)
92+
>>19 regex/1l \^([^n\n]|n[^o]|no[^n]|non[^e]|none.+).* \b, Encrypted (%s)
9393
>>19 string none\n \b, Not-Encrypted
9494
# Commented out because they don't seem useful to print
9595
# (but they are part of the header - the tar file comes after them):
96-
#>>>&1 regex/1 .* \b, Password salt: %s
97-
#>>>>&1 regex/1 .* \b, Master salt: %s
98-
#>>>>>&1 regex/1 .* \b, PBKDF2 rounds: %s
99-
#>>>>>>&1 regex/1 .* \b, IV: %s
100-
#>>>>>>>&1 regex/1 .* \b, Key: %s
96+
#>>>&1 regex/1l .* \b, Password salt: %s
97+
#>>>>&1 regex/1l .* \b, Master salt: %s
98+
#>>>>>&1 regex/1l .* \b, PBKDF2 rounds: %s
99+
#>>>>>>&1 regex/1l .* \b, IV: %s
100+
#>>>>>>>&1 regex/1l .* \b, Key: %s

Diff for: magic/Magdir/fortran

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
#------------------------------------------------------------------------------
3-
# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $
3+
# $File: fortran,v 1.7 2012/06/21 01:55:02 christos Exp $
44
# FORTRAN source
5-
0 regex/100 \^[Cc][\ \t] FORTRAN program
5+
0 regex/100l \^[Cc][\ \t] FORTRAN program
66
!:mime text/x-fortran
77
!:strength - 5

Diff for: magic/Magdir/graphviz

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11

22
#------------------------------------------------------------------------------
3-
# $File$
3+
# $File: graphviz,v 1.7 2009/09/19 16:28:09 christos Exp $
44
# graphviz: file(1) magic for http://www.graphviz.org/
55

66
# FIXME: These patterns match too generally. For example, the first
77
# line matches a LaTeX file containing the word "graph" (with a {
88
# following later) and the second line matches this file.
9-
#0 regex/100 [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text
9+
#0 regex/100l [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text
1010
#!:mime text/vnd.graphviz
11-
#0 regex/100 [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text
11+
#0 regex/100l [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text
1212
#!:mime text/vnd.graphviz

Diff for: magic/Magdir/marc21

+6-6
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,17 @@
1212
20 string 45
1313

1414
# leader starts with 5 digits, followed by codes specific to MARC format
15-
>0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic
15+
>0 regex/1l (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic
1616
!:mime application/marc
17-
>0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority
17+
>0 regex/1l (^[0-9]{5})[acdnosx][z] MARC21 Authority
1818
!:mime application/marc
19-
>0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings
19+
>0 regex/1l (^[0-9]{5})[cdn][uvxy] MARC21 Holdings
2020
!:mime application/marc
21-
0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification
21+
0 regex/1l (^[0-9]{5})[acdn][w] MARC21 Classification
2222
!:mime application/marc
23-
>0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community
23+
>0 regex/1l (^[0-9]{5})[cdn][q] MARC21 Community
2424
!:mime application/marc
2525

2626
# leader position 22-23, should be "00" but is it?
27-
>0 regex/1 (^.{21})([^0]{2}) (non-conforming)
27+
>0 regex/1l (^.{21})([^0]{2}) (non-conforming)
2828
!:mime application/marc

Diff for: magic/Magdir/scientific

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
#------------------------------------------------------------------------------
3-
# $File: scientific,v 1.7 2010/09/20 19:19:17 rrt Exp $
3+
# $File: scientific,v 1.8 2014/01/06 17:46:23 rrt Exp $
44
# scientific: file(1) magic for scientific formats
55
#
66
# From: Joe Krahn <krahn@niehs.nih.gov>
@@ -91,12 +91,12 @@
9191
# uppercase letters. However, examples have been seen without the date string,
9292
# e.g., the example on the chemime site.
9393
0 string HEADER\ \ \ \
94-
>&0 regex/1 \^.{40}
95-
>>&0 regex/1 [0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3}
96-
>>>&0 regex/1s [A-Z0-9]{4}.{14}$
97-
>>>>&0 regex/1 [A-Z0-9]{4} Protein Data Bank data, ID Code %s
94+
>&0 regex/1l \^.{40}
95+
>>&0 regex/1l [0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3}
96+
>>>&0 regex/1ls [A-Z0-9]{4}.{14}$
97+
>>>>&0 regex/1l [A-Z0-9]{4} Protein Data Bank data, ID Code %s
9898
!:mime chemical/x-pdb
99-
>>>>0 regex/1 [0-9]{2}-[A-Z]{3}-[0-9]{2} \b, %s
99+
>>>>0 regex/1l [0-9]{2}-[A-Z]{3}-[0-9]{2} \b, %s
100100

101101
# Type: GDSII Stream file
102102
0 belong 0x00060002 GDSII Stream file

Diff for: magic/Magdir/troff

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
#------------------------------------------------------------------------------
3-
# $File$
3+
# $File: troff,v 1.10 2009/09/19 16:28:12 christos Exp $
44
# troff: file(1) magic for *roff
55
#
66
# updated by Daniel Quinlan (quinlan@yggdrasil.com)
@@ -16,9 +16,9 @@
1616
!:mime text/troff
1717
0 search/1 ''' troff or preprocessor input text
1818
!:mime text/troff
19-
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text
19+
0 regex/20l \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text
2020
!:mime text/troff
21-
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text
21+
0 regex/20l \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text
2222
!:mime text/troff
2323

2424
# ditroff intermediate output text

Diff for: src/apprentice.c

+9-3
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
#include "file.h"
3333

3434
#ifndef lint
35-
FILE_RCSID("@(#)$File: apprentice.c,v 1.209 2014/05/13 16:42:17 christos Exp $")
35+
FILE_RCSID("@(#)$File: apprentice.c,v 1.210 2014/05/14 23:15:42 christos Exp $")
3636
#endif /* lint */
3737

3838
#include "magic.h"
@@ -1382,7 +1382,8 @@ string_modifier_check(struct magic_set *ms, struct magic *m)
13821382
if ((ms->flags & MAGIC_CHECK) == 0)
13831383
return 0;
13841384

1385-
if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
1385+
if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1386+
(m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
13861387
file_magwarn(ms,
13871388
"'/BHhLl' modifiers are only allowed for pascal strings\n");
13881389
return -1;
@@ -1875,8 +1876,13 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
18751876
m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
18761877
break;
18771878
case CHAR_PSTRING_4_LE:
1878-
if (m->type != FILE_PSTRING)
1879+
switch (m->type) {
1880+
case FILE_PSTRING:
1881+
case FILE_REGEX:
1882+
break;
1883+
default:
18791884
goto bad;
1885+
}
18801886
m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
18811887
break;
18821888
case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:

Diff for: src/file.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
*/
2828
/*
2929
* file.h - definitions for file(1) program
30-
* @(#)$File: file.h,v 1.150 2014/05/05 20:53:10 christos Exp $
30+
* @(#)$File: file.h,v 1.151 2014/05/14 23:15:42 christos Exp $
3131
*/
3232

3333
#ifndef __file_h__
@@ -133,7 +133,7 @@
133133
#define MAXstring 64 /* max len of "string" types */
134134

135135
#define MAGICNO 0xF11E041C
136-
#define VERSIONNO 11
136+
#define VERSIONNO 12
137137
#define FILE_MAGICSIZE 248
138138

139139
#define FILE_LOAD 0
@@ -321,6 +321,7 @@ struct magic {
321321
#define PSTRING_2_LE BIT(9)
322322
#define PSTRING_4_BE BIT(10)
323323
#define PSTRING_4_LE BIT(11)
324+
#define REGEX_LINE_COUNT BIT(11)
324325
#define PSTRING_LEN \
325326
(PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)
326327
#define PSTRING_LENGTH_INCLUDES_ITSELF BIT(12)

0 commit comments

Comments
 (0)