Skip to content
Permalink
Browse files Browse the repository at this point in the history
* Enforce limit of 8K on regex searches that have no limits
* Allow the l modifier for regex to mean line count. Default
  to byte count. If line count is specified, assume a max
  of 80 characters per line to limit the byte count.
* Don't allow conversions to be used for dates, allowing
  the mask field to be used as an offset.
* Bump the version of the magic format so that regex changes
  are visible.
  • Loading branch information
zoulasc committed Jun 3, 2014
1 parent 954e2ff commit 4a284c8
Show file tree
Hide file tree
Showing 11 changed files with 105 additions and 63 deletions.
9 changes: 9 additions & 0 deletions ChangeLog
@@ -1,3 +1,12 @@
2014-06-02 14:50 Christos Zoulas <christos@zoulas.com>

* Enforce limit of 8K on regex searches that have no limits
* Allow the l modifier for regex to mean line count. Default
to byte count. If line count is specified, assume a max
of 80 characters per line to limit the byte count.
* Don't allow conversions to be used for dates, allowing
the mask field to be used as an offset.

2014-05-30 12:51 Christos Zoulas <christos@zoulas.com>

* Make the range operator limit the length of the
Expand Down
16 changes: 14 additions & 2 deletions doc/magic.man
@@ -1,4 +1,4 @@
.\" $File: magic.man,v 1.82 2014/05/30 16:51:23 christos Exp $
.\" $File: magic.man,v 1.83 2014/06/03 17:36:13 christos Exp $
.Dd June 3, 2014
.Dt MAGIC __FSECTION__
.Os
Expand Down Expand Up @@ -232,12 +232,21 @@ The size of the string to search should also be limited by specifying
.Dv /<length> ,
to avoid performance issues scanning long files.
The type specification can also be optionally followed by
.Dv /[c][s] .
.Dv /[c][s][l] .
The
.Dq c
flag makes the match case insensitive, while the
.Dq s
flag updates the offset to the start offset of the match, rather than the end.
The
.Dq l
modifier changes the length limit to mean a number of lines instead of a
byte count.
Lines are delimited by the platform's native line delimiter.
When a line count is specified, an implicit byte count is also computed assuming
each line is 80 characters long.
If neither a byte nor a line count is specified, the search is limited automatically
to 8KiB.
.Dv ^
and
.Dv $
Expand Down Expand Up @@ -406,6 +415,9 @@ is octal, and
.Dv 0x13
is hexadecimal.
.Pp
Numeric operations are not performed on date types; instead, the numeric
value is interpreted as an offset.
.Pp
For string values, the string from the
file must match the specified string.
The operators
Expand Down
14 changes: 7 additions & 7 deletions magic/Magdir/android
@@ -1,6 +1,6 @@

#------------------------------------------------------------
# $File: android,v 1.2 2013/11/05 14:00:25 christos Exp $
# $File: android,v 1.3 2013/11/08 01:24:22 christos Exp $
# Various android related magic entries
#------------------------------------------------------------

Expand Down Expand Up @@ -89,12 +89,12 @@
>17 string 0\n \b, Not-Compressed
>17 string 1\n \b, Compressed
# any string as long as it's not the word none (which is matched below)
>>19 regex/1 \^([^n\n]|n[^o]|no[^n]|non[^e]|none.+).* \b, Encrypted (%s)
>>19 regex/1l \^([^n\n]|n[^o]|no[^n]|non[^e]|none.+).* \b, Encrypted (%s)
>>19 string none\n \b, Not-Encrypted
# Commented out because they don't seem useful to print
# (but they are part of the header - the tar file comes after them):
#>>>&1 regex/1 .* \b, Password salt: %s
#>>>>&1 regex/1 .* \b, Master salt: %s
#>>>>>&1 regex/1 .* \b, PBKDF2 rounds: %s
#>>>>>>&1 regex/1 .* \b, IV: %s
#>>>>>>>&1 regex/1 .* \b, Key: %s
#>>>&1 regex/1l .* \b, Password salt: %s
#>>>>&1 regex/1l .* \b, Master salt: %s
#>>>>>&1 regex/1l .* \b, PBKDF2 rounds: %s
#>>>>>>&1 regex/1l .* \b, IV: %s
#>>>>>>>&1 regex/1l .* \b, Key: %s
4 changes: 2 additions & 2 deletions magic/Magdir/fortran
@@ -1,7 +1,7 @@

#------------------------------------------------------------------------------
# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $
# $File: fortran,v 1.7 2012/06/21 01:55:02 christos Exp $
# FORTRAN source
0 regex/100 \^[Cc][\ \t] FORTRAN program
0 regex/100l \^[Cc][\ \t] FORTRAN program

This comment has been minimized.

Copy link
@domo141

domo141 Feb 19, 2015

Cool stuff: my README containing line 'C compiler -- required to compile some code' trigger the recognition ;/

!:mime text/x-fortran
!:strength - 5
6 changes: 3 additions & 3 deletions magic/Magdir/graphviz
@@ -1,12 +1,12 @@

#------------------------------------------------------------------------------
# $File$
# $File: graphviz,v 1.7 2009/09/19 16:28:09 christos Exp $
# graphviz: file(1) magic for http://www.graphviz.org/

# FIXME: These patterns match too generally. For example, the first
# line matches a LaTeX file containing the word "graph" (with a {
# following later) and the second line matches this file.
#0 regex/100 [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text
#0 regex/100l [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text
#!:mime text/vnd.graphviz
#0 regex/100 [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text
#0 regex/100l [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text
#!:mime text/vnd.graphviz
12 changes: 6 additions & 6 deletions magic/Magdir/marc21
Expand Up @@ -12,17 +12,17 @@
20 string 45

# leader starts with 5 digits, followed by codes specific to MARC format
>0 regex/1 (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic
>0 regex/1l (^[0-9]{5})[acdnp][^bhlnqsu-z] MARC21 Bibliographic
!:mime application/marc
>0 regex/1 (^[0-9]{5})[acdnosx][z] MARC21 Authority
>0 regex/1l (^[0-9]{5})[acdnosx][z] MARC21 Authority
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][uvxy] MARC21 Holdings
>0 regex/1l (^[0-9]{5})[cdn][uvxy] MARC21 Holdings
!:mime application/marc
0 regex/1 (^[0-9]{5})[acdn][w] MARC21 Classification
0 regex/1l (^[0-9]{5})[acdn][w] MARC21 Classification
!:mime application/marc
>0 regex/1 (^[0-9]{5})[cdn][q] MARC21 Community
>0 regex/1l (^[0-9]{5})[cdn][q] MARC21 Community
!:mime application/marc

# leader position 22-23, should be "00" but is it?
>0 regex/1 (^.{21})([^0]{2}) (non-conforming)
>0 regex/1l (^.{21})([^0]{2}) (non-conforming)
!:mime application/marc
12 changes: 6 additions & 6 deletions magic/Magdir/scientific
@@ -1,6 +1,6 @@

#------------------------------------------------------------------------------
# $File: scientific,v 1.7 2010/09/20 19:19:17 rrt Exp $
# $File: scientific,v 1.8 2014/01/06 17:46:23 rrt Exp $
# scientific: file(1) magic for scientific formats
#
# From: Joe Krahn <krahn@niehs.nih.gov>
Expand Down Expand Up @@ -91,12 +91,12 @@
# uppercase letters. However, examples have been seen without the date string,
# e.g., the example on the chemime site.
0 string HEADER\ \ \ \
>&0 regex/1 \^.{40}
>>&0 regex/1 [0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3}
>>>&0 regex/1s [A-Z0-9]{4}.{14}$
>>>>&0 regex/1 [A-Z0-9]{4} Protein Data Bank data, ID Code %s
>&0 regex/1l \^.{40}
>>&0 regex/1l [0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3}
>>>&0 regex/1ls [A-Z0-9]{4}.{14}$
>>>>&0 regex/1l [A-Z0-9]{4} Protein Data Bank data, ID Code %s
!:mime chemical/x-pdb
>>>>0 regex/1 [0-9]{2}-[A-Z]{3}-[0-9]{2} \b, %s
>>>>0 regex/1l [0-9]{2}-[A-Z]{3}-[0-9]{2} \b, %s

# Type: GDSII Stream file
0 belong 0x00060002 GDSII Stream file
Expand Down
6 changes: 3 additions & 3 deletions magic/Magdir/troff
@@ -1,6 +1,6 @@

#------------------------------------------------------------------------------
# $File$
# $File: troff,v 1.10 2009/09/19 16:28:12 christos Exp $
# troff: file(1) magic for *roff
#
# updated by Daniel Quinlan (quinlan@yggdrasil.com)
Expand All @@ -16,9 +16,9 @@
!:mime text/troff
0 search/1 ''' troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text
0 regex/20l \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text
!:mime text/troff
0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text
0 regex/20l \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text
!:mime text/troff

# ditroff intermediate output text
Expand Down
12 changes: 9 additions & 3 deletions src/apprentice.c
Expand Up @@ -32,7 +32,7 @@
#include "file.h"

#ifndef lint
FILE_RCSID("@(#)$File: apprentice.c,v 1.209 2014/05/13 16:42:17 christos Exp $")
FILE_RCSID("@(#)$File: apprentice.c,v 1.210 2014/05/14 23:15:42 christos Exp $")
#endif /* lint */

#include "magic.h"
Expand Down Expand Up @@ -1382,7 +1382,8 @@ string_modifier_check(struct magic_set *ms, struct magic *m)
if ((ms->flags & MAGIC_CHECK) == 0)
return 0;

if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
(m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
file_magwarn(ms,
"'/BHhLl' modifiers are only allowed for pascal strings\n");
return -1;
Expand Down Expand Up @@ -1875,8 +1876,13 @@ parse(struct magic_set *ms, struct magic_entry *me, const char *line,
m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
break;
case CHAR_PSTRING_4_LE:
if (m->type != FILE_PSTRING)
switch (m->type) {
case FILE_PSTRING:
case FILE_REGEX:
break;
default:
goto bad;
}
m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
break;
case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
Expand Down
5 changes: 3 additions & 2 deletions src/file.h
Expand Up @@ -27,7 +27,7 @@
*/
/*
* file.h - definitions for file(1) program
* @(#)$File: file.h,v 1.150 2014/05/05 20:53:10 christos Exp $
* @(#)$File: file.h,v 1.151 2014/05/14 23:15:42 christos Exp $
*/

#ifndef __file_h__
Expand Down Expand Up @@ -133,7 +133,7 @@
#define MAXstring 64 /* max len of "string" types */

#define MAGICNO 0xF11E041C
#define VERSIONNO 11
#define VERSIONNO 12
#define FILE_MAGICSIZE 248

#define FILE_LOAD 0
Expand Down Expand Up @@ -321,6 +321,7 @@ struct magic {
#define PSTRING_2_LE BIT(9)
#define PSTRING_4_BE BIT(10)
#define PSTRING_4_LE BIT(11)
#define REGEX_LINE_COUNT BIT(11)
#define PSTRING_LEN \
(PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)
#define PSTRING_LENGTH_INCLUDES_ITSELF BIT(12)
Expand Down

0 comments on commit 4a284c8

Please sign in to comment.