PR#4777 (Scanf tests fail) and PR#4910 format concatenation.

git-svn-id: http://caml.inria.fr/svn/ocaml/version/3.11@9412 f963ae5c-01c2-4b8c-9fe0-0dff7051ff02
bmeurer · Nov 9, 2009 · 6c93714 · 6c93714
1 parent 8c64306
commit 6c93714
Show file tree

Hide file tree

Showing 11 changed files with 39 additions and 17 deletions.
diff --git a/VERSION b/VERSION
@@ -1,4 +1,4 @@
-3.11.2+dev4 (2009-10-30)
+3.11.2+dev5 (2009-11-09)
 
 # The version string is the first line of this file.
 # It must be in the format described in stdlib/sys.mli

diff --git a/boot/ocamlc b/boot/ocamlc
diff --git a/boot/ocamldep b/boot/ocamldep
diff --git a/boot/ocamllex b/boot/ocamllex
diff --git a/parsing/parser.mly b/parsing/parser.mly
@@ -1403,7 +1403,13 @@ signed_constant:
   | MINUS INT32                                 { Const_int32(Int32.neg $2) }
   | MINUS INT64                                 { Const_int64(Int64.neg $2) }
   | MINUS NATIVEINT                             { Const_nativeint(Nativeint.neg $2) }
+  | PLUS INT                                    { Const_int $2 }
+  | PLUS FLOAT                                  { Const_float $2 }
+  | PLUS INT32                                  { Const_int32 $2 }
+  | PLUS INT64                                  { Const_int64 $2 }
+  | PLUS NATIVEINT                              { Const_nativeint $2 }
 ;
+
 /* Identifiers and long identifiers */
 
 ident:

diff --git a/stdlib/pervasives.ml b/stdlib/pervasives.ml
@@ -398,7 +398,7 @@ external incr: int ref -> unit = "%incr"
 external decr: int ref -> unit = "%decr"
 
 (* Formats *)
-type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6 
+type ('a, 'b, 'c, 'd) format4 = ('a, 'b, 'c, 'c, 'c, 'd) format6
 
 type ('a, 'b, 'c) format = ('a, 'b, 'c, 'c) format4
 
@@ -416,7 +416,8 @@ let (( ^^ ) :
       ('f, 'b, 'c, 'e, 'g, 'h) format6 ->
       ('a, 'b, 'c, 'd, 'g, 'h) format6) =
   fun fmt1 fmt2 ->
-    string_to_format (format_to_string fmt1 ^ format_to_string fmt2);;
+    string_to_format (format_to_string fmt1 ^ "%," ^ format_to_string fmt2)
+;;
 
 let string_of_format fmt =
   let s = format_to_string fmt in

diff --git a/stdlib/printf.ml b/stdlib/printf.ml
@@ -215,7 +215,7 @@ let iter_on_format_args fmt add_conv add_char =
   and scan_conv skip i =
     if i > lim then incomplete_format fmt else
     match Sformat.unsafe_get fmt i with
-    | '%' | '!' -> succ i
+    | '%' | '!' | ',' -> succ i
     | 's' | 'S' | '[' -> add_conv skip i 's'
     | 'c' | 'C' -> add_conv skip i 'c'
     | 'd' | 'i' |'o' | 'u' | 'x' | 'X' | 'N' -> add_conv skip i 'i'
@@ -433,10 +433,7 @@ let format_float_lexeme =
   let valid_float_lexeme sfmt s =
     let l = String.length s in
     if l = 0 then "nan" else
-      let add_dot sfmt s =
-        if s.[0] = ' ' || s.[0] = '+' || s.[0] = '0'
-        then String.sub s 1 (l - 1) ^ "."
-        else s ^ "." in
+      let add_dot sfmt s = s ^ "." in
 
       let rec loop i =
         if i >= l then add_dot sfmt s else
@@ -562,6 +559,7 @@ let scan_format fmt args n pos cont_s cont_a cont_t cont_f cont_m =
         let s = format_int (extract_format_int 'n' fmt pos i widths) x in
         cont_s (next_index spec n) s (succ i)
       end
+    | ',' -> cont_s n "" (succ i)
     | '!' -> cont_f n (succ i)
     | '{' | '(' as conv (* ')' '}' *) ->
       let (xf : ('a, 'b, 'c, 'd, 'e, 'f) format6) = get_arg spec n in

diff --git a/stdlib/printf.mli b/stdlib/printf.mli
@@ -78,6 +78,8 @@ val fprintf : out_channel -> ('a, out_channel, unit) format -> 'a
      type as [fmt].
    - [!]: take no argument and flush the output.
    - [%]: take no argument and output one [%] character.
+   - [,]: take no argument and do nothing (useful to delimit a conversion
+     specification).
 
    The optional [flags] are:
    - [-]: left-justify the output (default is right justification).

diff --git a/stdlib/scanf.ml b/stdlib/scanf.ml
@@ -407,11 +407,18 @@ let compatible_format_type fmt1 fmt2 =
    In this case, the character c has been explicitely specified in the
    format as being mandatory in the input; hence we should fail with
    End_of_file in case of end_of_input.
-   That's why we use checked_peek_char here. *)
-let check_char ib c =
+   That's why we use checked_peek_char here.
+   We are also careful to treat "\r\n" in the input as a end of line marker: it
+   always matches a '\n' specification in the input format string.
+ *)
+let rec check_char ib c =
   let ci = Scanning.checked_peek_char ib in
-  if ci = c then Scanning.invalidate_current_char ib else
-    character_mismatch c ci
+  if ci = c then Scanning.invalidate_current_char ib else begin
+    match ci with
+    | '\r' when c = '\n' ->
+      Scanning.invalidate_current_char ib; check_char ib '\n'
+    | _ -> character_mismatch c ci
+  end
 ;;
 
 (* Checks that the current char is indeed one of the stopper characters,
@@ -779,6 +786,8 @@ let scan_String max ib =
     let c = Scanning.checked_peek_char ib in
     if Scanning.eof ib then bad_input "a string" else
     match c, s with
+    | '\r', true ->
+      skip_spaces true (Scanning.ignore_char ib max)
     | '\n', true
     | ' ', false ->
       skip_spaces false (Scanning.ignore_char ib max)
@@ -1194,6 +1203,8 @@ let scan_format ib ef fmt rv f =
       | '!' ->
         if Scanning.end_of_input ib then scan_fmt ir f (succ i)
         else bad_input "end of input not found"
+      | ',' ->
+        scan_fmt ir f (succ i)
       | '_' ->
         if i > lim then incomplete_format fmt else
         scan_conversion true max ir f (succ i)

diff --git a/stdlib/scanf.mli b/stdlib/scanf.mli
@@ -197,12 +197,15 @@ val bscanf : Scanning.scanbuf -> ('a, 'b, 'c, 'd) scanner;;
 (** {7 The space character in format strings} *)
 
 (** As mentioned above, a plain character in the format string is just
-    matched with the characters of the input; however, one character is a
-    special exception to this simple rule: the space character (ASCII code
-    32) does not match a single space character, but any amount of
+    matched with the next character of the input; however, two characters are
+    special exceptions to this rule: the space character ([' '] or ASCII code
+    32) and the line feed character (['\n'] or ASCII code 10).
+    A space does not match a single space character, but any amount of
     ``whitespace'' in the input. More precisely, a space inside the format
     string matches {e any number} of tab, space, line feed and carriage
-    return characters.
+    return characters. Similarly, a line feed character in the format string
+    matches either a single line feed or a carriage return followed by a line
+    feed.
 
     Matching {e any} amount of whitespace, a space in the format string
     also matches no amount of whitespace at all; hence, the call [bscanf ib
@@ -287,6 +290,7 @@ val bscanf : Scanning.scanbuf -> ('a, 'b, 'c, 'd) scanner;;
     - [N] or [L]: returns the number of tokens read so far.
     - [!]: matches the end of input condition.
     - [%]: matches one [%] character in the input.
+    - [,]: does nothing (useful to delimit a conversion specification).
 
     Following the [%] character that introduces a conversion, there may be
     the special flag [_]: the conversion that follows occurs as usual,

diff --git a/typing/typecore.ml b/typing/typecore.ml
@@ -752,7 +752,7 @@ let type_format loc fmt =
       and scan_conversion i j =
         if j >= len then incomplete_format fmt else
         match fmt.[j] with
-        | '%' | '!' -> scan_format (j + 1)
+        | '%' | '!' | ',' -> scan_format (j + 1)
         | 's' | 'S' -> conversion j Predef.type_string
         | '[' ->
           let j = range_closing_index fmt j in