diff --git a/src/lib/biocaml_gff.ml b/src/lib/biocaml_gff.ml index 9a0f23d..de4eda8 100644 --- a/src/lib/biocaml_gff.ml +++ b/src/lib/biocaml_gff.ml @@ -31,8 +31,15 @@ let module_error e = Error (`gff e) module Tags = struct - type t = [ `version of [`two | `three] | `pedantic ] list with sexp - let default = [`version `three; `pedantic] + type t = { + version: [`two | `three]; + allow_empty_lines: bool; + sharp_comments: bool; + } + with sexp + + let default = + {version = `three; allow_empty_lines = false; sharp_comments = true} let to_string t = sexp_of_t t |> Sexplib.Sexp.to_string @@ -198,25 +205,23 @@ module Transform = struct output_error (`wrong_row (pos, s)) end - let rec next ?(pedantic=true) ?(sharp_comments=true) ?(version=`three) p = + let rec next ~tags p = let open Biocaml_lines.Buffer in let open Result in match (next_line p :> string option) with | None -> `not_ready | Some "" -> - if pedantic + if tags.Tags.allow_empty_lines then output_error (`empty_line (current_position p)) - else next ~pedantic ~sharp_comments ~version p - | Some l when sharp_comments && String.(is_prefix (strip l) ~prefix:"#") -> + else next ~tags p + | Some l when + tags.Tags.sharp_comments && String.(is_prefix (strip l) ~prefix:"#") -> output_ok (`comment String.(sub l ~pos:1 ~len:(length l - 1))) - | Some l -> parse_row ~version (current_position p) l + | Some l -> parse_row ~version:tags.Tags.version (current_position p) l - let string_to_item ?filename ?(tags=Tags.default) () = + let string_to_item ?filename ~tags () = let name = sprintf "gff_parser:%s" Option.(value ~default:"<>" filename) in - let pedantic = List.mem tags `pedantic in - let version = - List.find_map tags (function `version v -> Some v | _ -> None) in - let next = next ~pedantic ?version in + let next = next ~tags in Biocaml_lines.Transform.make_merge_error ~name ?filename ~next () let item_to_string_pure version = (function @@ -248,20 +253,18 @@ module Transform = struct ] ^ "\n" ) - let item_to_string ?(tags=Tags.default) () = - let version = - List.find_map tags (function `version v -> Some v | _ -> None) - |! Option.value ~default:`three in + let item_to_string ~tags () = Biocaml_transform.of_function ~name:"gff_to_string" - (item_to_string_pure version) + (item_to_string_pure tags.Tags.version) end exception Error of Error.t let error_to_exn e = Error e -let in_channel_to_item_stream ?(buffer_size=65536) ?filename ?tags inp = - let x = Transform.string_to_item ?tags ?filename () in +let in_channel_to_item_stream + ?(buffer_size=65536) ?filename ?(tags=Tags.default) inp = + let x = Transform.string_to_item ~tags ?filename () in Biocaml_transform.(in_channel_strings_to_stream inp x ~buffer_size) let in_channel_to_item_stream_exn ?buffer_size ?tags inp = @@ -269,7 +272,4 @@ let in_channel_to_item_stream_exn ?buffer_size ?tags inp = (in_channel_to_item_stream ?buffer_size ?tags inp) let item_to_string ?(tags=Tags.default) item = - let version = - List.find_map tags (function `version v -> Some v | _ -> None) - |! Option.value ~default:`three in - Transform.item_to_string_pure version item + Transform.item_to_string_pure tags.Tags.version item diff --git a/src/lib/biocaml_gff.mli b/src/lib/biocaml_gff.mli index 875ceff..0df4fb5 100644 --- a/src/lib/biocaml_gff.mli +++ b/src/lib/biocaml_gff.mli @@ -70,11 +70,16 @@ end module Tags: sig - type t = [ `version of [`two | `three] | `pedantic ] list + type t = { + version: [`two | `three]; + allow_empty_lines: bool; + sharp_comments: bool; + } (** Additional format-information tags (c.f. {!Biocaml_tags}). *) val default: t - (** Default tags for a random Gff file: [[`version `three; `pedantic]]. *) + (** Default tags for a random Gff file: + [{version = `three; allow_empty_lines = false; sharp_comments = true}]. *) val of_string: string -> (t, [> `gff of [> `tags_of_string of exn ] ]) Core.Result.t @@ -114,12 +119,12 @@ module Transform: sig val string_to_item: ?filename:string -> - ?tags: Tags.t -> + tags: Tags.t -> unit -> (string, (item, [> Error.parsing]) Core.Result.t) Biocaml_transform.t (** Create a parsing [Biocaml_transform.t] for a given version. *) - val item_to_string: ?tags: Tags.t -> unit -> + val item_to_string: tags: Tags.t -> unit -> (item, string) Biocaml_transform.t (** Create a printer for a given version. *) diff --git a/src/lib/biocaml_track.ml b/src/lib/biocaml_track.ml index 12e4237..341e9eb 100644 --- a/src/lib/biocaml_track.ml +++ b/src/lib/biocaml_track.ml @@ -154,9 +154,9 @@ module Transform = struct type gff_parse_error = [Error.parsing | Gff.Error.parsing] type gff_t = [track | Gff.item] - let string_to_gff ?filename ?tags () = + let string_to_gff ?filename ~tags () = let gff = Gff.Transform.string_to_item ?filename () in - embed_parser ?filename gff + embed_parser ?filename (gff ~tags) ~reconstruct:(function | `bypassed (Ok f) -> Ok (f :> gff_t) | `bypassed (Error f) -> Error (f :> [> gff_parse_error]) @@ -192,8 +192,8 @@ module Transform = struct | `comment _ | `track _ | `browser _ as x -> `left x | #Wig.item as y -> `right y) - let gff_to_string ?tags () = - let gff = Gff.Transform.item_to_string ?tags () in + let gff_to_string ~tags () = + let gff = Gff.Transform.item_to_string ~tags () in make_printer gff () ~split:(function | `comment _ | `track _ | `browser _ as x -> `left x diff --git a/src/lib/biocaml_track.mli b/src/lib/biocaml_track.mli index a4b327c..1d8d173 100644 --- a/src/lib/biocaml_track.mli +++ b/src/lib/biocaml_track.mli @@ -110,13 +110,13 @@ module Transform: sig ([ t | Biocaml_wig.item ], string) Biocaml_transform.t (** Create a printer for track files containing WIG lines. *) - val string_to_gff: ?filename:string -> ?tags: Biocaml_gff.Tags.t -> unit -> + val string_to_gff: ?filename:string -> tags: Biocaml_gff.Tags.t -> unit -> (string, ([t | Biocaml_gff.item], [> Error.parsing | Biocaml_gff.Error.parsing]) Core.Result.t) Biocaml_transform.t (** Create a composite parser for UCSC GFF files. *) - val gff_to_string: ?tags: Biocaml_gff.Tags.t -> unit -> + val gff_to_string: tags: Biocaml_gff.Tags.t -> unit -> ([ t | Biocaml_gff.item ], string) Biocaml_transform.t (** Create a printer for track files containing GFF lines. *) @@ -132,4 +132,3 @@ module Transform: sig (** Create a printer for track files containing Bed(Graph) lines. *) end - diff --git a/src/tests/test_gff.ml b/src/tests/test_gff.ml index 6c74b40..dc3c25f 100644 --- a/src/tests/test_gff.ml +++ b/src/tests/test_gff.ml @@ -3,7 +3,7 @@ open Biocaml_internal_pervasives open Biocaml let test_parser () = - let transfo = Gff.Transform.string_to_item () in + let transfo = Gff.Transform.string_to_item ~tags:Gff.Tags.default () in let test_line l f = let joined = (String.concat ~sep:"\t" l) in Transform.feed transfo (joined ^ "\n"); @@ -27,7 +27,7 @@ let test_parser () = pos = (42, 43); score = Some 2.; strand = `plus; phase = Some 2; attributes = ["k", ["v"; "v v"]; "big k", ["annoying v"]]}); - + test_line ["\"big\\tC style\""; "some"; "s"; "42"; "43"; "2."; "+"] (function | `output (Error (`wrong_row (_, _))) -> true | _ -> false); @@ -56,7 +56,10 @@ let test_parser () = "some=string;djf"] (function | `output (Error (`wrong_attributes (_, _))) -> true | _ -> false); - let transfo = Gff.Transform.string_to_item ~tags:[`version `two] () in + let transfo = + let tags = + { Gff.Tags.default with Gff.Tags.version = `two; allow_empty_lines = true} in + Gff.Transform.string_to_item ~tags () in let test_line l f = let joined = (String.concat ~sep:"\t" l) in Transform.feed transfo (joined ^ "\n"); @@ -73,16 +76,16 @@ let test_parser () = pos = (42, 43); score = Some 2.; strand = `plus; phase = Some 2; attributes = ["k", ["v"]; "big\tk", ["annoying v"]]}); () - + let test_printer () = - let transfo = Gff.Transform.item_to_string () in + let transfo = Gff.Transform.item_to_string ~tags:Gff.Tags.default () in let test s item = Transform.feed transfo item; let res = Transform.next transfo in match res with | `output o -> if s <> o then eprintf "NOT EQUALS:\n%S\n%S\n%!" s o; - assert_equal ~printer:ident s o + assert_equal ~printer:ident s o | `not_ready -> assert_bool "not_ready" false | `end_of_stream -> assert_bool "end_of_stream" false in @@ -100,14 +103,17 @@ let test_printer () = phase = None; attributes = [ "k", ["v"]; "big k", ["an;no\ting\nv"] ]}); - let transfo = Gff.Transform.item_to_string ~tags:[ `version `two] () in + let transfo = + let tags = + { Gff.Tags.default with Gff.Tags.version = `two; allow_empty_lines = true} in + Gff.Transform.item_to_string ~tags () in let test s item = Transform.feed transfo item; let res = Transform.next transfo in match res with | `output o -> if s <> o then eprintf "NOT EQUALS (version 2):\n%S\n%S\n%!" s o; - assert_equal ~printer:ident s o + assert_equal ~printer:ident s o | `not_ready -> assert_bool "not_ready" false | `end_of_stream -> assert_bool "end_of_stream" false in @@ -120,7 +126,7 @@ let test_printer () = ]}); () - + let tests = "GFF" >::: [ "Parse GFF" >:: test_parser; "Print GFF" >:: test_printer; diff --git a/src/tests/test_track.ml b/src/tests/test_track.ml index 42478ec..ad75dea 100644 --- a/src/tests/test_track.ml +++ b/src/tests/test_track.ml @@ -63,7 +63,7 @@ let test_wig_parser () = () let test_gff_parser () = - let transfo = Track.Transform.string_to_gff () in + let transfo = Track.Transform.string_to_gff ~tags:Gff.Tags.default () in let test_line l f = Transform.feed transfo (l ^ "\n"); assert_bool l (f (Transform.next transfo)) @@ -130,7 +130,7 @@ let test_wig_printer () = () let test_gff_printer () = - let transfo = Track.Transform.gff_to_string () in + let transfo = Track.Transform.gff_to_string ~tags:Gff.Tags.default () in let test_line i l = Transform.feed transfo i; assert_bool l (Transform.next transfo = `output (l ^ "\n"))