Skip to content

Commit

Permalink
Gff: make Tags.t a more precise record (#90)
Browse files Browse the repository at this point in the history
  • Loading branch information
smondet committed Jun 3, 2013
1 parent c502dec commit 000a937
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 45 deletions.
46 changes: 23 additions & 23 deletions src/lib/biocaml_gff.ml
Expand Up @@ -31,8 +31,15 @@ let module_error e = Error (`gff e)

module Tags = struct

type t = [ `version of [`two | `three] | `pedantic ] list with sexp
let default = [`version `three; `pedantic]
type t = {
version: [`two | `three];
allow_empty_lines: bool;
sharp_comments: bool;
}
with sexp

let default =
{version = `three; allow_empty_lines = false; sharp_comments = true}


let to_string t = sexp_of_t t |> Sexplib.Sexp.to_string
Expand Down Expand Up @@ -198,25 +205,23 @@ module Transform = struct
output_error (`wrong_row (pos, s))
end

let rec next ?(pedantic=true) ?(sharp_comments=true) ?(version=`three) p =
let rec next ~tags p =
let open Biocaml_lines.Buffer in
let open Result in
match (next_line p :> string option) with
| None -> `not_ready
| Some "" ->
if pedantic
if tags.Tags.allow_empty_lines
then output_error (`empty_line (current_position p))
else next ~pedantic ~sharp_comments ~version p
| Some l when sharp_comments && String.(is_prefix (strip l) ~prefix:"#") ->
else next ~tags p
| Some l when
tags.Tags.sharp_comments && String.(is_prefix (strip l) ~prefix:"#") ->
output_ok (`comment String.(sub l ~pos:1 ~len:(length l - 1)))
| Some l -> parse_row ~version (current_position p) l
| Some l -> parse_row ~version:tags.Tags.version (current_position p) l

let string_to_item ?filename ?(tags=Tags.default) () =
let string_to_item ?filename ~tags () =
let name = sprintf "gff_parser:%s" Option.(value ~default:"<>" filename) in
let pedantic = List.mem tags `pedantic in
let version =
List.find_map tags (function `version v -> Some v | _ -> None) in
let next = next ~pedantic ?version in
let next = next ~tags in
Biocaml_lines.Transform.make_merge_error ~name ?filename ~next ()

let item_to_string_pure version = (function
Expand Down Expand Up @@ -248,28 +253,23 @@ module Transform = struct
] ^ "\n"
)

let item_to_string ?(tags=Tags.default) () =
let version =
List.find_map tags (function `version v -> Some v | _ -> None)
|! Option.value ~default:`three in
let item_to_string ~tags () =
Biocaml_transform.of_function ~name:"gff_to_string"
(item_to_string_pure version)
(item_to_string_pure tags.Tags.version)

end

exception Error of Error.t
let error_to_exn e = Error e

let in_channel_to_item_stream ?(buffer_size=65536) ?filename ?tags inp =
let x = Transform.string_to_item ?tags ?filename () in
let in_channel_to_item_stream
?(buffer_size=65536) ?filename ?(tags=Tags.default) inp =
let x = Transform.string_to_item ~tags ?filename () in
Biocaml_transform.(in_channel_strings_to_stream inp x ~buffer_size)

let in_channel_to_item_stream_exn ?buffer_size ?tags inp =
Stream.result_to_exn ~error_to_exn
(in_channel_to_item_stream ?buffer_size ?tags inp)

let item_to_string ?(tags=Tags.default) item =
let version =
List.find_map tags (function `version v -> Some v | _ -> None)
|! Option.value ~default:`three in
Transform.item_to_string_pure version item
Transform.item_to_string_pure tags.Tags.version item
13 changes: 9 additions & 4 deletions src/lib/biocaml_gff.mli
Expand Up @@ -70,11 +70,16 @@ end

module Tags: sig

type t = [ `version of [`two | `three] | `pedantic ] list
type t = {
version: [`two | `three];
allow_empty_lines: bool;
sharp_comments: bool;
}
(** Additional format-information tags (c.f. {!Biocaml_tags}). *)

val default: t
(** Default tags for a random Gff file: [[`version `three; `pedantic]]. *)
(** Default tags for a random Gff file:
[{version = `three; allow_empty_lines = false; sharp_comments = true}]. *)

val of_string: string ->
(t, [> `gff of [> `tags_of_string of exn ] ]) Core.Result.t
Expand Down Expand Up @@ -114,12 +119,12 @@ module Transform: sig

val string_to_item:
?filename:string ->
?tags: Tags.t ->
tags: Tags.t ->
unit ->
(string, (item, [> Error.parsing]) Core.Result.t) Biocaml_transform.t
(** Create a parsing [Biocaml_transform.t] for a given version. *)

val item_to_string: ?tags: Tags.t -> unit ->
val item_to_string: tags: Tags.t -> unit ->
(item, string) Biocaml_transform.t
(** Create a printer for a given version. *)

Expand Down
8 changes: 4 additions & 4 deletions src/lib/biocaml_track.ml
Expand Up @@ -154,9 +154,9 @@ module Transform = struct

type gff_parse_error = [Error.parsing | Gff.Error.parsing]
type gff_t = [track | Gff.item]
let string_to_gff ?filename ?tags () =
let string_to_gff ?filename ~tags () =
let gff = Gff.Transform.string_to_item ?filename () in
embed_parser ?filename gff
embed_parser ?filename (gff ~tags)
~reconstruct:(function
| `bypassed (Ok f) -> Ok (f :> gff_t)
| `bypassed (Error f) -> Error (f :> [> gff_parse_error])
Expand Down Expand Up @@ -192,8 +192,8 @@ module Transform = struct
| `comment _ | `track _ | `browser _ as x -> `left x
| #Wig.item as y -> `right y)

let gff_to_string ?tags () =
let gff = Gff.Transform.item_to_string ?tags () in
let gff_to_string ~tags () =
let gff = Gff.Transform.item_to_string ~tags () in
make_printer gff ()
~split:(function
| `comment _ | `track _ | `browser _ as x -> `left x
Expand Down
5 changes: 2 additions & 3 deletions src/lib/biocaml_track.mli
Expand Up @@ -110,13 +110,13 @@ module Transform: sig
([ t | Biocaml_wig.item ], string) Biocaml_transform.t
(** Create a printer for track files containing WIG lines. *)

val string_to_gff: ?filename:string -> ?tags: Biocaml_gff.Tags.t -> unit ->
val string_to_gff: ?filename:string -> tags: Biocaml_gff.Tags.t -> unit ->
(string,
([t | Biocaml_gff.item], [> Error.parsing | Biocaml_gff.Error.parsing])
Core.Result.t) Biocaml_transform.t
(** Create a composite parser for UCSC GFF files. *)

val gff_to_string: ?tags: Biocaml_gff.Tags.t -> unit ->
val gff_to_string: tags: Biocaml_gff.Tags.t -> unit ->
([ t | Biocaml_gff.item ], string) Biocaml_transform.t
(** Create a printer for track files containing GFF lines. *)

Expand All @@ -132,4 +132,3 @@ module Transform: sig
(** Create a printer for track files containing Bed(Graph) lines. *)

end

24 changes: 15 additions & 9 deletions src/tests/test_gff.ml
Expand Up @@ -3,7 +3,7 @@ open Biocaml_internal_pervasives
open Biocaml

let test_parser () =
let transfo = Gff.Transform.string_to_item () in
let transfo = Gff.Transform.string_to_item ~tags:Gff.Tags.default () in
let test_line l f =
let joined = (String.concat ~sep:"\t" l) in
Transform.feed transfo (joined ^ "\n");
Expand All @@ -27,7 +27,7 @@ let test_parser () =
pos = (42, 43); score = Some 2.; strand = `plus;
phase = Some 2;
attributes = ["k", ["v"; "v v"]; "big k", ["annoying v"]]});

test_line ["\"big\\tC style\""; "some"; "s"; "42"; "43"; "2."; "+"]
(function | `output (Error (`wrong_row (_, _))) -> true | _ -> false);

Expand Down Expand Up @@ -56,7 +56,10 @@ let test_parser () =
"some=string;djf"]
(function | `output (Error (`wrong_attributes (_, _))) -> true | _ -> false);

let transfo = Gff.Transform.string_to_item ~tags:[`version `two] () in
let transfo =
let tags =
{ Gff.Tags.default with Gff.Tags.version = `two; allow_empty_lines = true} in
Gff.Transform.string_to_item ~tags () in
let test_line l f =
let joined = (String.concat ~sep:"\t" l) in
Transform.feed transfo (joined ^ "\n");
Expand All @@ -73,16 +76,16 @@ let test_parser () =
pos = (42, 43); score = Some 2.; strand = `plus;
phase = Some 2; attributes = ["k", ["v"]; "big\tk", ["annoying v"]]});
()

let test_printer () =
let transfo = Gff.Transform.item_to_string () in
let transfo = Gff.Transform.item_to_string ~tags:Gff.Tags.default () in
let test s item =
Transform.feed transfo item;
let res = Transform.next transfo in
match res with
| `output o ->
if s <> o then eprintf "NOT EQUALS:\n%S\n%S\n%!" s o;
assert_equal ~printer:ident s o
assert_equal ~printer:ident s o
| `not_ready -> assert_bool "not_ready" false
| `end_of_stream -> assert_bool "end_of_stream" false
in
Expand All @@ -100,14 +103,17 @@ let test_printer () =
phase = None; attributes = [
"k", ["v"]; "big k", ["an;no\ting\nv"]
]});
let transfo = Gff.Transform.item_to_string ~tags:[ `version `two] () in
let transfo =
let tags =
{ Gff.Tags.default with Gff.Tags.version = `two; allow_empty_lines = true} in
Gff.Transform.item_to_string ~tags () in
let test s item =
Transform.feed transfo item;
let res = Transform.next transfo in
match res with
| `output o ->
if s <> o then eprintf "NOT EQUALS (version 2):\n%S\n%S\n%!" s o;
assert_equal ~printer:ident s o
assert_equal ~printer:ident s o
| `not_ready -> assert_bool "not_ready" false
| `end_of_stream -> assert_bool "end_of_stream" false
in
Expand All @@ -120,7 +126,7 @@ let test_printer () =
]});
()


let tests = "GFF" >::: [
"Parse GFF" >:: test_parser;
"Print GFF" >:: test_printer;
Expand Down
4 changes: 2 additions & 2 deletions src/tests/test_track.ml
Expand Up @@ -63,7 +63,7 @@ let test_wig_parser () =
()

let test_gff_parser () =
let transfo = Track.Transform.string_to_gff () in
let transfo = Track.Transform.string_to_gff ~tags:Gff.Tags.default () in
let test_line l f =
Transform.feed transfo (l ^ "\n");
assert_bool l (f (Transform.next transfo))
Expand Down Expand Up @@ -130,7 +130,7 @@ let test_wig_printer () =
()

let test_gff_printer () =
let transfo = Track.Transform.gff_to_string () in
let transfo = Track.Transform.gff_to_string ~tags:Gff.Tags.default () in
let test_line i l =
Transform.feed transfo i;
assert_bool l (Transform.next transfo = `output (l ^ "\n"))
Expand Down

0 comments on commit 000a937

Please sign in to comment.