(* biocaml_vcf.mli *)
open Biocaml_internal_pervasives
(** Identifier of a meta-information entry. In this interface it is the key
    type of the [vcfm_info] and [vcfm_format] tables, the first component of
    the [vcfm_filters] pairs, and the element type of a row's [vcfr_filter]
    list. It is a plain [string] alias, so no validation is enforced by the
    type itself. *)
type vcf_id = string
(** Free-form description text carried by the [Info], [Filter], [Format] and
    [Alt] meta constructors. Plain [string] alias. *)
type vcf_description = string
(** Cardinality attached to an [Info] or [Format] meta entry (it is the first
    component of both constructors).

    - [Number n]: an explicit integer count.
    - [OnePerAllele], [OnePerGenotype], [Unknown]: symbolic counts.

    NOTE(review): these presumably mirror the [Number=] attribute of VCF
    meta lines (an integer, [A], [G] and [.] respectively) -- confirm against
    the implementation, which is not visible from this interface. *)
type vcf_number =
| Number of int
| OnePerAllele
| OnePerGenotype
| Unknown
(** Type tag declared by a [Format] meta entry: one of integer, float,
    character or string. This is a closed polymorphic variant so that
    {!vcf_info_type} can extend it by inclusion. *)
type vcf_format_type = [ `integer_value
| `float_value
| `character_value
| `string_value
]
(** Type tag declared by an [Info] meta entry: every tag of
    {!vcf_format_type} plus [`flag_value]. *)
type vcf_info_type = [ vcf_format_type | `flag_value ]
(** Declaration of an info field: its cardinality, its value type and its
    description. Stored in [vcfm_info], keyed by {!vcf_id}. *)
type vcf_info_meta = Info of vcf_number * vcf_info_type * vcf_description
(** Declaration of a filter: only a description is recorded. Stored in
    [vcfm_filters] paired with its {!vcf_id}. *)
type vcf_filter_meta = Filter of vcf_description
(** Declaration of a format field: its cardinality, its value type (which,
    unlike info fields, cannot be a flag) and its description. Stored in
    [vcfm_format], keyed by {!vcf_id}. *)
type vcf_format_meta = Format of vcf_number * vcf_format_type * vcf_description
(** Declaration of a symbolic alternate allele: only a description is
    recorded. Stored in [vcfm_alt], keyed by [string]. *)
type vcf_alt_meta = Alt of vcf_description
(** Meta-information collected for a VCF stream.

    Most fields are mutable [Hashtbl.t] values, so a [vcf_meta] record can be
    updated in place by whoever holds it even though the record fields
    themselves are immutable. *)
type vcf_meta = {
vcfm_version : string;
(** Version string. NOTE(review): presumably taken from the file-format
    meta line -- confirm in the implementation. *)
vcfm_id_cache: vcf_id Set.Poly.t;
(** Set of identifiers. NOTE(review): its purpose is not visible from this
    interface; the [`duplicate_ids] row error suggests it is used to detect
    repeated IDs -- confirm. *)
vcfm_info : (vcf_id, vcf_info_meta) Hashtbl.t;
(** Declared info fields, keyed by identifier. *)
vcfm_filters : (vcf_id * vcf_filter_meta) list;
(** Declared filters as an association list (identifier, declaration). *)
vcfm_format : (vcf_id, vcf_format_meta) Hashtbl.t;
(** Declared format fields, keyed by identifier. *)
vcfm_alt : (string, vcf_alt_meta) Hashtbl.t;
(** Declared symbolic alternate alleles, keyed by a plain [string]. *)
vcfm_arbitrary : (string, string) Hashtbl.t;
(** String-to-string table. NOTE(review): presumably holds meta lines that
    match none of the structured kinds above -- confirm. *)
vcfm_header : string list
(** List of strings. NOTE(review): presumably the column names of the
    header line -- confirm. *)
}
(** A typed value for a format field. Each tag carries a payload of the
    corresponding OCaml type, and the tags parallel those of
    {!vcf_format_type} ([`integer_value] / [`integer of int], etc.). *)
type vcf_format = [ `integer of int
| `float of float
| `character of char
| `string of string
]
(** A typed value for an info field: any {!vcf_format} value, or a
    [`flag] carrying a [string]. NOTE(review): what the flag's string
    payload contains is not visible from this interface -- confirm. *)
type vcf_info = [ vcf_format | `flag of string ]
(** One parsed data row. The field names follow the fixed VCF columns
    (chromosome, position, IDs, reference, alternates, quality, filter,
    info). *)
type vcf_row = {
vcfr_chrom : string; (* FIXME(superbobry): Biocaml_chrName.t *)
(** Chromosome name, currently an unstructured [string] (see FIXME). *)
vcfr_pos : int;
(** Position as an integer. NOTE(review): whether it is 0- or 1-based is
    not stated in this interface -- confirm. *)
vcfr_ids : string list;
(** Identifiers attached to the row. *)
vcfr_ref : string;
(** Reference allele. *)
vcfr_alts : string list;
(** Alternate alleles. *)
vcfr_qual : float option;
(** Quality score; optional. NOTE(review): presumably [None] when the
    column holds the missing-value marker -- confirm. *)
vcfr_filter : vcf_id list;
(** Identifiers of filters recorded for the row. *)
vcfr_info : (vcf_id, vcf_info list) Hashtbl.t
(** Info values, keyed by info identifier; each key maps to a list of
    typed values. This is a mutable table. *)
}
(** The element type produced by {!Transform.string_to_item}; an alias for
    {!vcf_row}. *)
type item = vcf_row
(** Positions attached to parse errors (see {!vcf_parse_error}). The module
    is exposed with the signature of [Biocaml_pos]. *)
module Pos : module type of Biocaml_pos
(** Reasons a single data row can be rejected. These are wrapped, together
    with a position and a string, by the [`malformed_row] case of
    {!vcf_parse_error}.

    Payloads, as far as the types show:
    - [`invalid_int s], [`invalid_float s], [`invalid_dna s]: the offending
      text.
    - [`info_type_coersion_failure (ty, s)]: a declared info type and a
      string (presumably the text that could not be converted to [ty]).
      The spelling "coersion" (sic) is part of the public tag name.
    - [`unknown_info id], [`unknown_filter id], [`unknown_alt s]: a name
      (presumably one with no matching declaration in the meta-information).
    - [`duplicate_ids ids]: a list of identifiers.
    - [`invalid_arguments_length (id, a, b)]: an identifier and two integers.
      NOTE(review): which of the two is the expected and which the actual
      count is not visible here -- confirm in the implementation.
    - [`arbitrary_width_rows_not_supported]: no payload. *)
type vcf_parse_row_error =
[ `invalid_int of string
| `invalid_float of string
| `info_type_coersion_failure of vcf_info_type * string
| `invalid_dna of string
| `unknown_info of vcf_id
| `unknown_filter of vcf_id
| `unknown_alt of string
| `duplicate_ids of vcf_id list
| `invalid_arguments_length of vcf_id * int * int
| `arbitrary_width_rows_not_supported
]
(** Errors reported by the parser; this is the error side of the results
    produced by {!Transform.string_to_item}.

    - [`malformed_meta (pos, s)], [`malformed_header (pos, s)]: a position
      and a string (presumably the offending line or a message -- confirm).
    - [`malformed_row (pos, err, s)]: a position, the row-level reason
      (see {!vcf_parse_row_error}) and a string.
    - [`arbitrary_width_rows_not_supported pos]: same tag name as the
      row-level error, but here it carries a position.
    - [`incomplete_input (pos, lines, rest)]: a position, a list of
      [Biocaml_lines.item] and an optional string (presumably the buffered
      lines and a trailing partial line left when input ended -- confirm).
    - [`not_ready]: no payload. *)
type vcf_parse_error =
[ `malformed_meta of Pos.t * string
| `malformed_row of Pos.t * vcf_parse_row_error * string
| `malformed_header of Pos.t * string
| `arbitrary_width_rows_not_supported of Pos.t
| `incomplete_input of Pos.t * Biocaml_lines.item list * string option
| `not_ready
]
(** [parse_error_to_string e] renders the parse error [e] as a string. The
    exact format is not specified by this interface. *)
val parse_error_to_string : vcf_parse_error -> string
(** Stream transforms over VCF data. *)
module Transform : sig
(** [string_to_item ?filename ()] builds a [Biocaml_transform.t] that
    consumes [string] input and emits, for each output, either [Ok item]
    (a parsed {!vcf_row}) or [Error e] with a {!vcf_parse_error}.

    @param filename optional; NOTE(review): presumably only used to label
    the positions carried by errors -- confirm in the implementation. *)
val string_to_item :
?filename:string ->
unit ->
(string, (item, vcf_parse_error) Core.Result.t) Biocaml_transform.t
end