-
Notifications
You must be signed in to change notification settings - Fork 91
/
ged2gwb.ml
4012 lines (3829 loc) · 127 KB
/
ged2gwb.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(* camlp5r pa_extend.cmo ../src/pa_lock.cmo *)
(* $Id: ged2gwb.ml,v 5.53 2007-09-12 09:58:44 ddr Exp $ *)
(* Copyright (c) 1998-2007 INRIA *)
open Dbdisk;
open Def;
open Mutil;
open Printf;
type person = dsk_person;
type ascend = dsk_ascend;
type union = dsk_union;
type family = dsk_family;
type couple = dsk_couple;
type descend = dsk_descend;
value get_access p = p.Def.access;
value get_aliases p = p.Def.aliases;
value get_baptism p = p.Def.baptism;
value get_baptism_place p = p.Def.baptism_place;
value get_baptism_note p = p.Def.baptism_note;
value get_baptism_src p = p.Def.baptism_src;
value get_birth p = p.Def.birth;
value get_birth_place p = p.Def.birth_place;
value get_birth_note p = p.Def.birth_note;
value get_birth_src p = p.Def.birth_src;
value get_burial p = p.Def.burial;
value get_burial_place p = p.Def.burial_place;
value get_burial_note p = p.Def.burial_note;
value get_burial_src p = p.Def.burial_src;
value get_death p = p.Def.death;
value get_death_place p = p.Def.death_place;
value get_death_note p = p.Def.death_note;
value get_death_src p = p.Def.death_src;
value get_first_name p = p.Def.first_name;
value get_first_names_aliases p = p.Def.first_names_aliases;
value get_image p = p.Def.image;
value get_key_index p = p.Def.key_index;
value get_notes p = p.Def.notes;
value get_occ p = p.Def.occ;
value get_occupation p = p.Def.occupation;
value get_psources p = p.Def.psources;
value get_public_name p = p.Def.public_name;
value get_qualifiers p = p.Def.qualifiers;
value get_related p = p.Def.related;
value get_rparents p = p.Def.rparents;
value get_sex p = p.Def.sex;
value get_surname p = p.Def.surname;
value get_surnames_aliases p = p.Def.surnames_aliases;
value get_titles p = p.Def.titles;
value get_pevents p = p.Def.pevents;
value person_with_key p fn sn oc =
{(p) with first_name = fn; surname = sn; occ = oc}
;
value person_with_related p r = {(p) with related = r};
value person_with_rparents p r = {(p) with rparents = r};
value person_with_sex p s = {(p) with sex = s};
value person_of_gen_person p = p;
value gen_person_of_person p = p;
value get_consang a = a.Def.consang;
value get_parents a = a.Def.parents;
value ascend_with_parents a p = {parents = p; consang = a.consang};
value ascend_of_gen_ascend a = a;
value get_family u = u.Def.family;
value union_of_gen_union u = u;
value get_comment f = f.Def.comment;
value get_divorce f = f.Def.divorce;
value get_fam_index f = f.Def.fam_index;
value get_fsources f = f.Def.fsources;
value get_fevents f = f.Def.fevents;
value get_marriage f = f.Def.marriage;
value get_marriage_place f = f.Def.marriage_place;
value get_marriage_note f = f.Def.marriage_note;
value get_marriage_src f = f.Def.marriage_src;
value get_origin_file f = f.Def.origin_file;
value get_relation f = f.Def.relation;
value get_witnesses f = f.Def.witnesses;
value family_of_gen_family f = f;
value gen_family_of_family f = f;
value get_father c = Adef.father c;
value get_mother c = Adef.mother c;
value get_parent_array c = Adef.parent_array c;
value gen_couple_of_couple c = c;
value couple_of_gen_couple c = c;
value get_children d = d.Def.children;
value descend_of_gen_descend d = d;
value gen_descend_of_descend d = d;
value poi base i = base.data.persons.get (Adef.int_of_iper i);
value aoi base i = base.data.ascends.get (Adef.int_of_iper i);
value uoi base i = base.data.unions.get (Adef.int_of_iper i);
value foi base i = base.data.families.get (Adef.int_of_ifam i);
value coi base i = base.data.couples.get (Adef.int_of_ifam i);
value doi base i = base.data.descends.get (Adef.int_of_ifam i);
value sou base i = base.data.strings.get (Adef.int_of_istr i);
value p_first_name base p = nominative (sou base p.first_name);
value p_surname base p = nominative (sou base p.surname);
value designation base p =
let prenom = p_first_name base p in
let nom = p_surname base p in
prenom ^ "." ^ string_of_int p.occ ^ " " ^ nom
;
value couple _ x y = Adef.couple x y;
value strictly_after = CheckItem.strictly_after;
value strictly_before_dmy = CheckItem.strictly_before_dmy;
value date_of_death = CheckItem.date_of_death;
value year_of d = d.year;
value log_oc = ref stdout;
type record =
{ rlab : string;
rval : string;
rcont : string;
rsons : list record;
rpos : int;
rused : mutable bool }
;
type choice3 'a 'b 'c 'd =
[ Left3 of 'a
| Right3 of 'b and 'c and 'd ]
;
type month_number_dates =
[ MonthDayDates
| DayMonthDates
| NoMonthNumberDates
| MonthNumberHappened of string ]
;
type charset =
[ Ansel
| Ascii
| Msdos
| MacIntosh
| Utf8 ]
;
type case =
[ NoCase
| LowerCase
| UpperCase ]
;
value lowercase_first_names = ref False;
value case_surnames = ref NoCase;
value extract_first_names = ref False;
value extract_public_names = ref True;
value charset_option = ref None;
value charset = ref Ascii;
value alive_years = ref 80;
value dead_years = ref 120;
value try_negative_dates = ref False;
value no_negative_dates = ref False;
value month_number_dates = ref NoMonthNumberDates;
value no_public_if_titles = ref False;
value first_names_brackets = ref None;
value untreated_in_notes = ref False;
value force = ref False;
value default_source = ref "";
value relation_status = ref Married;
value no_picture = ref False;
value do_check = ref True;
(* Reading input *)
value line_cnt = ref 1;
value in_file = ref "";
value print_location pos =
fprintf log_oc.val "File \"%s\", line %d:\n" in_file.val pos
;
value rec skip_eol =
parser
[ [: `'\010' | '\013'; _ = skip_eol :] -> ()
| [: :] -> () ]
;
value rec get_to_eoln len =
parser
[ [: `'\010' | '\013'; _ = skip_eol :] -> Buff.get len
| [: `'\t'; s :] -> get_to_eoln (Buff.store len ' ') s
| [: `c; s :] -> get_to_eoln (Buff.store len c) s
| [: :] -> Buff.get len ]
;
value rec skip_to_eoln =
parser
[ [: `'\010' | '\013'; _ = skip_eol :] -> ()
| [: `_; s :] -> skip_to_eoln s
| [: :] -> () ]
;
value eol_chars = ['\010'; '\013'];
value rec get_ident len =
parser
[ [: `' ' | '\t' :] -> Buff.get len
| [: `c when not (List.mem c eol_chars); s :] ->
get_ident (Buff.store len c) s
| [: :] -> Buff.get len ]
;
value skip_space =
parser
[ [: `' ' | '\t' :] -> ()
| [: :] -> () ]
;
value rec line_start num =
parser
[ [: `' '; s :] -> line_start num s
| [: `x when x = num :] -> () ]
;
value ascii_of_msdos s =
let s' = String.create (String.length s) in
do {
for i = 0 to String.length s - 1 do {
let cc =
match Char.code s.[i] with
[ 0o200 -> 0o307
| 0o201 -> 0o374
| 0o202 -> 0o351
| 0o203 -> 0o342
| 0o204 -> 0o344
| 0o205 -> 0o340
| 0o206 -> 0o345
| 0o207 -> 0o347
| 0o210 -> 0o352
| 0o211 -> 0o353
| 0o212 -> 0o350
| 0o213 -> 0o357
| 0o214 -> 0o356
| 0o215 -> 0o354
| 0o216 -> 0o304
| 0o217 -> 0o305
| 0o220 -> 0o311
| 0o221 -> 0o346
| 0o222 -> 0o306
| 0o223 -> 0o364
| 0o224 -> 0o366
| 0o225 -> 0o362
| 0o226 -> 0o373
| 0o227 -> 0o371
| 0o230 -> 0o377
| 0o231 -> 0o326
| 0o232 -> 0o334
| 0o233 -> 0o242
| 0o234 -> 0o243
| 0o235 -> 0o245
| 0o240 -> 0o341
| 0o241 -> 0o355
| 0o242 -> 0o363
| 0o243 -> 0o372
| 0o244 -> 0o361
| 0o245 -> 0o321
| 0o246 -> 0o252
| 0o247 -> 0o272
| 0o250 -> 0o277
| 0o252 -> 0o254
| 0o253 -> 0o275
| 0o254 -> 0o274
| 0o255 -> 0o241
| 0o256 -> 0o253
| 0o257 -> 0o273
| 0o346 -> 0o265
| 0o361 -> 0o261
| 0o366 -> 0o367
| 0o370 -> 0o260
| 0o372 -> 0o267
| 0o375 -> 0o262
| c -> c ]
in
s'.[i] := Char.chr cc
};
s'
}
;
value ascii_of_macintosh s =
let s' = String.create (String.length s) in
do {
for i = 0 to String.length s - 1 do {
let cc =
match Char.code s.[i] with
[ 0o200 -> 0o304
| 0o201 -> 0o305
| 0o202 -> 0o307
| 0o203 -> 0o311
| 0o204 -> 0o321
| 0o205 -> 0o326
| 0o206 -> 0o334
| 0o207 -> 0o341
| 0o210 -> 0o340
| 0o211 -> 0o342
| 0o212 -> 0o344
| 0o213 -> 0o343
| 0o214 -> 0o345
| 0o215 -> 0o347
| 0o216 -> 0o351
| 0o217 -> 0o350
| 0o220 -> 0o352
| 0o221 -> 0o353
| 0o222 -> 0o355
| 0o223 -> 0o354
| 0o224 -> 0o356
| 0o225 -> 0o357
| 0o226 -> 0o361
| 0o227 -> 0o363
| 0o230 -> 0o362
| 0o231 -> 0o364
| 0o232 -> 0o366
| 0o233 -> 0o365
| 0o234 -> 0o372
| 0o235 -> 0o371
| 0o236 -> 0o373
| 0o237 -> 0o374
| 0o241 -> 0o260
| 0o244 -> 0o247
| 0o245 -> 0o267
| 0o246 -> 0o266
| 0o247 -> 0o337
| 0o250 -> 0o256
| 0o256 -> 0o306
| 0o257 -> 0o330
| 0o264 -> 0o245
| 0o273 -> 0o252
| 0o274 -> 0o272
| 0o276 -> 0o346
| 0o277 -> 0o370
| 0o300 -> 0o277
| 0o301 -> 0o241
| 0o302 -> 0o254
| 0o307 -> 0o253
| 0o310 -> 0o273
| 0o312 -> 0o040
| 0o313 -> 0o300
| 0o314 -> 0o303
| 0o315 -> 0o325
| 0o320 -> 0o255
| 0o326 -> 0o367
| 0o330 -> 0o377
| 0o345 -> 0o302
| 0o346 -> 0o312
| 0o347 -> 0o301
| 0o350 -> 0o313
| 0o351 -> 0o310
| 0o352 -> 0o315
| 0o353 -> 0o316
| 0o354 -> 0o317
| 0o355 -> 0o314
| 0o356 -> 0o323
| 0o357 -> 0o324
| 0o361 -> 0o322
| 0o362 -> 0o332
| 0o363 -> 0o333
| 0o364 -> 0o331
| c -> c ]
in
s'.[i] := Char.chr cc
};
s'
}
;
value utf8_of_string s =
match charset.val with
[ Ansel -> utf_8_of_iso_8859_1 (Ansel.to_iso_8859_1 s)
| Ascii -> Mutil.utf_8_of_iso_8859_1 s
| Msdos -> Mutil.utf_8_of_iso_8859_1 (ascii_of_msdos s)
| MacIntosh -> Mutil.utf_8_of_iso_8859_1 (ascii_of_macintosh s)
| Utf8 -> s ]
;
value rec get_lev n =
parser
[: _ = line_start n; _ = skip_space; r1 = get_ident 0; strm :] ->
let (rlab, rval, rcont, l) =
if String.length r1 > 0 && r1.[0] = '@' then parse_address n r1 strm
else parse_text n r1 strm
in
{rlab = rlab; rval = utf8_of_string rval;
rcont = utf8_of_string rcont; rsons = List.rev l; rpos = line_cnt.val;
rused = False}
and parse_address n r1 =
parser
[: r2 = get_ident 0; r3 = get_to_eoln 0 ? "get to eoln";
l = get_lev_list [] (Char.chr (Char.code n + 1)) ? "get lev list" :] ->
(r2, r1, r3, l)
and parse_text n r1 =
parser
[: r2 = get_to_eoln 0;
l = get_lev_list [] (Char.chr (Char.code n + 1)) ? "get lev list" :] ->
(r1, r2, "", l)
and get_lev_list l n =
parser
[ [: x = get_lev n; s :] -> get_lev_list [x :: l] n s
| [: :] -> l ]
;
(* Error *)
value bad_dates_warned = ref False;
value print_bad_date pos d =
if bad_dates_warned.val then ()
else do {
bad_dates_warned.val := True;
print_location pos;
fprintf log_oc.val "Can't decode date %s\n" d;
flush log_oc.val
}
;
value check_month m =
if m < 1 || m > 12 then do {
fprintf log_oc.val "Bad (numbered) month in date: %d\n" m;
flush log_oc.val
}
else ()
;
value warning_month_number_dates () =
match month_number_dates.val with
[ MonthNumberHappened s ->
do {
fprintf log_oc.val "
Warning: the file holds dates with numbered months (like: 12/05/1912).
GEDCOM standard *requires* that months in dates be identifiers. The
correct form for this example would be 12 MAY 1912 or 5 DEC 1912.
Consider restarting with option \"-dates_dm\" or \"-dates_md\".
Use option -help to see what they do.
(example found in gedcom: \"%s\")
"
s;
flush log_oc.val
}
| _ -> () ]
;
(* Decoding fields *)
value rec skip_spaces =
parser
[ [: `' '; s :] -> skip_spaces s
| [: :] -> () ]
;
value rec ident_slash len =
parser
[ [: `'/' :] -> Buff.get len
| [: `'\t'; a = ident_slash (Buff.store len ' ') :] -> a
| [: `c; a = ident_slash (Buff.store len c) :] -> a
| [: :] -> Buff.get len ]
;
value strip c str =
let start =
loop 0 where rec loop i =
if i = String.length str then i
else if str.[i] = c then loop (i + 1)
else i
in
let stop =
loop (String.length str - 1) where rec loop i =
if i = -1 then i + 1 else if str.[i] = c then loop (i - 1) else i + 1
in
if start = 0 && stop = String.length str then str
else if start >= stop then ""
else String.sub str start (stop - start)
;
value strip_spaces = strip ' ';
value strip_newlines = strip '\n';
value less_greater_escaped s =
let rec need_code i =
if i < String.length s then
match s.[i] with
[ '<' | '>' -> True
| x -> need_code (succ i) ]
else False
in
let rec compute_len i i1 =
if i < String.length s then
let i1 =
match s.[i] with
[ '<' | '>' -> i1 + 4
| _ -> succ i1 ]
in
compute_len (succ i) i1
else i1
in
let rec copy_code_in s1 i i1 =
if i < String.length s then
let i1 =
match s.[i] with
[ '<' -> do { String.blit "<" 0 s1 i1 4; i1 + 4 }
| '>' -> do { String.blit ">" 0 s1 i1 4; i1 + 4 }
| c -> do { s1.[i1] := c; succ i1 } ]
in
copy_code_in s1 (succ i) i1
else s1
in
if need_code 0 then
let len = compute_len 0 0 in
copy_code_in (String.create len) 0 0
else s
;
value parse_name =
parser
[: _ = skip_spaces;
invert =
parser
[ [: `'/' :] -> True
| [: :] -> False ];
f = ident_slash 0; _ = skip_spaces; s = ident_slash 0 :] ->
let (f, s) = if invert then (s, f) else (f, s) in
let f = strip_spaces f in
let s = strip_spaces s in
(if f = "" then "x" else f, if s = "" then "?" else s)
;
value rec find_field lab =
fun
[ [r :: rl] ->
if r.rlab = lab then do { r.rused := True; Some r }
else find_field lab rl
| [] -> None ]
;
value rec find_all_fields lab =
fun
[ [r :: rl] ->
if r.rlab = lab then do {
r.rused := True; [r :: find_all_fields lab rl]
}
else find_all_fields lab rl
| [] -> [] ]
;
value rec find_field_with_value lab v =
fun
[ [r :: rl] ->
if r.rlab = lab && r.rval = v then
do { r.rused := True; True }
else find_field_with_value lab v rl
| [] -> False ]
;
value rec lexing_date =
parser
[ [: `('0'..'9' as c); n = number (Buff.store 0 c) :] -> ("INT", n)
| [: `('A'..'Z' as c); i = ident (Buff.store 0 c) :] -> ("ID", i)
| [: `'('; len = text 0 :] -> ("TEXT", Buff.get len)
| [: `'.' :] -> ("", ".")
| [: `' ' | '\t' | '\013'; s :] -> lexing_date s
| [: _ = Stream.empty :] -> ("EOI", "")
| [: `x :] -> ("", String.make 1 x) ]
and number len =
parser
[ [: `('0'..'9' as c); a = number (Buff.store len c) :] -> a
| [: :] -> Buff.get len ]
and ident len =
parser
[ [: `('A'..'Z' as c); a = ident (Buff.store len c) :] -> a
| [: :] -> Buff.get len ]
and text len =
parser
[ [: `')' :] -> len
| [: `'('; len = text (Buff.store len '('); s :] ->
text (Buff.store len ')') s
| [: `c; s :] -> text (Buff.store len c) s
| [: :] -> len ]
;
value make_date_lexing s = Stream.from (fun _ -> Some (lexing_date s));
value tparse = Token.default_match;
value using_token (p_con, p_prm) =
match p_con with
[ "" | "INT" | "ID" | "TEXT" | "EOI" -> ()
| _ ->
raise
(Token.Error
("the constructor \"" ^ p_con ^
"\" is not recognized by the lexer")) ]
;
value date_lexer =
{Token.tok_func s = (make_date_lexing s, fun _ -> Token.dummy_loc);
Token.tok_using = using_token; Token.tok_removing _ = ();
Token.tok_match = tparse; Token.tok_text _ = "<tok>";
Token.tok_comm = None}
;
type range 'a =
[ Begin of 'a
| End of 'a
| BeginEnd of 'a and 'a ]
;
value date_g = Grammar.gcreate date_lexer;
value date_value = Grammar.Entry.create date_g "date value";
value date_interval = Grammar.Entry.create date_g "date interval";
value date_value_recover = Grammar.Entry.create date_g "date value";
value is_roman_int x =
try
let _ = Mutil.arabian_of_roman x in
True
with
[ Not_found -> False ]
;
value start_with_int x =
try
let s = String.sub x 0 1 in
let _ = int_of_string s in
True
with
[ _ -> False ]
;
value roman_int =
let p =
parser [: `("ID", x) when is_roman_int x :] -> Mutil.arabian_of_roman x
in
Grammar.Entry.of_parser date_g "roman int" p
;
value date_str = ref "";
value make_date n1 n2 n3 =
let n3 =
if no_negative_dates.val then
match n3 with
[ Some n3 -> Some (abs n3)
| None -> None ]
else n3
in
match (n1, n2, n3) with
[ (Some d, Some m, Some y) ->
let (d, m) =
match m with
[ Right m -> (d, m)
| Left m ->
match month_number_dates.val with
[ DayMonthDates -> do { check_month m; (d, m) }
| MonthDayDates -> do { check_month d; (m, d) }
| _ ->
if d >= 1 && m >= 1 && d <= 31 && m <= 31 then
if d > 13 && m <= 13 then (d, m)
else if m > 13 && d <= 13 then (m, d)
else if d > 13 && m > 13 then (0, 0)
else do {
month_number_dates.val :=
MonthNumberHappened date_str.val;
(0, 0)
}
else (0, 0) ] ]
in
let (d, m) = if m < 1 || m > 13 then (0, 0) else (d, m) in
{day = d; month = m; year = y; prec = Sure; delta = 0}
| (None, Some m, Some y) ->
let m =
match m with
[ Right m -> m
| Left m -> m ]
in
{day = 0; month = m; year = y; prec = Sure; delta = 0}
| (None, None, Some y) ->
{day = 0; month = 0; year = y; prec = Sure; delta = 0}
| (Some y, None, None) ->
{day = 0; month = 0; year = y; prec = Sure; delta = 0}
| _ -> raise (Stream.Error "bad date") ]
;
value recover_date cal =
fun
[ Dgreg d Dgregorian ->
let d =
match cal with
[ Dgregorian -> d
| Djulian -> Calendar.gregorian_of_julian d
| Dfrench -> Calendar.gregorian_of_french d
| Dhebrew -> Calendar.gregorian_of_hebrew d ]
in
Dgreg d cal
| d -> d ]
;
EXTEND
GLOBAL: date_value date_interval date_value_recover;
date_value:
[ [ d = date_or_text; EOI -> d ] ]
;
date_value_recover:
[ [ "@"; "#"; ID "DGREGORIAN"; "@"; d = date_value ->
recover_date Dgregorian d
| "@"; "#"; ID "DJULIAN"; "@"; d = date_value ->
recover_date Djulian d
| "@"; "#"; ID "DFRENCH"; ID "R"; "@"; d = date_value ->
recover_date Dfrench d
| "@"; "#"; ID "DHEBREW"; "@"; d = date_value ->
recover_date Dhebrew d ] ]
;
date_interval:
[ [ ID "BEF"; dt = date_or_text; EOI -> End dt
| ID "AFT"; dt = date_or_text; EOI -> Begin dt
| ID "BET"; dt = date_or_text; ID "AND"; dt1 = date_or_text; EOI ->
BeginEnd dt dt1
| ID "TO"; dt = date_or_text; EOI -> End dt
| ID "FROM"; dt = date_or_text; EOI -> Begin dt
| ID "FROM"; dt = date_or_text; ID "TO"; dt1 = date_or_text; EOI ->
BeginEnd dt dt1
| dt = date_or_text; EOI -> Begin dt ] ]
;
date_or_text:
[ [ dr = date_range ->
match dr with
[ Begin (d, cal) -> Dgreg {(d) with prec = After} cal
| End (d, cal) -> Dgreg {(d) with prec = Before} cal
| BeginEnd (d1, cal1) (d2, cal2) ->
let dmy2 =
match cal2 with
[ Dgregorian ->
{day2 = d2.day; month2 = d2.month;
year2 = d2.year; delta2 = 0}
| Djulian ->
let dmy2 = Calendar.julian_of_gregorian d2 in
{day2 = dmy2.day; month2 = dmy2.month;
year2 = dmy2.year; delta2 = 0}
| Dfrench ->
let dmy2 = Calendar.french_of_gregorian d2 in
{day2 = dmy2.day; month2 = dmy2.month;
year2 = dmy2.year; delta2 = 0}
| Dhebrew ->
let dmy2 = Calendar.hebrew_of_gregorian d2 in
{day2 = dmy2.day; month2 = dmy2.month;
year2 = dmy2.year; delta2 = 0} ]
in
Dgreg {(d1) with prec = YearInt dmy2} cal1 ]
| (d, cal) = date -> Dgreg d cal
| s = TEXT -> Dtext s ] ]
;
date_range:
[ [ ID "BEF"; dt = date -> End dt
| ID "AFT"; dt = date -> Begin dt
| ID "BET"; dt = date; ID "AND"; dt1 = date -> BeginEnd dt dt1
| ID "TO"; dt = date -> End dt
| ID "FROM"; dt = date -> Begin dt
| ID "FROM"; dt = date; ID "TO"; dt1 = date -> BeginEnd dt dt1 ] ]
;
date:
[ [ ID "ABT"; (d, cal) = date_calendar -> ({(d) with prec = About}, cal)
| ID "ENV"; (d, cal) = date_calendar -> ({(d) with prec = About}, cal)
| ID "EST"; (d, cal) = date_calendar -> ({(d) with prec = Maybe}, cal)
| ID "AFT"; (d, cal) = date_calendar -> ({(d) with prec = Before}, cal)
| ID "BEF"; (d, cal) = date_calendar -> ({(d) with prec = After}, cal)
| (d, cal) = date_calendar -> (d, cal) ] ]
;
date_calendar:
[ [ "@"; "#"; ID "DGREGORIAN"; "@"; d = date_greg -> (d, Dgregorian)
| "@"; "#"; ID "DJULIAN"; "@"; d = date_greg ->
(Calendar.gregorian_of_julian d, Djulian)
| "@"; "#"; ID "DFRENCH"; ID "R"; "@"; d = date_fren ->
(Calendar.gregorian_of_french d, Dfrench)
| "@"; "#"; ID "DHEBREW"; "@"; d = date_hebr ->
(Calendar.gregorian_of_hebrew d, Dhebrew)
| d = date_greg -> (d, Dgregorian) ] ]
;
date_greg:
[ [ LIST0 "."; n1 = OPT int; LIST0 [ "." | "/" ]; n2 = OPT gen_month;
LIST0 [ "." | "/" ]; n3 = OPT int; LIST0 "." ->
make_date n1 n2 n3 ] ]
;
date_fren:
[ [ LIST0 "."; n1 = int; (n2, n3) = date_fren_kont ->
make_date (Some n1) n2 n3
| LIST0 "."; n1 = year_fren -> make_date (Some n1) None None
| LIST0 "."; (n2, n3) = date_fren_kont -> make_date None n2 n3 ] ]
;
date_fren_kont:
[ [ LIST0 [ "." | "/" ]; n2 = OPT gen_french; LIST0 [ "." | "/" ];
n3 = OPT year_fren; LIST0 "." ->
(n2, n3) ] ]
;
date_hebr:
[ [ LIST0 "."; n1 = OPT int; LIST0 [ "." | "/" ]; n2 = OPT gen_hebr;
LIST0 [ "." | "/" ]; n3 = OPT int; LIST0 "." ->
make_date n1 n2 n3 ] ]
;
gen_month:
[ [ i = int -> Left (abs i)
| m = month -> Right m ] ]
;
month:
[ [ ID "JAN" -> 1
| ID "FEB" -> 2
| ID "MAR" -> 3
| ID "APR" -> 4
| ID "MAY" -> 5
| ID "JUN" -> 6
| ID "JUL" -> 7
| ID "AUG" -> 8
| ID "SEP" -> 9
| ID "OCT" -> 10
| ID "NOV" -> 11
| ID "DEC" -> 12 ] ]
;
gen_french:
[ [ m = french -> Right m ] ]
;
french:
[ [ ID "VEND" -> 1
| ID "BRUM" -> 2
| ID "FRIM" -> 3
| ID "NIVO" -> 4
| ID "PLUV" -> 5
| ID "VENT" -> 6
| ID "GERM" -> 7
| ID "FLOR" -> 8
| ID "PRAI" -> 9
| ID "MESS" -> 10
| ID "THER" -> 11
| ID "FRUC" -> 12
| ID "COMP" -> 13 ] ]
;
year_fren:
[ [ i = int -> i
| ID "AN"; i = roman_int -> i
| i = roman_int -> i ] ]
;
gen_hebr:
[ [ m = hebr -> Right m ] ]
;
hebr:
[ [ ID "TSH" -> 1
| ID "CSH" -> 2
| ID "KSL" -> 3
| ID "TVT" -> 4
| ID "SHV" -> 5
| ID "ADR" -> 6
| ID "ADS" -> 7
| ID "NSN" -> 8
| ID "IYR" -> 9
| ID "SVN" -> 10
| ID "TMZ" -> 11
| ID "AAV" -> 12
| ID "ELL" -> 13 ] ]
;
int:
[ [ i = INT ->
try int_of_string i with [ Failure _ -> raise Stream.Failure ]
| "-"; i = INT ->
try (- int_of_string i) with
[ Failure _ -> raise Stream.Failure ] ] ]
;
END;
value date_of_field pos d =
if d = "" then None
else do {
let s = Stream.of_string (String.uppercase d) in
date_str.val := d;
try Some (Grammar.Entry.parse date_value s) with
[ Ploc.Exc loc (Stream.Error _) ->
let s = Stream.of_string (String.uppercase d) in
try Some (Grammar.Entry.parse date_value_recover s) with
[ Ploc.Exc loc (Stream.Error _) -> Some (Dtext d) ] ]
}
;
(* Creating base *)
type tab 'a = { arr : mutable array 'a; tlen : mutable int };
type gen =
{ g_per : tab (choice3 string person ascend union);
g_fam : tab (choice3 string family couple descend);
g_str : tab string;
g_bnot : mutable string;
g_ic : in_channel;
g_not : Hashtbl.t string int;
g_src : Hashtbl.t string int;
g_hper : Hashtbl.t string Adef.iper;
g_hfam : Hashtbl.t string Adef.ifam;
g_hstr : Hashtbl.t string dsk_istr;
g_hnam : Hashtbl.t string (ref int);
g_adop : Hashtbl.t string (Adef.iper * string);
g_godp : mutable list (Adef.iper * Adef.iper);
g_prelated : mutable list (Adef.iper * Adef.iper);
g_frelated : mutable list (Adef.iper * Adef.iper);
g_witn : mutable list (Adef.ifam * Adef.iper) }
;
value assume_tab name tab none =
if tab.tlen = Array.length tab.arr then do {
let new_len = 2 * Array.length tab.arr + 1 in
let new_arr = Array.create new_len none in
Array.blit tab.arr 0 new_arr 0 (Array.length tab.arr);
tab.arr := new_arr
}
else ()
;
value add_string gen s =
try Hashtbl.find gen.g_hstr s with
[ Not_found ->
let i = gen.g_str.tlen in
do {
assume_tab "gen.g_str" gen.g_str "";
gen.g_str.arr.(i) := s;
gen.g_str.tlen := gen.g_str.tlen + 1;
Hashtbl.add gen.g_hstr s (Adef.istr_of_int i);
Adef.istr_of_int i
} ]
;
value extract_addr addr =
if String.length addr > 0 && addr.[0] = '@' then
try
let r = String.index_from addr 1 '@' in
String.sub addr 0 (r + 1)
with
[ Not_found -> addr ]
else addr
;
value per_index gen lab =
let lab = extract_addr lab in
try Hashtbl.find gen.g_hper lab with
[ Not_found ->
let i = gen.g_per.tlen in
do {
assume_tab "gen.g_per" gen.g_per (Left3 "");
gen.g_per.arr.(i) := Left3 lab;
gen.g_per.tlen := gen.g_per.tlen + 1;
Hashtbl.add gen.g_hper lab (Adef.iper_of_int i);
Adef.iper_of_int i
} ]
;
value fam_index gen lab =
let lab = extract_addr lab in
try Hashtbl.find gen.g_hfam lab with
[ Not_found ->
let i = gen.g_fam.tlen in
do {
assume_tab "gen.g_fam" gen.g_fam (Left3 "");
gen.g_fam.arr.(i) := Left3 lab;
gen.g_fam.tlen := gen.g_fam.tlen + 1;
Hashtbl.add gen.g_hfam lab (Adef.ifam_of_int i);
Adef.ifam_of_int i
} ]
;
value string_empty = Adef.istr_of_int 0;
value string_quest = Adef.istr_of_int 1;
value string_x = Adef.istr_of_int 2;
value unknown_per gen i sex =
let empty = string_empty in
let what = string_quest in
let p =
person_of_gen_person
{first_name = what; surname = what; occ = i; public_name = empty;
image = empty; qualifiers = []; aliases = []; first_names_aliases = [];
surnames_aliases = []; titles = []; rparents = []; related = [];
occupation = empty; sex = sex; access = IfTitles;
birth = Adef.codate_None; birth_place = empty; birth_note = empty;
birth_src = empty; baptism = Adef.codate_None; baptism_place = empty;
baptism_note = empty; baptism_src = empty; death = DontKnowIfDead;
death_place = empty; death_note = empty; death_src = empty;
burial = UnknownBurial; burial_place = empty; burial_note = empty;
burial_src = empty; pevents = []; notes = empty; psources = empty;
key_index = Adef.iper_of_int i}