-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreadatom.pl
88 lines (62 loc) · 2.39 KB
/
readatom.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
% File READATOM.PL
% Michael A. Covington
% Natural Language Processing for Prolog Programmers
% (Prentice-Hall)
% Appendix B
% Version of read_atomics/1 for most Prologs. See text.
% read_atomics(-Atomics)
%   Reads one line of input and breaks it into a list of atomic
%   terms, for example [this,is,an,example].
%   The first character is read and classified here; complete_line/3
%   then tokenizes that character together with the rest of the line.

read_atomics(Atomics) :-
    read_char(Char, Type),
    complete_line(Char, Type, Atomics).
% read_char(-Char,-Type)
%   Reads a single character code with get0/1 and classifies it with
%   char_type/3.  Type is the resulting class; Char is the code handed
%   back by char_type/3, which may differ from the code actually read.

read_char(Char, Type) :-
    get0(RawCode),
    char_type(RawCode, Type, Char).
% complete_line(+FirstC,+FirstT,-Atomics)
%   FirstC is a character that has already been read and FirstT is its
%   type.  Tokenizes FirstC together with the remainder of the line,
%   returning the atoms and numbers found in Atomics.

% End of line or end of file: nothing more to collect.
complete_line(_, end, []) :-
    !.

% Blank: drop it and continue with the next character.
complete_line(_, blank, Tokens) :-
    !,
    read_atomics(Tokens).

% Special character: it forms a one-character token on its own.
complete_line(Code, special, [Token|Tokens]) :-
    !,
    name(Token, [Code]),
    read_atomics(Tokens).

% Start of a word: collect its characters, then carry on from the
% character that ended the word (complete_word/5 has already read it).
complete_line(Code, alpha, [Token|Tokens]) :-
    complete_word(Code, alpha, Codes, NextCode, NextType),
    name(Token, Codes),     % may not handle numbers correctly - see text
    complete_line(NextCode, NextType, Tokens).
% complete_word(+FirstC,+FirstT,-List,-FollC,-FollT)
%   FirstC is a character that has already been read and FirstT is its
%   type.  Collects the characters of the current word in List and
%   returns in FollC/FollT the first character after the word and its
%   type.

% An alpha character belongs to the word: keep it and read another.
complete_word(Code, alpha, [Code|Rest], FollC, FollT) :-
    !,
    read_char(NextCode, NextType),
    complete_word(NextCode, NextType, Rest, FollC, FollT).

% Any other type (reached only when the type is not alpha) ends the
% word; the character is handed back unchanged as the following one.
complete_word(Code, Type, [], Code, Type).
% char_type(+Code,?Type,-NewCode)
%   Classifies the character code Code as 'end' (end of line or end of
%   file), 'blank', 'alpha' (letters and digits) or 'special', and gives
%   in NewCode the code to use in its place: upper-case letters are
%   translated to lower case, every blank or control code becomes 32
%   (space), and all other codes are returned unchanged.
%
%   Each clause commits on Code alone and binds the outputs only after
%   the cut.  A call with Type or NewCode already bound therefore cannot
%   slip past the clause that really applies and pick up a wrong
%   classification from a later one; it simply fails on a mismatch.

char_type(10, Type, NewCode) :-             % UNIX end of line mark
    !,
    Type = end, NewCode = 10.
char_type(13, Type, NewCode) :-             % DOS end of line mark
    !,
    Type = end, NewCode = 13.
char_type(-1, Type, NewCode) :-             % get0 end of file code
    !,
    Type = end, NewCode = -1.
char_type(Code, Type, NewCode) :-           % blanks, other ctrl codes
    Code =< 32,
    !,
    Type = blank, NewCode = 32.
char_type(Code, Type, NewCode) :-           % digits
    48 =< Code, Code =< 57,
    !,
    Type = alpha, NewCode = Code.
char_type(Code, Type, NewCode) :-           % lower-case letters
    97 =< Code, Code =< 122,
    !,
    Type = alpha, NewCode = Code.
char_type(Code, Type, NewCode) :-           % upper-case letters
    65 =< Code, Code =< 90,
    !,
    Type = alpha,
    NewCode is Code + 32.                   % (translate to lower case)
char_type(Code, special, Code).             % all others