-
Notifications
You must be signed in to change notification settings - Fork 0
/
eol-reader.lisp
128 lines (105 loc) · 4.63 KB
/
eol-reader.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
;; Copyright (c) Flavio Egoavil <F_egoavil@hotmail.com> aka D E F U N K Y D R U M M E R
;; MIT License
;; **********************
;; Functions for reading byte streams line by line
;; **********************
;; Package holding the EOL (end-of-line) helpers defined in this file.
;; It uses :cl plus :auto-text/common; the unqualified type names
;; TBYTEBUFFER and TINTEGER referenced below are presumably defined in
;; :auto-text/common -- TODO confirm against that package.
(common-lisp:defpackage :auto-text/eol
(:use :cl
:auto-text/common)
;; Only the two functions actually defined in this file are exported; the
;; commented-out symbols name functions that are themselves commented out
;; further down.
(:export
:advance-after-eol
;:fetch-line
;:fetch-fixed-amount-of-characters
:filter-eol-on-string))
(common-lisp:in-package :auto-text/eol)
;; Ask the compiler to favour execution speed for the definitions below.
(declaim (optimize (speed 3)))
(defun advance-after-eol (stream eol-vector buffer)
  "Scan STREAM forward from its current file position until the byte
sequence EOL-VECTOR is found, and leave the stream positioned on the first
byte after that sequence.

Parameters:
  STREAM     - a stream that supports READ-SEQUENCE into BUFFER and
               FILE-POSITION (both querying and setting).
  EOL-VECTOR - simple-vector with the bytes that make up one EOL marker.
  BUFFER     - byte buffer created beforehand by the caller; it is used as
               scratch space and its contents are overwritten.

The stream is read one buffer-full at a time.  An EOL marker that is split
across two consecutive reads is still detected: when a chunk contains no
EOL, the stream is stepped back by up to (length EOL-VECTOR) - 1 bytes so
that the tail of the chunk is re-examined together with the next one.

Side effect: the file position of STREAM is moved to just after the EOL,
or to end of file when no EOL is found.
Returns: the new file position, or NIL when EOF is reached without
finding an EOL."
  (declare (type stream stream)
           (type simple-vector eol-vector)
           (type tbytebuffer buffer))
  (let ((eol-length (length eol-vector)))
    ;; Iterate instead of recursing so that a long run of data without any
    ;; EOL cannot grow the stack.
    (loop
      (let ((num (read-sequence buffer stream)))
        (declare (type tinteger num))
        ;; Nothing left to read: EOF without an EOL.
        (when (zerop num)
          (return nil))
        ;; Search only the part of the buffer filled by this read.
        (let ((res (search eol-vector buffer :end2 num :test 'eql)))
          (when res
            ;; REWIND is the number of bytes that were read beyond the end
            ;; of the EOL; step back over them so the stream sits right
            ;; after the EOL.
            (let ((rewind (- num res eol-length)))
              ;; NOTE: per the spec, FILE-POSITION returns true when the
              ;; repositioning is performed successfully.
              (file-position stream (- (file-position stream) rewind))
              (return (file-position stream))))
          ;; No EOL in this chunk.  Its last bytes may be the beginning of
          ;; an EOL that continues in the next chunk, so re-read them.
          ;; The overlap is capped at NUM - 1 so every pass consumes at
          ;; least one new byte and the loop always terminates.
          (let ((overlap (max 0 (min (1- eol-length) (1- num)))))
            (when (plusp overlap)
              (file-position stream
                             (- (file-position stream) overlap)))))))))
;; included just for clarity
;; (defun advance-after-pattern (stream vector buffer)
;; "within stream, seek until finding a byte sequence.,
;; return the new file-position for positioning after said sequence..
;; Input parameters: Stream,
;; VECTOR is the sequence to find.
;; buffer is a byte buffer, needs to be created beforehand
;; Side effect: Stream file position altered to after sequence.
;; Returns: new file position or NIL on EOF."
;; (advance-after-eol stream vector buffer))
;; --------------------------------------------------------
;; FETCH LINE DOES NOT WORK CORRECTLY!! BUGS!!!
;; --------------------------------------------------------
;; (defun fetch-line (str eol-vector buffer)
;; "Read line from stream (current position) into buffer.
;; This advances the file position to after the end of line.
;; This can also be used to read delimited files...
;; NOTE: Obviously this won't work with UTF-16 or UTF-32 files.
;; NIL when line not found (EOF)."
;; (let* ((fpos1
;; (if (zerop (file-position str)) 0
;; (advance-after-eol str eol-vector buffer)))
;; ;; then again -- go to the end of the other line
;; (fpos2 (advance-after-eol str eol-vector buffer)))
;; ;; get line length
;; (when (not (or (null fpos1)
;; (null fpos2)))
;; (let ((line-len (- fpos2 fpos1)))
;; ;; read and return the line!
;; (file-position str fpos1)
;; (read-sequence buffer str :end line-len)
;; ;; the line itself...
;; (subseq buffer 0 (- line-len (length eol-vector)))))))
;; (defun fetch-line-babel (str eol-vector buffer encoding)
;; "Read line with fetch-line, then convert with BABEL to the
;; desired encoding. (Encoding must be a babel-accepted encoding.)
;; STRING IS RETURNED (dont use the buffer)"
;; (babel:octets-to-string
;; (fetch-line str eol-vector buffer)
;; :encoding encoding))
;; (defun fetch-fixed-amount-of-characters
;; (str buffer num-chars encoding)
;; "Read line (using BABEL) from byte stream, to get the required number of CHARACTERS (not bytes), using babel as a decoder. (Encoding must be a babel-accepted encoding.)
;; STRING IS RETURNED (dont use the buffer)"
;; ;; we need to read more than the line num of chars, because UTF-8 etc
;; (read-sequence buffer str :end (length buffer))
;; (babel:octets-to-string
;; buffer
;; :start 0
;; :end num-chars
;; :encoding encoding))
(defun filter-eol-on-string (string eol-vector &optional (new-character #\Space))
  "Replace every EOL character found in STRING with NEW-CHARACTER.

Parameters:
  STRING        - the string to clean.  It is modified destructively, so
                  it must not be a literal constant.
  EOL-VECTOR    - vector of character codes (integers); each code is turned
                  into a character with CODE-CHAR and treated as an EOL
                  character.
  NEW-CHARACTER - replacement character, a space by default.

Whenever one of the EOL characters is actually present in STRING, BREAK is
called (entering the debugger) before the replacement is made; continuing
from the break resumes the replacement.

Returns: STRING itself, i.e. the same (now modified) object."
  ;(break string)
  (loop for chcode across eol-vector
        for eol-char = (code-char chcode)
        ;; CODE-CHAR may return NIL for a code that has no character;
        ;; such a code can never occur in STRING, so skip it.
        when eol-char
          do (when (find eol-char string)
               ;; Pass control string and argument separately: building
               ;; the message with FORMAT first and handing the result to
               ;; BREAK would make BREAK re-interpret any tilde contained
               ;; in STRING as a format directive.
               (break "CR or LF on: ~A" string))
             (nsubstitute new-character eol-char string))
  string)