This repository has been archived by the owner on Feb 16, 2023. It is now read-only.
/
wc.go
177 lines (159 loc) · 2.92 KB
/
wc.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
package wc
import (
"bytes"
"io"
"os"
"unicode"
"unicode/utf8"
"github.com/ericlagergren/go-coreutils/wc/internal/sys"
)
// Results holds the totals produced by one call to Counter.Count.
//
// Which fields are filled in depends on the path Count takes: the byte-only
// and line-only fast paths leave the remaining fields at zero.
type Results struct {
// Lines is the number of '\n' characters seen.
Lines int64
// Words is the number of runs of printable characters delimited by white
// space.
Words int64
// Chars is the number of runes decoded from the input.
Chars int64
// Bytes is the number of bytes in the input.
Bytes int64
// MaxLength is the largest column position reached on any line.
MaxLength int64
}
// Counter counts lines, words, characters and bytes read from an io.Reader.
//
// It embeds a large read buffer, so it is meant to be created once (see
// NewCounter) and handled through a pointer.
type Counter struct {
// TabWidth is the distance between tab stops used to advance the column
// position when a '\t' is encountered. NewCounter sets it to 8.
TabWidth int64
// buf is the scratch buffer every read is performed into
// (1<<17 bytes = 128 KiB).
buf [1 << 17]byte
// opts is the bitmask of option constants (Lines, Words, ...) given to
// NewCounter; Count uses it to pick a counting strategy.
opts uint8
}
// Option bits accepted by NewCounter. Each constant is a distinct power of
// two (Lines = 1, Words = 2, Chars = 4, Bytes = 8, MaxLength = 16) so they can
// be OR-ed together. Count takes a cheaper path when the mask is exactly
// Bytes, Lines, or Lines|Bytes; every other combination goes through the
// full scan.
const (
Lines = 1 << iota // count lines
Words // count words
Chars // count chars
Bytes // count bytes
MaxLength // find max line length
)
// NewCounter allocates a Counter that will gather the statistics selected by
// opts, a bitwise OR of the option constants. The tab width starts at 8 and
// may be changed afterwards through the TabWidth field.
func NewCounter(opts uint8) *Counter {
	c := new(Counter)
	c.TabWidth = 8
	c.opts = opts
	return c
}
// read performs a single Read from r into c.buf and returns the number of
// bytes now valid at the start of c.buf together with r's error.
//
// The byte count is reported even when the error is non-nil: an io.Reader may
// return data together with an error (io.EOF included), and callers have to
// account for those bytes before acting on the error.
func (c *Counter) read(r io.Reader) (int64, error) {
	n, err := r.Read(c.buf[:])
	return int64(n), err
}
// newLine is the separator handed to bytes.Count when only lines are counted.
var newLine = []byte{'\n'}
// Count consumes r and returns the statistics selected by the Counter's
// options.
//
// Strategy, from cheapest to most expensive:
//   - Bytes only, r is an *os.File whose size statSize can vouch for: the
//     answer is derived from the file size and nothing is read.
//   - Bytes only: the input is read and the byte totals are summed.
//   - Lines or Lines|Bytes: the input is read and '\n' bytes are counted.
//   - anything else: the full rune-by-rune scan in countComplicated.
//
// A read error other than io.EOF is returned together with the totals
// gathered up to that point.
func (c *Counter) Count(r io.Reader) (res Results, err error) {
	if file, ok := r.(*os.File); ok {
		if c.opts == Bytes {
			if size, ok := statSize(file); ok {
				// Only the bytes from the current offset onwards would be
				// produced by reading, so report exactly those. If the offset
				// cannot be determined, or lies past the reported size, fall
				// through and count by reading instead.
				off, serr := file.Seek(0, io.SeekCurrent)
				if serr == nil && off <= size {
					return Results{Bytes: size - off}, nil
				}
			}
		}
		// NOTE(review): presumably a read-ahead hint for the kernel; confirm
		// against the internal sys package.
		sys.Fadvise(int(file.Fd()))
	}

	switch c.opts {
	case Bytes:
		for {
			n, rerr := c.read(r)
			res.Bytes += n
			if rerr != nil {
				if rerr == io.EOF {
					return res, nil
				}
				return res, rerr
			}
		}
	case Lines, Lines | Bytes:
		for {
			n, rerr := c.read(r)
			res.Bytes += n
			// Count before looking at the error so that data delivered
			// together with it is not lost.
			res.Lines += int64(bytes.Count(c.buf[:n], newLine))
			if rerr != nil {
				if rerr == io.EOF {
					return res, nil
				}
				return res, rerr
			}
		}
	default:
		return c.countComplicated(r)
	}
}
// countComplicated scans r rune by rune and fills in every field of Results.
//
// Rules applied to each decoded rune:
//   - '\n' ends a line (Lines is incremented); '\n', '\r' and '\f' all record
//     the current column in MaxLength if it is a new maximum and reset the
//     column to 0.
//   - '\t' advances the column to the next multiple of c.TabWidth. A TabWidth
//     that is not positive makes tabs zero-width instead of dividing by zero.
//   - ' ' advances the column by one; '\v' and every other white-space rune
//     (per unicode.IsSpace) have no width.
//   - All of the above end the current word, if any.
//   - Any other printable rune (per unicode.IsPrint) advances the column by
//     one and starts or continues a word.
//   - Non-printable runes are counted in Chars and otherwise ignored.
//
// Invalid UTF-8 is decoded one byte at a time as utf8.RuneError.
//
// On a read error other than io.EOF the totals gathered so far are returned
// together with that error.
func (c *Counter) countComplicated(r io.Reader) (res Results, err error) {
	var (
		pos    int64 // column position on the current line
		inWord bool  // whether the previous rune was part of a word
		carry  int   // bytes of an unfinished rune kept at the start of c.buf
	)
	for {
		// Read behind any bytes carried over from the previous chunk so a
		// rune split across two reads is decoded as a whole.
		n, rerr := r.Read(c.buf[carry:])
		res.Bytes += int64(n)
		end := carry + n
		data := c.buf[:end] // never decode past the bytes that are valid

		bp := 0
		for bp < end {
			// Near the end of the chunk the next rune may be incomplete.
			// While more input can still arrive, hold those bytes back.
			if rerr == nil && end-bp < utf8.UTFMax && !utf8.FullRune(data[bp:]) {
				break
			}
			ch, size := utf8.DecodeRune(data[bp:])

			endsWord := true
			switch ch {
			case '\n':
				res.Lines++
				fallthrough
			case '\r', '\f':
				if pos > res.MaxLength {
					res.MaxLength = pos
				}
				pos = 0
			case '\t':
				if c.TabWidth > 0 {
					pos += c.TabWidth - pos%c.TabWidth
				}
			case ' ':
				pos++
			case '\v':
				// Separates words but occupies no column.
			default:
				switch {
				case unicode.IsSpace(ch):
					// Must be tested before IsPrint: IsPrint rejects every
					// space other than U+0020, so the reverse order would
					// never treat these runes as separators.
				case unicode.IsPrint(ch):
					pos++
					inWord = true
					endsWord = false
				default:
					endsWord = false
				}
			}
			if endsWord {
				if inWord {
					res.Words++
				}
				inWord = false
			}

			res.Chars++
			bp += size
		}
		// Move the unfinished tail (at most utf8.UTFMax-1 bytes) to the front.
		carry = copy(c.buf[:], data[bp:])

		// The data delivered with an error has been processed above; only now
		// act on the error itself.
		if rerr != nil {
			if rerr != io.EOF {
				return res, rerr
			}
			break
		}
	}
	if pos > res.MaxLength {
		res.MaxLength = pos
	}
	if inWord {
		res.Words++
	}
	return res, nil
}
// statSize reports the size of file as recorded in its metadata, for use as
// a shortcut when only a byte count is needed.
//
// ok is false whenever that size cannot be trusted to equal the number of
// bytes a reader would get, in which case the caller must count by reading:
//   - Stat fails;
//   - file is not a regular file (directories, pipes, sockets, devices such
//     as a terminal, ...), since Size is system-dependent for those;
//   - the reported size is zero, because pseudo-filesystems such as Linux
//     procfs commonly report 0 for files that do produce data. Reading a
//     genuinely empty file costs next to nothing and gives the same answer.
func statSize(file *os.File) (n int64, ok bool) {
	stat, err := file.Stat()
	if err != nil {
		return 0, false
	}
	if !stat.Mode().IsRegular() {
		return 0, false
	}
	// NOTE(eric): GNU's wc says we should seek 1 block size from EOF because it
	// works better on proc-like systems. I like the idea, but I don't want this
	// code to be under the GPL.
	size := stat.Size()
	if size <= 0 {
		return 0, false
	}
	return size, true
}