forked from mattermost/mattermost
/
plain.go
40 lines (33 loc) · 767 Bytes
/
plain.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.
package docextractor
import (
"io"
"io/ioutil"
"unicode"
"unicode/utf8"
)
// plainExtractor treats a file as plain text: it matches every filename and
// returns the file's bytes verbatim when the beginning of the content looks
// like text.
type plainExtractor struct{}

// Match reports whether this extractor accepts filename. It accepts every
// name unconditionally; the actual text/non-text decision is made by Extract
// from the content.
func (pe *plainExtractor) Match(filename string) bool {
	return true
}

// Extract reads all of r and returns it as a string if its leading portion
// looks like plain text.
//
// Runes are decoded from the start of the content for as long as the rune's
// starting byte offset is at most 1024. If any of them is an invalid UTF-8
// sequence, or is neither a graphic character nor whitespace, the content is
// considered non-text and ("", nil) is returned. Content past that window is
// not inspected. filename is not used.
//
// A read error from r is returned to the caller together with an empty
// string.
func (pe *plainExtractor) Extract(filename string, r io.Reader) (string, error) {
	// Highest byte offset at which a rune is still inspected.
	const sniffLimit = 1024

	text, err := ioutil.ReadAll(r)
	if err != nil {
		return "", err
	}

	for offset := 0; offset < len(text) && offset <= sniffLimit; {
		c, size := utf8.DecodeRune(text[offset:])

		// DecodeRune reports a malformed sequence as (RuneError, 1). RuneError
		// is U+FFFD, which is itself a graphic symbol, so it has to be told
		// apart from a correctly encoded U+FFFD (size 3) explicitly.
		if c == utf8.RuneError && size == 1 {
			return "", nil
		}

		// Accept any visible character plus any whitespace.
		if !unicode.IsGraphic(c) && !unicode.IsSpace(c) {
			return "", nil
		}

		offset += size
	}

	return string(text), nil
}