-
Notifications
You must be signed in to change notification settings - Fork 0
/
format.go
130 lines (110 loc) · 3.77 KB
/
format.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
// Package format contains functions for producing a [CodeReader] from a stream
// that contains markdown files with fishi codeblocks. A CodeReader can be sent
// directly to the frontend and handles all gathering of codeblocks and running
// any preprocessing needed on it.
package format
import (
"bufio"
"bytes"
"io"
"regexp"
"strings"
"unicode"
)
// regexCommentStart locates the '#' that opens a comment: one that is not part
// of a doubled "##" escape. Capture group 1 is the '#' itself. The pattern
// requires a non-'#' character after the '#', so callers must append a newline
// to the line being searched for a '#' at the very end of it to be found.
var regexCommentStart = regexp.MustCompile(`(?:^|[^#]+)(#)[^#]`)
// CodeReader is an io.Reader over fishi code extracted from markdown-formatted
// input. NewCodeReader consumes the whole input stream, gathers its fishi
// codeblocks and preprocesses them up front; Read then only serves bytes from
// that in-memory result and returns io.EOF once all of them have been read.
type CodeReader struct {
// r reads from the preprocessed fishi source assembled by NewCodeReader.
r *bytes.Reader
}
// Read implements io.Reader. It copies up to len(p) bytes of preprocessed fishi
// code into p and returns the number of bytes copied.
//
// The stream the CodeReader was opened on is fully consumed by NewCodeReader,
// so Read never performs I/O; the only error it returns is io.EOF, once every
// byte gathered from the fishi codeblocks has been read. A zero-value
// CodeReader holds no code and reports io.EOF immediately instead of panicking.
func (cr *CodeReader) Read(p []byte) (n int, err error) {
	// A CodeReader not built by NewCodeReader has no underlying reader.
	if cr.r == nil {
		return 0, io.EOF
	}
	return cr.r.Read(p)
}
// NewCodeReader creates a new CodeReader from a stream containing markdown
// formatted text with fishi codeblocks. It immediately reads r until EOF,
// gathers every fishi codeblock found in the markdown and runs preprocessing
// on the gathered code.
//
// It returns a nil CodeReader and the (unwrapped) read error if reading from r
// fails. Gathering and preprocessing operate on the in-memory copy and do not
// report errors.
func NewCodeReader(r io.Reader) (*CodeReader, error) {
	// The codeblock scan needs the complete document, so drain the stream
	// before doing anything else.
	allInput, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}

	gatheredFishi := scanMarkdownForFishiBlocks(allInput)
	fishiSource := normalizeFishi(gatheredFishi)

	return &CodeReader{r: bytes.NewReader(fishiSource)}, nil
}
// normalizeFishi preprocesses gathered fishi source line by line and returns
// the result as a new slice; source itself is not modified. For each line it:
//
//   - normalizes the end of line to a single \n,
//   - removes a comment, i.e. everything from the first solitary # onward,
//   - replaces each escaped ## with a literal #.
//
// In addition, leading space is stripped from the start of the output: lines
// that are blank (or become blank once their comment is removed) are dropped
// until the first line with content, and that line has its leading whitespace
// trimmed. Later lines keep their indentation and blank lines.
func normalizeFishi(source []byte) []byte {
	scanner := bufio.NewScanner(bytes.NewReader(source))
	// By default a Scanner stops with ErrTooLong on a line longer than 64 KiB,
	// which would silently drop the remainder of the source. Allow a single
	// line to be as long as the whole input so scanning cannot fail that way.
	scanner.Buffer(nil, len(source)+1)

	// No step below lengthens a line, so the output is at most the input plus
	// one \n for an unterminated final line.
	preprocessed := make([]byte, 0, len(source)+1)
	strippedLeadingSpace := false

	for scanner.Scan() {
		line := scanner.Text()

		// The default split function already removes \n and one preceding \r,
		// so most of this is a safeguard in case the split function changes;
		// it does still remove a second trailing \r.
		switch {
		case strings.HasSuffix(line, "\r\n"), strings.HasSuffix(line, "\n\r"):
			line = line[:len(line)-2]
		default:
			line = strings.TrimSuffix(line, "\n")
			line = strings.TrimSuffix(line, "\r")
		}

		// Cut the line at the first solitary # (a doubled ## is an escape, not
		// a comment). The pattern needs a character after the #, so a \n is
		// appended to let a # at the very end of the line match. A successful
		// match yields four indexes; [2] is the offset of the # itself.
		if m := regexCommentStart.FindStringSubmatchIndex(line + "\n"); len(m) >= 4 && m[2] >= 0 {
			line = line[:m[2]]
		}

		// Only escapes remain now; turn each ## into a literal #.
		line = strings.ReplaceAll(line, "##", "#")

		if !strippedLeadingSpace {
			line = strings.TrimLeftFunc(line, unicode.IsSpace)
			if line == "" {
				// Still in the leading blank region; emit nothing for it.
				continue
			}
			strippedLeadingSpace = true
		}

		preprocessed = append(preprocessed, line...)
		preprocessed = append(preprocessed, '\n')
	}
	// scanner.Err() is not consulted: the in-memory reader reports no error
	// other than EOF and the token limit above covers the entire input.

	return preprocessed
}