// util.go - forked from princjef/gomarkdoc.

package lang
import (
"go/ast"
"go/printer"
"go/token"
"regexp"
"strings"
"unicode"
)
// printNode renders node as source text with go/printer, resolving positions
// through fs. The printer runs in UseSpaces mode with a tab width of 4.
//
// It returns the printed text, or an empty string together with the error
// reported by the printer.
func printNode(node ast.Node, fs *token.FileSet) (string, error) {
	var rendered strings.Builder

	err := (&printer.Config{Mode: printer.UseSpaces, Tabwidth: 4}).Fprint(&rendered, fs, node)
	if err != nil {
		return "", err
	}

	return rendered.String(), nil
}
// runeIsUpper reports whether r is an ASCII uppercase letter ('A' through
// 'Z'). Uppercase letters outside the ASCII range are not recognized.
func runeIsUpper(r rune) bool {
	if r < 'A' || r > 'Z' {
		return false
	}
	return true
}
// lowerToUpper is the numeric distance between an ASCII lowercase letter and
// its uppercase counterpart.
const lowerToUpper = 'a' - 'A'

// runeToUpper returns the uppercase form of r when r is an ASCII lowercase
// letter ('a' through 'z'). Every other rune (uppercase letters, digits,
// punctuation, non-ASCII) is returned unchanged, so the function is safe to
// call on arbitrary input.
func runeToUpper(r rune) rune {
	// Only shift runes that are actually lowercase; blindly subtracting the
	// offset would turn e.g. 'H' into '(' and ' ' into NUL.
	if r < 'a' || r > 'z' {
		return r
	}
	return r - lowerToUpper
}
// splitCamel breaks a camelCase identifier into space-separated words. The
// first rune is passed through runeToUpper; word boundaries are detected with
// runeIsUpper, so only ASCII capitals start a new word.
//
// The implementation lags one rune behind the iteration: each step decides
// how to emit the previous rune once the current one is known.
func splitCamel(text string) string {
	var (
		out       strings.Builder
		prev      rune
		wordRunes int
		first     = true
	)

	for _, cur := range text {
		if first {
			// Nothing to emit yet; just remember the converted first rune.
			first = false
			prev = runeToUpper(cur)
			continue
		}

		prevUpper, curUpper := runeIsUpper(prev), runeIsUpper(cur)

		if prevUpper && !curUpper && wordRunes > 0 {
			// A capital followed by a non-capital starts a new word, so the
			// space goes before the capital. This only applies when the
			// current word already has runes in it.
			out.WriteRune(' ')
			out.WriteRune(prev)
			wordRunes = 1
		} else if !prevUpper && curUpper {
			// A non-capital followed by a capital ends the current word:
			// emit the previous rune, then the separator. The capital itself
			// is emitted on a later step.
			out.WriteRune(prev)
			out.WriteRune(' ')
			wordRunes = 0
		} else {
			// No boundary: the previous rune extends the current word.
			out.WriteRune(prev)
			wordRunes++
		}

		prev = cur
	}

	// Flush the rune still held back. A zero value means there is nothing
	// pending (for example, the input was empty).
	if prev != 0 {
		out.WriteRune(prev)
	}

	return out.String()
}
// extractSummary returns the first sentence of the first paragraph of doc.
//
// The doc is normalized with normalizeDoc, cut at the first blank line, and
// flattened to a single line with formatDocParagraph. The sentence ends at
// the first ". " sequence, unless the period follows a lone uppercase letter
// (treated as an initial rather than the end of a sentence). A nonempty
// result always ends with a period.
func extractSummary(doc string) string {
	// Keep only the text before the first paragraph break, if any.
	paragraph := strings.SplitN(normalizeDoc(doc), "\n\n", 2)[0]

	var (
		summary strings.Builder
		// recent holds the last three runes written: recent[0] is the most
		// recent, recent[2] the oldest. Zero means "not yet seen".
		recent [3]rune
	)

	for _, cur := range formatDocParagraph(paragraph) {
		if cur == ' ' && recent[0] == '.' {
			// A period followed by a space ends the sentence, except when
			// the period comes right after a single uppercase letter that is
			// not itself preceded by a letter or digit (an initial).
			precededByWordChar := unicode.IsLetter(recent[2]) || unicode.IsDigit(recent[2])
			if !unicode.IsUpper(recent[1]) || precededByWordChar {
				break
			}
		}

		summary.WriteRune(cur)
		recent[2], recent[1], recent[0] = recent[1], recent[0], cur
	}

	// Terminate a nonempty summary with a period if it lacks one.
	if recent[0] != 0 && recent[0] != '.' {
		summary.WriteRune('.')
	}

	return summary.String()
}
// crlfRegex matches a carriage return followed by a line feed.
var crlfRegex = regexp.MustCompile("\r\n")

// normalizeDoc converts every CRLF sequence in doc to a single line feed and
// then strips leading and trailing whitespace from the result.
func normalizeDoc(doc string) string {
	unixEndings := crlfRegex.ReplaceAllString(doc, "\n")
	return strings.TrimSpace(unixEndings)
}
// formatDocParagraph collapses a multi-line paragraph into a single line.
// Each line is stripped of surrounding whitespace and the lines are joined
// with one space. Blank lines are kept as empty segments, so they produce
// consecutive spaces in the output.
func formatDocParagraph(paragraph string) string {
	lines := strings.Split(paragraph, "\n")
	for i := range lines {
		lines[i] = strings.TrimSpace(lines[i])
	}
	return strings.Join(lines, " ")
}