/
ruleset.go
236 lines (218 loc) · 5.88 KB
/
ruleset.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
package model
import (
"bytes"
"encoding/hex"
"hash/fnv"
"io"
"io/ioutil"
"os"
"path"
"regexp"
"strings"
"time"
"unicode/utf8"
"github.com/go-git/go-git/v5/plumbing/format/diff"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/gobuffalo/packr/v2"
"github.com/mholt/archiver/v3"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
)
const (
	// contextSize is the amount of lines (before and after)
	// the violating line that will be added to the snippet.
	contextSize = 4
)
// RuleSet groups all Rules and Parsers interpreted from the user defined file.
//
//   - Rules represent parsers that are context independent:
//     they can parse a file line by line to precisely find the leak
//   - Parsers are parsers that need the entire file as a context
//     to analyze for leaks correctly (TODO: rename)
//
type RuleSet struct {
	// Version of the configuration file.
	// Not used currently but for future proofing.
	APIVersion string `yaml:"apiVersion"`
	// FNV hash of the configuration file.
	// Useful for determining whether or not the definition file
	// has been changed. (for future uses)
	Checksum string
	// ReadAt records when the definition file was loaded.
	ReadAt time.Time
	// IndepParsers are the line-oriented regex rules ("rules" in yaml).
	IndepParsers []IndepParserRule `yaml:"rules"`
	// CtxParsers are the whole-file parsers ("parsers" in yaml).
	CtxParsers []CtxParserRule `yaml:"parsers"`
	// BlackList holds regex patterns of file paths to skip.
	BlackList []string `yaml:"black_list"`
	// BlackListCompiled is the compiled form of BlackList (never read from yaml).
	BlackListCompiled []*regexp.Regexp `yaml:"-"`
	// Sets whether or not to examine compressed files.
	Compressed bool `yaml:"compressed"`
}
// ParseConfig reads the user defined configuration file and compiles
// every rule, parser, and blacklist entry it declares.
//
// When file is empty the definition embedded in the binary
// (resources/rules.yaml via packr) is used instead. Any read,
// unmarshal, or regex-compile failure is fatal.
func (r *RuleSet) ParseConfig(file string) {
	var data []byte
	var err error
	if len(file) == 0 {
		// Fall back to the definition bundled into the binary.
		box := packr.New("rules", "../../resources")
		data, err = box.Find("rules.yaml")
		if err != nil {
			log.Fatal().
				Str("path", file).
				Err(err).
				Msg("Failed to read static binary definition")
		}
	} else {
		data, err = ioutil.ReadFile(file)
		if err != nil {
			log.Fatal().
				Str("path", file).
				Err(err).
				Msg("Failed to read rules definition @")
		}
	}
	// r is already a pointer; no need to take its address again.
	if err := yaml.Unmarshal(data, r); err != nil {
		log.Fatal().
			Str("path", file).
			Err(err).
			Msg("Failed to unmarshal yaml @")
	}
	// FNV-hash the raw definition so later runs can detect changes.
	// BUG FIX: the previous code used fnv.New32().Sum(data), which
	// *appends* the hash of nothing to data — the "checksum" was just
	// the first bytes of the file itself (and panicked on tiny files).
	// The data must be fed through Write before calling Sum.
	hasher := fnv.New32()
	hasher.Write(data) // hash.Hash.Write never returns an error
	r.Checksum = hex.EncodeToString(hasher.Sum(nil))
	r.ReadAt = time.Now()
	for idx, rule := range r.IndepParsers {
		r.IndepParsers[idx].Compiled = regexp.MustCompile(rule.Definition)
	}
	for idx := range r.CtxParsers {
		r.CtxParsers[idx].Init()
	}
	for _, bl := range r.BlackList {
		r.BlackListCompiled = append(r.BlackListCompiled, regexp.MustCompile(bl))
	}
}
// ParsePatch iterates over each chunk of the patch object and applies
// all context independent rules to the added lines, sending any leak
// found on leakChan as a GitLeak.
// TODO: allow context dependent rules
func (r *RuleSet) ParsePatch(patch *object.Patch, commit *object.Commit, repo *Repo, leakChan chan Leak) {
	for _, filePatch := range patch.FilePatches() {
		// BUG FIX: this used to `break`, which aborted every remaining
		// file patch as soon as one binary file was seen; a binary file
		// should only skip itself.
		if filePatch.IsBinary() {
			continue
		}
		_, to := filePatch.Files()
		if to == nil {
			// File was deleted in this patch; nothing was added.
			continue
		}
		// BUG FIX: a blacklist match used to `break` out of this inner
		// loop only, so blacklisted files were still scanned. A match
		// must skip the whole file.
		blacklisted := false
		for _, blacklist := range r.BlackListCompiled {
			if blacklist.MatchString(to.Path()) {
				blacklisted = true
				break
			}
		}
		if blacklisted {
			continue
		}
		for _, chunk := range filePatch.Chunks() {
			if chunk.Type() != diff.Add {
				continue
			}
			lines := strings.Split(chunk.Content(), "\n")
			for idx, line := range lines {
				for _, rule := range r.IndepParsers {
					match := rule.Compiled.FindStringIndex(line)
					if len(match) == 0 {
						continue
					}
					// Stable copy: the leak escapes this iteration via
					// leakChan, while the range variable's storage is
					// reused — &rule must not alias the loop variable.
					rule := rule
					// Snippet bounds: contextSize lines before and after
					// the violation, clamped to the chunk. end is
					// exclusive, so the violating line is always included
					// (the old `end = len(lines)-1` clamp dropped the last
					// line — or the violation itself when it was last).
					start := idx - contextSize
					if start < 0 {
						start = 0
					}
					end := idx + contextSize + 1
					if end > len(lines) {
						end = len(lines)
					}
					disc := GitLeak{
						Line:            idx,
						Affected:        idx - start,
						File:            to.Path(),
						StartIdx:        match[0],
						EndIdx:          match[1],
						Author:          commit.Author.Name,
						When:            commit.Author.When,
						Commit:          to.Hash().String(),
						Repo:            repo,
						IndepParserRule: &rule,
					}
					disc.Snippet = make([]string, end-start)
					copy(disc.Snippet, lines[start:end])
					leakChan <- disc
					// One leak per line is enough; stop trying rules.
					break
				}
			}
		}
	}
}
// parseArchive walks every entry of the compressed archive behind file
// and runs the regular-file scanner on each regular entry, reporting
// leaks on leakChan. Walk errors are logged, not propagated.
func (r *RuleSet) parseArchive(file *os.File, leakChan chan Leak) {
	walker := func(entry archiver.File) error {
		if !entry.Mode().IsRegular() {
			return nil
		}
		// Report paths as "<archive>/<entry>" so leaks are traceable.
		r.parseRegular(entry, path.Join(file.Name(), entry.Name()), leakChan)
		return nil
	}
	if err := archiver.Walk(file.Name(), walker); err != nil {
		log.Error().Str("file", file.Name()).Err(err).Msg("Failed to read archive")
	}
}
// parseRegular scans a plain (non-archive) file for leaks.
//
// If filename matches the extension list of a context parser the whole
// stream is handed to that parser; otherwise the content is read into
// memory and checked line by line against the context independent
// rules. Leaks are sent on leakChan as FileLeak values.
func (r *RuleSet) parseRegular(file io.Reader, filename string, leakChan chan Leak) {
	// Dispatch to a context-aware parser when the extension matches.
	// (The old comment claimed this was a blacklist check — it is not.)
	for _, ctxRule := range r.CtxParsers {
		// Stable copy: the parser receives a pointer that may outlive
		// this iteration, so it must not alias the range variable.
		ctxRule := ctxRule
		for _, ext := range ctxRule.Extensions {
			if strings.HasSuffix(filename, ext) {
				ctxRule.Parser.Parse(file, leakChan, filename, &ctxRule)
				return
			}
		}
	}
	buf := &bytes.Buffer{}
	// BUG FIX: the read error used to be silently discarded; a partial
	// read would have been scanned as if it were the whole file.
	if _, err := buf.ReadFrom(file); err != nil {
		log.Error().Str("file", filename).Err(err).Msg("Failed to read file")
		return
	}
	lines := strings.Split(buf.String(), "\n")
	for idx, line := range lines {
		// Skip lines that are not valid UTF-8 (likely binary content).
		if !utf8.ValidString(line) {
			continue
		}
		for _, rule := range r.IndepParsers {
			match := rule.Compiled.FindStringIndex(line)
			if len(match) == 0 {
				continue
			}
			// Stable copy: the leak escapes via leakChan while the
			// range variable's storage is reused by the loop.
			rule := rule
			// Snippet bounds: contextSize lines before and after the
			// violation, clamped to the file. end is exclusive, so the
			// violating line is always included (the old clamp to
			// len(lines)-1 dropped the final context line — or the
			// violation itself when it was the last line).
			start := idx - contextSize
			if start < 0 {
				start = 0
			}
			end := idx + contextSize + 1
			if end > len(lines) {
				end = len(lines)
			}
			disc := FileLeak{
				File:            filename,
				StartIdx:        match[0],
				EndIdx:          match[1],
				Line:            idx,
				Affected:        idx - start,
				IndepParserRule: &rule,
				Confidence:      "Low",
			}
			disc.Snippet = make([]string, end-start)
			copy(disc.Snippet, lines[start:end])
			leakChan <- disc
			// One leak per line is enough; stop trying rules.
			break
		}
	}
}
// Parse reads a given file and applies all rules given.
// Files that cannot be opened are traced and skipped. When Compressed
// is set and the extension is a known archive format, the archive
// walker is used; everything else goes through the regular scanner.
func (r *RuleSet) Parse(file string, leakChan chan Leak) {
	fd, err := os.Open(file)
	if err != nil {
		log.Trace().
			Str("file", file).
			Err(err).
			Msg("Failed to read")
		return
	}
	defer fd.Close()
	// ByExtension succeeding means the path looks like an archive.
	_, archErr := archiver.ByExtension(file)
	if r.Compressed && archErr == nil {
		r.parseArchive(fd, leakChan)
		return
	}
	r.parseRegular(fd, file, leakChan)
}