-
Notifications
You must be signed in to change notification settings - Fork 414
/
dataset.go
179 lines (143 loc) · 4.18 KB
/
dataset.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
package cwhub
import (
"errors"
"fmt"
"io"
"io/fs"
"net/http"
"os"
"path/filepath"
"runtime"
"time"
"github.com/sirupsen/logrus"
"gopkg.in/yaml.v3"
"github.com/crowdsecurity/crowdsec/pkg/types"
)
// DataSet lists the data sources an item depends on.
// It is decoded from the "data:" section of the item's yaml.
type DataSet struct {
	// Data holds one entry per file to fetch; the code in this file reads
	// each entry's SourceURL and DestPath.
	Data []types.DataSource `yaml:"data,omitempty"`
}
// downloadFile downloads a file and writes it to disk, with no hash verification.
//
// The response body is streamed into a temporary file created in the same
// directory as destPath, synced, closed and finally renamed to destPath.
// The temporary file is removed if any step before the rename fails.
//
// On success, "updated <file name>" is printed on stdout.
//
// It returns an error if the request fails, if the server does not answer
// with 200 OK, or if any of the filesystem operations fails.
func downloadFile(url string, destPath string) error {
	resp, err := hubClient.Get(url)
	if err != nil {
		return fmt.Errorf("while downloading %s: %w", url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("bad http code %d for %s", resp.StatusCode, url)
	}

	// Download to a temporary location to avoid corrupting files
	// that are currently in use or memory mapped.
	tmpFile, err := os.CreateTemp(filepath.Dir(destPath), filepath.Base(destPath)+".*.tmp")
	if err != nil {
		return err
	}

	tmpFileName := tmpFile.Name()

	// Track how far we got, so the cleanup below only undoes what is still pending:
	// no second Close on an already closed file, no Remove once the file has been renamed.
	closed, renamed := false, false

	defer func() {
		if !closed {
			_ = tmpFile.Close()
		}

		if !renamed {
			_ = os.Remove(tmpFileName)
		}
	}()

	// avoid reading the whole file in memory
	if _, err = io.Copy(tmpFile, resp.Body); err != nil {
		return fmt.Errorf("while downloading %s: %w", url, err)
	}

	if err = tmpFile.Sync(); err != nil {
		return err
	}

	// The descriptor is not reusable after Close, whether or not it reports an error.
	closed = true

	if err = tmpFile.Close(); err != nil {
		return err
	}

	if runtime.GOOS == "windows" {
		// On Windows, rename will fail if the destination file already exists
		// so we remove it first. A missing destination is not an error.
		if err = os.Remove(destPath); err != nil && !errors.Is(err, fs.ErrNotExist) {
			return err
		}
	}

	if err = os.Rename(tmpFileName, destPath); err != nil {
		return err
	}

	renamed = true

	// a check on stdout is used while scripting to know if the hub has been upgraded
	// and a configuration reload is required, so this is only printed once the
	// new file is actually in place.
	// TODO: use a better way to communicate this
	fmt.Printf("updated %s\n", filepath.Base(destPath))

	return nil
}
// needsUpdate checks if a data file has to be downloaded (or updated).
//
//   - if the local file doesn't exist, or cannot be inspected, update.
//   - if the HEAD request fails or does not answer 200 OK, don't update:
//     a GET would likely fail too.
//   - if the remote is newer than the local file, update.
//   - if the remote has no modification date, or one that cannot be parsed,
//     update only when the local file has been modified > a week ago.
//
// Problems are reported through logger; no error is returned.
func needsUpdate(destPath string, url string, logger *logrus.Logger) bool {
	fileInfo, err := os.Stat(destPath)

	switch {
	case errors.Is(err, fs.ErrNotExist):
		return true
	case err != nil:
		logger.Errorf("while getting %s: %s", destPath, err)
		return true
	}

	resp, err := hubClient.Head(url)
	if err != nil {
		logger.Errorf("while getting %s: %s", url, err)
		// Head failed, Get would likely fail too -> no update
		return false
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		logger.Errorf("bad http code %d for %s", resp.StatusCode, url)
		return false
	}

	// update if local file is older than this
	const shelfLife = 7 * 24 * time.Hour

	lastModify := fileInfo.ModTime()
	localIsOld := time.Since(lastModify) > shelfLife

	remoteLastModified := resp.Header.Get("Last-Modified")
	if remoteLastModified == "" {
		if localIsOld {
			logger.Infof("no last modified date for %s, but local file is older than %s", url, shelfLife)
		}

		return localIsOld
	}

	// http.ParseTime understands the three date formats allowed in HTTP headers.
	lastAvailable, err := http.ParseTime(remoteLastModified)
	if err != nil {
		// Fall back to the plain RFC1123 layout, which also accepts
		// zone abbreviations other than GMT.
		lastAvailable, err = time.Parse(time.RFC1123, remoteLastModified)
	}

	if err != nil {
		logger.Warningf("while parsing last modified date for %s: %s", url, err)
		return localIsOld
	}

	if lastModify.Before(lastAvailable) {
		logger.Infof("new version available, updating %s", destPath)
		return true
	}

	return false
}
// downloadDataSet fetches every data file declared for an item.
//
// reader is consumed as a stream of yaml documents, each decoded into a DataSet.
// For every entry, the destination is resolved under dataFolder with safePath,
// and the file is downloaded when force is set or needsUpdate reports that the
// local copy is missing or outdated.
//
// The first decoding, path or download error aborts the whole operation.
func downloadDataSet(dataFolder string, force bool, reader io.Reader, logger *logrus.Logger) error {
	decoder := yaml.NewDecoder(reader)

	for {
		var set DataSet

		err := decoder.Decode(&set)
		if errors.Is(err, io.EOF) {
			// no more documents: everything went fine
			return nil
		}

		if err != nil {
			return fmt.Errorf("while reading file: %w", err)
		}

		for _, source := range set.Data {
			target, err := safePath(dataFolder, source.DestPath)
			if err != nil {
				return err
			}

			// skip files that are already up to date, unless forced
			if !force && !needsUpdate(target, source.SourceURL, logger) {
				continue
			}

			logger.Debugf("downloading %s in %s", source.SourceURL, target)

			if err := downloadFile(source.SourceURL, target); err != nil {
				return fmt.Errorf("while getting data: %w", err)
			}
		}
	}
}