/
doc.go
278 lines (223 loc) · 6.64 KB
/
doc.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
package dynamo
import (
"context"
"time"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/dynamodb"
dyna "github.com/aws/aws-sdk-go-v2/service/dynamodb/dynamodbattribute"
"github.com/docshelf/docshelf"
"github.com/pkg/errors"
"github.com/rs/xid"
)
// Tag is the shape of a single item in the dynamo tag table: a tag name together
// with the paths of every document carrying that tag.
type Tag struct {
	// Tag is the tag name.
	Tag string `json:"tag"`
	// Paths lists the paths of the documents that carry the tag.
	Paths []string `json:"paths"`
}
// GetDoc fetches a docshelf Document from dynamodb. It will also read and package the Content
// from an underlying FileStore.
//
// The path argument may be either a document path or a document ID. A value that parses
// as an xid is treated as an ID and resolved to the document's path through the doc ID
// index before the document is loaded; a docshelf not-found error is returned when the
// index holds no document for that ID.
func (s Store) GetDoc(ctx context.Context, path string) (docshelf.Doc, error) {
	var doc docshelf.Doc

	// Only a value that parses as an xid can be a document ID (IDs are minted with
	// xid.New), so the ID index is consulted exactly when parsing succeeds.
	if _, err := xid.FromString(path); err == nil {
		var docs []docshelf.Doc
		if err := s.getItemsGsi(ctx, s.docTable, s.docIDIndex, "id", path, &docs); err != nil {
			return doc, err
		}

		if len(docs) == 0 {
			return doc, docshelf.NewErrNotFound("")
		}

		path = docs[0].Path
	}

	// load the metadata by its path key
	if err := s.getItem(ctx, s.docTable, "path", path, &doc); err != nil {
		return doc, err
	}

	// the content lives in the file store, not in dynamo
	content, err := s.fs.ReadFile(path)
	if err != nil {
		return doc, err
	}

	doc.Content = string(content)
	return doc, nil
}
// ListDocs fetches a slice of docshelf Document metadata from dynamodb. If a query is provided, then the configured
// docshelf.TextIndex will be used to get a set of document paths. If tags are also provided, then they will be used
// to further filter down the results. If no query is provided, but tags are, then the tags will filter down the entire
// set of documents stored.
//
// With neither a query nor tags the whole doc table is scanned, following the scan's
// pagination until every page has been read.
func (s Store) ListDocs(ctx context.Context, query string, tags ...string) ([]docshelf.Doc, error) {
	// do a full listing if no filters are given
	if query == "" && len(tags) == 0 {
		var docs []docshelf.Doc
		input := dynamodb.ScanInput{
			TableName: aws.String(s.docTable),
		}

		// A single Scan call returns a bounded page of results; keep scanning from
		// LastEvaluatedKey until dynamo reports there is nothing left.
		for {
			res, err := s.client.ScanRequest(&input).Send()
			if err != nil {
				return nil, err
			}

			var page []docshelf.Doc
			if err := dyna.UnmarshalListOfMaps(res.Items, &page); err != nil {
				return nil, err
			}

			// adopt the first page as-is so the result for a single-page scan is
			// exactly what the unmarshaler produced
			if docs == nil {
				docs = page
			} else {
				docs = append(docs, page...)
			}

			if len(res.LastEvaluatedKey) == 0 {
				return docs, nil
			}
			input.ExclusiveStartKey = res.LastEvaluatedKey
		}
	}

	var foundPaths []string
	if query != "" {
		var err error
		foundPaths, err = s.ti.Search(ctx, query)
		if err != nil {
			return nil, err
		}
	}

	// query only: load every path the text index returned
	if len(tags) == 0 {
		return s.listDocs(ctx, foundPaths)
	}

	tagged, err := s.listTaggedDocs(ctx, tags)
	if err != nil {
		return nil, err
	}

	// tags only: nothing further to filter
	if query == "" {
		return tagged, nil
	}

	// query and tags: keep the tagged docs whose path was also a search hit
	hits := make(map[string]struct{}, len(foundPaths))
	for _, p := range foundPaths {
		hits[p] = struct{}{}
	}

	var docs []docshelf.Doc
	for _, doc := range tagged {
		if _, ok := hits[doc.Path]; ok {
			docs = append(docs, doc)
		}
	}

	return docs, nil
}
// listDocs loads the doc table item stored under each of the given paths, in order.
// It does not read from the file store. The first lookup error aborts the listing
// and is returned as-is.
func (s Store) listDocs(ctx context.Context, paths []string) ([]docshelf.Doc, error) {
	var found []docshelf.Doc

	for i := range paths {
		var item docshelf.Doc
		err := s.getItem(ctx, s.docTable, "path", paths[i], &item)
		if err != nil {
			return nil, err
		}

		found = append(found, item)
	}

	return found, nil
}
// listTaggedDocs returns the doc table items for the documents that carry every one
// of the given tags. Each tag is read from the tag table and the path lists are
// intersected; the surviving paths are then loaded from the doc table.
func (s Store) listTaggedDocs(ctx context.Context, tags []string) ([]docshelf.Doc, error) {
	var paths []string
	for i, t := range tags {
		var tag Tag
		if err := s.getItem(ctx, s.tagTable, "tag", t, &tag); err != nil {
			return nil, err
		}

		// Seed with the first tag's paths and intersect from then on. The loop index
		// (rather than paths == nil) marks the first tag, so that an empty result
		// stays empty instead of being replaced by the next tag's paths.
		if i == 0 {
			paths = tag.Paths
			continue
		}

		paths = intersect(paths, tag.Paths)
	}

	return s.listDocs(ctx, paths)
}
// PutDoc creates or updates an existing docshelf Doc in dynamodb. It will also store the
// Content in an underlying FileStore.
//
// A doc without a path is rejected. For a new doc an ID is generated and CreatedAt is
// set; for an existing doc the stored ID, CreatedBy and CreatedAt are kept. The content
// is written to the file store and text indexed, and the metadata (without content) is
// put into the doc table. If the metadata write fails the file store is rolled back.
// The ID of the stored doc is returned.
func (s Store) PutDoc(ctx context.Context, doc docshelf.Doc) (string, error) {
	// having no path is an invalid state
	if doc.Path == "" {
		return "", errors.New("doc must have a valid path")
	}

	// one timestamp for the whole operation so a new doc has CreatedAt == UpdatedAt
	now := time.Now()

	existing, err := s.GetDoc(ctx, doc.Path)
	isNew := err != nil
	if isNew {
		if !docshelf.CheckNotFound(err) {
			return "", errors.Wrap(err, "could not verify existing file")
		}

		// set one-time fields for new document
		doc.ID = xid.New().String()
		doc.CreatedAt = now
	} else {
		// need to enforce integrity of the one-time fields if the doc exists.
		if existing.ID != "" {
			doc.ID = existing.ID
		}
		doc.CreatedBy = existing.CreatedBy
		doc.CreatedAt = existing.CreatedAt
	}

	doc.UpdatedAt = now

	// save content
	if err := s.fs.WriteFile(doc.Path, []byte(doc.Content)); err != nil {
		return "", errors.Wrap(err, "failed to write doc to file store")
	}

	// full text index
	// NOTE(review): a failure here leaves the freshly written content in place and the
	// text index is never rolled back - confirm whether that is acceptable.
	if err := s.ti.Index(ctx, doc); err != nil {
		return "", errors.Wrap(err, "failed to text index doc")
	}

	doc.Content = "" // need to clear content before storing doc
	marshaled, err := dyna.MarshalMap(&doc)
	if err != nil {
		return "", errors.Wrap(err, "failed to marshal doc for dynamo")
	}

	input := dynamodb.PutItemInput{
		TableName: aws.String(s.docTable),
		Item:      marshaled,
	}

	// save metadata
	if _, err := s.client.PutItemRequest(&input).Send(); err != nil {
		// Roll the file store back so it matches the metadata still in dynamo: a new
		// doc has no metadata, so its file is removed; an existing doc keeps its old
		// metadata, so its previous content is restored rather than deleted.
		var cleanupErr error
		if isNew {
			cleanupErr = s.fs.RemoveFile(doc.Path)
		} else {
			cleanupErr = s.fs.WriteFile(doc.Path, []byte(existing.Content))
		}
		if cleanupErr != nil {
			return "", errors.Wrapf(cleanupErr, "cleanup failed for file: %s", doc.Path)
		}

		return "", errors.Wrap(err, "failed to put doc into dynamo")
	}

	return doc.ID, nil
}
// TagDoc tags an existing document with the given tags. The path argument may also be
// a document ID: a value that parses as an xid is resolved to the document's path first.
// TODO (erik): This is a mirror of the bolt implementation. Need to research and find out
// if there's a more efficient way to get this behavior out of dynamo.
func (s Store) TagDoc(ctx context.Context, path string, tags ...string) error {
	if _, parseErr := xid.FromString(path); parseErr == nil {
		resolved, err := s.GetDoc(ctx, path)
		if err != nil {
			return err
		}

		path = resolved.Path
	}

	for i := range tags {
		name := tags[i]

		var entry Tag
		if err := s.getItem(ctx, s.tagTable, "tag", name, &entry); err != nil {
			return err
		}

		// nothing to write if the tag already lists this path.
		if contains(entry.Paths, path) {
			continue
		}

		// an entry with no name is given the requested tag name before it is stored.
		if entry.Tag == "" {
			entry.Tag = name
		}
		entry.Paths = append(entry.Paths, path)

		item, err := dyna.MarshalMap(&entry)
		if err != nil {
			return err
		}

		// NOTE(review): this is a read-modify-write of the whole tag item; concurrent
		// calls for the same tag could presumably overwrite each other - confirm.
		put := dynamodb.PutItemInput{
			TableName: aws.String(s.tagTable),
			Item:      item,
		}
		if _, err := s.client.PutItemRequest(&put).Send(); err != nil {
			return err
		}
	}

	return nil
}
// RemoveDoc removes a docshelf Doc from dynamo as well as the underlying FileStore.
// The doc is addressed by its path. The content file is removed first and the doc table
// item second; an error from either step is returned wrapped.
func (s Store) RemoveDoc(ctx context.Context, path string) error {
	// Build the dynamo key before touching anything, so that a key failure cannot
	// leave the metadata behind with its content already deleted.
	key, err := makeKey("path", path)
	if err != nil {
		return errors.Wrap(err, "failed to make key")
	}

	if err := s.fs.RemoveFile(path); err != nil {
		return errors.Wrap(err, "failed to remove doc from file store")
	}

	input := dynamodb.DeleteItemInput{
		TableName: aws.String(s.docTable),
		Key:       key,
	}

	if _, err := s.client.DeleteItemRequest(&input).Send(); err != nil {
		return errors.Wrap(err, "failed to delete doc from dynamo")
	}

	return nil
}