forked from Azure/azure-storage-azcopy
/
folderDeletionManager.go
226 lines (195 loc) · 9.14 KB
/
folderDeletionManager.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
// Copyright Microsoft <wastore@microsoft.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
package common
import (
"context"
"net/url"
"strings"
"sync"
)
// FolderDeletionFunc should delete the folder IF IT IS EMPTY, and return true.
// If the folder is not empty, false must be returned.
// FolderDeletionManager is allowed to call this on a folder that is not yet empty.
// In that case, FolderDeletionManager may call it again later.
// Errors are not returned because the deletion may happen some time after it was
// requested, so it's up to the func to do its own logging.
type FolderDeletionFunc func(context.Context, ILogger) bool
// FolderDeletionManager handles the fact that (in most locations) we can't delete folders that
// still contain files. So it allows us to request deletion of a folder, and have that be attempted
// after the last file is removed. Note that the apparent last file may not really be the last (e.g.
// there are other files, still to be deleted, in future job parts), in which case any failed deletion
// will be retried when a new "candidate last child" is removed.
// Takes URLs rather than strings because that ensures correct (un)escaping, and makes it clear that we
// don't support Windows & MacOS local paths (which have case insensitivity that we don't support here).
type FolderDeletionManager interface {
	// RecordChildExists takes a child name and counts it against the child's immediate parent.
	// Should be called for both types of child: folders and files.
	// Only counts it against the immediate parent (that's all that's necessary, because we recurse in tryDeletion)
	RecordChildExists(childFileOrFolder *url.URL)

	// RecordChildDeleted records that a file, previously passed to RecordChildExists, has now been deleted.
	// Only call for files, not folders
	RecordChildDeleted(childFile *url.URL)

	// RequestDeletion registers a function that will be called to delete the given folder, when that
	// folder has no more known children. May be called before, after or during the time that
	// the folder's children are being passed to RecordChildExists and RecordChildDeleted
	//
	// Warning: only pass in deletionFuncs that will do nothing and return FALSE if the
	// folder is not yet empty. If they return false, they may be called again later.
	RequestDeletion(folder *url.URL, deletionFunc FolderDeletionFunc)

	// TODO: do we want this to report, so that we can log, any folders at the very end which still are not deleted?
	// or will we just leave such folders there, with no logged message other than any "per attempt" logging?
}
// folderDeletionState is the per-folder bookkeeping kept by standardFolderDeletionManager.
type folderDeletionState struct {
	// childCount is the number of children recorded via RecordChildExists that have not
	// yet been recorded as deleted via RecordChildDeleted.
	childCount int64
	// deleter is the func registered through RequestDeletion; it is nil until deletion
	// of this folder has been requested.
	deleter FolderDeletionFunc
}
// shouldDeleteNow reports whether a deletion attempt is due: deletion must have been
// requested (a deleter is registered) and no known children may remain.
func (f *folderDeletionState) shouldDeleteNow() bool {
	if f.deleter == nil {
		return false // nobody has asked for this folder to be deleted yet
	}
	return f.childCount == 0
}
// NewFolderDeletionManager picks the FolderDeletionManager implementation for the given
// folder property option:
//   - AllFolders / AllFoldersExceptRoot: a standardFolderDeletionManager that keeps ctx and
//     logger for use in deletions that run later
//   - NoFolders: a nullFolderDeletionManager
//
// It panics for any other option value.
func NewFolderDeletionManager(ctx context.Context, fpo FolderPropertyOption, logger ILogger) FolderDeletionManager {
	tracksFolders := fpo == EFolderPropertiesOption.AllFolders() ||
		fpo == EFolderPropertiesOption.AllFoldersExceptRoot()
	if tracksFolders {
		manager := &standardFolderDeletionManager{
			mu:       new(sync.Mutex),
			contents: map[string]*folderDeletionState{},
			logger:   logger,
			ctx:      ctx,
		}
		return manager
	}

	if fpo == EFolderPropertiesOption.NoFolders() {
		// no point in using a real implementation here, since it will just use memory and take time for no benefit
		return &nullFolderDeletionManager{}
	}

	panic("unknown folderPropertiesOption")
}
// standardFolderDeletionManager is the FolderDeletionManager implementation that actually
// tracks child counts and attempts folder deletions.
//
// Note: the current implementation assumes that names are either case sensitive, or at least
// consistently capitalized. If it receives inconsistently capitalized things, it will think they are
// distinct, and so may try deletion prematurely and fail
type standardFolderDeletionManager struct {
	// mu guards contents and the state objects it points to
	mu *sync.Mutex // mutex is simpler than RWMutex because folderDeletionState has multiple mutable elements
	// contents maps a cleaned folder URL (see clean/getParent) to that folder's state
	contents map[string]*folderDeletionState // pointer so no need to put back INTO map after reading from map and mutating a field value

	// have our own logger and context, because our deletions don't necessarily run when RequestDeletion is called
	logger ILogger
	ctx context.Context
}
// clean converts u into the string form used as a map key: the URL's string representation
// with everything from the first "?" onwards (the query, e.g. a SAS token) removed, and then
// percent-unescaped. It panics if the remaining text cannot be unescaped.
func (s *standardFolderDeletionManager) clean(u *url.URL) string {
	withoutQuery := u.String()
	// first ?, if it exists, is always start of query
	if queryStart := strings.Index(withoutQuery, "?"); queryStart >= 0 {
		withoutQuery = withoutQuery[:queryStart]
	}

	unescaped, err := url.PathUnescape(withoutQuery)
	if err != nil {
		panic("uncleanable url") // should never happen
	}
	return unescaped
}
// getParent returns the cleaned form of u (see clean) with its final path component dropped,
// which is the key under which u's immediate parent is tracked. path.Dir is deliberately not
// used because it messes with the // in URLs.
//
// The second result is false when there is no parent to report: either u has an empty path,
// or the cleaned URL contains no "/" at all (e.g. a relative URL consisting of a single name).
// In that case the returned string is empty.
func (s *standardFolderDeletionManager) getParent(u *url.URL) (string, bool) {
	if len(u.Path) == 0 {
		return "", false // path is already empty, so we can't go up another level
	}

	// trim off last portion of path (or all of the path, if it only has one component)
	c := s.clean(u)
	lastSlash := strings.LastIndex(c, "/")
	if lastSlash < 0 {
		// no separator at all, so there is nothing to trim back to; slicing with -1 would panic
		return "", false
	}
	return c[:lastSlash], true
}
// getStateAlreadyLocked returns the state tracked for folder, creating and storing an empty
// state first if the folder is not yet known. The caller must already hold the lock.
func (s *standardFolderDeletionManager) getStateAlreadyLocked(folder string) *folderDeletionState {
	if existing, found := s.contents[folder]; found {
		return existing
	}

	fresh := new(folderDeletionState)
	s.contents[folder] = fresh
	return fresh
}
// RecordChildExists adds one to the child count of the immediate parent of childFileOrFolder,
// creating the parent's state if it is not tracked yet. It does nothing when the URL has no
// parent.
func (s *standardFolderDeletionManager) RecordChildExists(childFileOrFolder *url.URL) {
	parent, hasParent := s.getParent(childFileOrFolder)
	if hasParent {
		s.mu.Lock()
		defer s.mu.Unlock()

		s.getStateAlreadyLocked(parent).childCount++
	}
	// otherwise: this is not a child of any parent, so there is nothing for us to do
}
// RecordChildDeleted subtracts one (never going below zero) from the child count of the
// immediate parent of childFile. If the parent then has no children left and a deletion has
// been requested for it, the deletion is attempted, after the lock has been released.
// Nothing happens when the URL has no parent or the parent is not being tracked.
func (s *standardFolderDeletionManager) RecordChildDeleted(childFile *url.URL) {
	parent, hasParent := s.getParent(childFile)
	if !hasParent {
		return // this is not a child of any parent, so there is nothing for us to do
	}

	var (
		deleter FolderDeletionFunc
		ready   bool
	)

	s.mu.Lock()
	// If the parent is unknown we are not tracking this child, so there is nothing to do in
	// response to its deletion (may happen in the recursive calls from tryDeletion, when they
	// recurse up to parent dirs).
	if state, tracked := s.contents[parent]; tracked {
		remaining := state.childCount - 1
		if remaining < 0 {
			// should never happen. If it does it means someone called RequestDeletion and
			// recorded a child as deleted, without ever registering the child as known
			remaining = 0
		}
		state.childCount = remaining

		deleter = state.deleter
		ready = state.shouldDeleteNow()
	}
	s.mu.Unlock() // unlock before network calls for deletion

	if ready {
		s.tryDeletion(parent, deleter)
	}
}
// RequestDeletion registers deletionFunc as the deleter for folder (tracked under its cleaned
// URL, creating the state if needed). If the folder currently has no known children, the
// deletion is attempted straight away, after the lock has been released.
func (s *standardFolderDeletionManager) RequestDeletion(folder *url.URL, deletionFunc FolderDeletionFunc) {
	key := s.clean(folder)

	s.mu.Lock()
	state := s.getStateAlreadyLocked(key)
	state.deleter = deletionFunc
	ready := state.shouldDeleteNow() // test now in case there are no children
	s.mu.Unlock()                    // release lock before expensive deletion attempt

	if !ready {
		return
	}
	s.tryDeletion(key, deletionFunc)
}
// tryDeletion calls deletionFunc for folder (a cleaned, i.e. unescaped, folder URL string).
// If deletionFunc reports success, the folder's state is dropped and the folder is then
// recorded as a deleted child of its own parent, which may in turn trigger deletion of the
// parent. If deletionFunc returns false nothing else happens; the folder stays tracked so the
// deletion can be attempted again later.
//
// Must be called WITHOUT the lock held: it takes the lock itself, and deletionFunc may be slow.
func (s *standardFolderDeletionManager) tryDeletion(folder string, deletionFunc FolderDeletionFunc) {
	// for safety, deletionFunc should be coded to do nothing, and return false, if the directory is not empty
	if !deletionFunc(s.ctx, s.logger) {
		return
	}

	s.mu.Lock()
	delete(s.contents, folder)
	s.mu.Unlock()

	// folder is, itself, a child of its parent. So recurse. This is the only place that
	// RecordChildDeleted should be called with a FOLDER parameter.
	//
	// folder has already been percent-unescaped, so a literal '%', '?' or '#' in a name must be
	// escaped again before parsing. Otherwise '%' is read as the start of an escape sequence
	// (a parse error, or a silently different name) and '?'/'#' cut the path short by starting
	// a query/fragment, so the parent would be looked up under the wrong key.
	reescaped := strings.NewReplacer("%", "%25", "?", "%3F", "#", "%23").Replace(folder)
	u, err := url.Parse(reescaped)
	if err != nil {
		panic("folder url not parsable") // should never happen, because we started with a URL
	}
	s.RecordChildDeleted(u)
}
///////////////////////////////////////
// nullFolderDeletionManager is the stateless FolderDeletionManager that NewFolderDeletionManager
// returns for the NoFolders option. It tracks nothing.
type nullFolderDeletionManager struct{}
// RecordChildExists does nothing; the null manager keeps no child counts.
func (f *nullFolderDeletionManager) RecordChildExists(child *url.URL) {
	// no-op
}
// RecordChildDeleted does nothing; the null manager keeps no child counts.
func (f *nullFolderDeletionManager) RecordChildDeleted(child *url.URL) {
	// no-op
}
// RequestDeletion always panics. There's no way this should ever be called, because the null
// deletion manager is only created when we are NOT transferring folder info, so reaching it
// means the wrong manager type was instantiated.
func (f *nullFolderDeletionManager) RequestDeletion(folder *url.URL, deletionFunc FolderDeletionFunc) {
	const misuse = "wrong type of folder deletion manager has been instantiated. This type does not do anything"
	panic(misuse)
}