/
upload.rb
256 lines (210 loc) · 8.24 KB
/
upload.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# frozen_string_literal: true
# An upload groups the media assets created from a single submission. A submission is either a set
# of files uploaded from disk (`files`) or a URL to fetch the files from (`source`), never both.
#
# Lifecycle (see the `status` inclusion validation): "pending" -> "processing" -> "completed" or
# "error". This class only sets "processing" and "error"; NOTE(review): "completed" is presumably
# set elsewhere once the upload's media assets are finished -- confirm.
class Upload < ApplicationRecord
  extend Memoist

  # Raised when an upload can't be processed (e.g. the source contains no images).
  class Error < StandardError; end

  # The list of allowed archive file types.
  ARCHIVE_FILE_TYPES = %i[zip rar 7z].freeze

  # The maximum number of files allowed per upload.
  MAX_FILES_PER_UPLOAD = 100

  # The maximum number of 'pending' or 'processing' media assets a single user can have at once.
  MAX_QUEUED_ASSETS = 250

  # The files submitted with the upload. Iterated as (index, file) pairs by #uploaded_files, where
  # each file responds to `tempfile` and `original_filename`.
  attr_accessor :files

  belongs_to :uploader, class_name: "User"
  has_many :upload_media_assets, dependent: :destroy
  has_many :media_assets, through: :upload_media_assets
  has_many :posts, through: :media_assets

  normalize :source, :normalize_source

  validates :source, format: { with: %r{\Ahttps?://}i, message: "is not a valid URL" }, if: -> { source.present? }
  validates :referer_url, format: { with: %r{\Ahttps?://}i, message: "is not a valid URL" }, if: -> { referer_url.present? }
  validates :status, inclusion: { in: %w[pending processing completed error] }
  validate :validate_file_and_source, on: :create
  validate :validate_archive_files, on: :create
  validate :validate_uploader_is_not_limited, on: :create

  after_create :async_process_upload!

  scope :pending, -> { where(status: "pending") }
  scope :processing, -> { where(status: "processing") }
  scope :completed, -> { where(status: "completed") }
  scope :failed, -> { where(status: "error") }

  # Uploads that have been in the "processing" state for 4 hours or more since creation.
  scope :expired, -> { processing.where(created_at: ..4.hours.ago) }

  # The uploads the given user is allowed to see: all of them for admins, otherwise only the
  # user's own uploads.
  #
  # @param user [User] the user viewing the uploads
  # @return [ActiveRecord::Relation<Upload>]
  def self.visible(user)
    if user.is_admin?
      all
    else
      where(uploader: user)
    end
  end

  # Mark every expired upload (see the `expired` scope) as failed. Uses `update_all`, so it goes
  # straight to the database without running validations or callbacks.
  def self.prune!
    expired.update_all(status: "error", error: "Stuck processing for more than 4 hours")
  end

  concerning :StatusMethods do
    # @return [Boolean] true if the upload hasn't started processing yet
    def is_pending?
      status == "pending"
    end

    # @return [Boolean] true if the upload is currently being processed
    def is_processing?
      status == "processing"
    end

    # @return [Boolean] true if the upload finished successfully
    def is_completed?
      status == "completed"
    end

    # @return [Boolean] true if the upload failed
    def is_errored?
      status == "error"
    end

    # @return [Boolean] true if the upload reached a terminal state (completed or error)
    def is_finished?
      is_completed? || is_errored?
    end
  end

  concerning :ValidationMethods do
    # An upload must have exactly one of `files` or `source`.
    def validate_file_and_source
      if files.present? && source.present?
        errors.add(:base, "Can't give both a file and a source")
      elsif files.blank? && source.blank?
        errors.add(:base, "No file or source given")
      end
    end

    # Reject the upload if the uploader already has more than MAX_QUEUED_ASSETS unfinished assets.
    def validate_uploader_is_not_limited
      # Without an uploader there is nothing to count; skip instead of raising NoMethodError on nil
      # in the middle of validation.
      return if uploader.nil?

      queued_asset_count = uploader.upload_media_assets.unfinished.count

      if queued_asset_count > MAX_QUEUED_ASSETS
        errors.add(:base, "You have too many images queued for upload (queued: #{queued_asset_count}; limit: #{MAX_QUEUED_ASSETS}). Try again later.")
      end
    end

    # Check every uploaded archive for an unsupported type, too many files, an excessive
    # uncompressed size, and unsafe entries (absolute paths, directory traversal, non-regular
    # files). At most one error is reported per archive: the first check that fails. Afterwards
    # check the total number of files in the upload (archive contents plus non-archive files).
    def validate_archive_files
      return if files.blank?

      archive_files.each do |archive, filename|
        if !archive.file_ext.in?(ARCHIVE_FILE_TYPES)
          errors.add(:base, "'#{filename}' is not a supported file type")
        elsif archive.exists? { |_, count| count > MAX_FILES_PER_UPLOAD }
          # XXX Potential zip bomb containing thousands of files; don't process it any further.
          errors.add(:base, "'#{filename}' contains too many files (max #{MAX_FILES_PER_UPLOAD} files per upload)")
        elsif archive.uncompressed_size > MediaAsset::MAX_FILE_SIZE
          errors.add(:base, "'#{filename}' is too large (uncompressed size: #{archive.uncompressed_size.to_fs(:human_size)}; max size: #{MediaAsset::MAX_FILE_SIZE.to_fs(:human_size)})")
        elsif (entry = archive.entries.find { |candidate| candidate.pathname.starts_with?("/") })
          errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' can't start with '/'")
        elsif (entry = archive.entries.find { |candidate| candidate.directory_traversal? })
          errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' can't contain '..' components")
        elsif (entry = archive.entries.find { |candidate| !candidate.file? && !candidate.directory? })
          errors.add(:base, "'#{entry.pathname_utf8}' in '#{filename}' isn't a regular file")
        end
      end

      # NOTE(review): this still calls `file_count` on archives flagged above as potential zip
      # bombs -- confirm that is cheap enough, or skip flagged archives here.
      total_files = archive_files.map(&:first).sum(&:file_count) + (files.size - archive_files.size)

      if total_files > MAX_FILES_PER_UPLOAD
        errors.add(:base, "Can't upload more than #{MAX_FILES_PER_UPLOAD} files at a time (total: #{total_files})")
      end
    end
  end

  concerning :SourceMethods do
    class_methods do
      # Decode percent-encoded Unicode characters in the URL. Falls back to the URL as given when
      # it can't be parsed or normalizes to a blank string.
      #
      # @param url [String] the source URL
      # @return [String] the normalized URL, or `url` unchanged
      def normalize_source(url)
        Danbooru::URL.parse(url)&.to_normalized_s.presence || url
      end
    end
  end

  # Uploads having at least one media asset that matches the given AI tag search.
  #
  # @param tag_string [String] the tag search, passed to MediaAssetQuery.search
  # @param score_range [Range] the score range passed to MediaAssetQuery.search (default: 50 and up)
  # @return [ActiveRecord::Relation<Upload>]
  def self.ai_tags_match(tag_string, score_range: (50..))
    upload_media_assets = MediaAssetQuery.search(tag_string, relation: UploadMediaAsset.joins(:media_asset), foreign_key: :media_asset_id, score_range: score_range)

    # Correlated EXISTS subquery: keep uploads that own at least one matching upload media asset.
    where(upload_media_assets.where("upload_media_assets.upload_id = uploads.id").arel.exists)
  end

  # Search uploads by the given params.
  #
  # Beyond the attribute filters handled by `search_attributes`, the recognized params are:
  # * `ai_tags_match` / `min_score` - filter by AI tags (`min_score` defaults to 50).
  # * `is_posted` - truthy keeps uploads that have posts; falsy keeps uploads that have none.
  # * `order` - "id"/"id_desc", "id_asc", or "random"; anything else uses the default order.
  #
  # @param params [Hash] the search params
  # @param current_user [User] the user performing the search
  # @return [ActiveRecord::Relation<Upload>]
  def self.search(params, current_user)
    q = search_attributes(params, [:id, :created_at, :updated_at, :source, :referer_url, :status, :media_asset_count, :uploader, :upload_media_assets, :media_assets, :posts], current_user: current_user)

    if params[:ai_tags_match].present?
      min_score = params.fetch(:min_score, 50).to_i
      q = q.ai_tags_match(params[:ai_tags_match], score_range: (min_score..))
    end

    if params[:is_posted].to_s.truthy?
      q = q.where.not(id: Upload.where.missing(:posts))
    elsif params[:is_posted].to_s.falsy?
      q = q.where(id: Upload.where.missing(:posts))
    end

    case params[:order]
    when "id", "id_desc"
      q = q.order(id: :desc)
    when "id_asc"
      q = q.order(id: :asc)
    when "random"
      q = q.order("random()")
    else
      q = q.apply_default_order(params)
    end

    q
  end

  # Start processing the upload after it's created. File uploads are processed immediately in the
  # caller; source uploads are handed to ProcessUploadJob to run later.
  #
  # @raise [RuntimeError] if the upload has neither files nor a source
  def async_process_upload!
    if files.present?
      process_upload!
    elsif source.present?
      ProcessUploadJob.perform_later(self)
    else
      raise "No file or source given" # Should never happen
    end
  end

  # Mark the upload as processing and create its upload media assets. Any failure is recorded on
  # the upload (status "error" plus the exception message) rather than raised to the caller, except
  # for process-control exceptions, which are recorded and then re-raised.
  def process_upload!
    update!(status: "processing")

    if files.present?
      process_file_upload!
    elsif source.present?
      process_source_upload!
    else
      raise Error, "No file or source given" # Should never happen
    end
  rescue SystemExit, SignalException => e
    # Exit requests and signals (including Interrupt) must keep propagating so the process can
    # actually shut down; record the failure, then re-raise.
    update!(status: "error", error: e.message)
    raise
  rescue Exception => e # rubocop:disable Lint/RescueException
    # Deliberately broad: failures that aren't StandardErrors (e.g. NotImplementedError) should
    # also be recorded on the upload instead of leaving it stuck in "processing".
    update!(status: "error", error: e.message)
  end

  # Create one upload media asset for each image URL found by the source extractor.
  #
  # @raise [Error] if the source doesn't contain any images
  def process_source_upload!
    page_url = source_extractor.page_url
    image_urls = source_extractor.image_urls

    if image_urls.empty?
      raise Error, "#{source} doesn't contain any images"
    end

    upload_media_assets = image_urls.map do |image_url|
      UploadMediaAsset.new(upload: self, source_url: image_url, page_url: page_url, media_asset: nil)
    end

    # Save the asset count and the assets together, so a failure leaves neither behind.
    transaction do
      update!(media_asset_count: upload_media_assets.size)
      upload_media_assets.each(&:save!)
    end
  end

  # Create one upload media asset for each uploaded file. Archives are extracted to temporary
  # directories and each extracted file becomes its own asset, in natural sort order. The temporary
  # directories are removed afterwards, whether or not processing succeeded.
  def process_file_upload!
    tmpdirs = []

    upload_media_assets = uploaded_files.flat_map do |file, original_filename|
      if file.is_a?(Danbooru::Archive)
        tmpdir, filenames = file.extract!
        tmpdirs << tmpdir

        Danbooru.natural_sort(filenames).map do |filename|
          name = "file://#{original_filename}/#{Pathname.new(filename).relative_path_from(tmpdir)}" # "file://foo.zip/foo/1.jpg"
          UploadMediaAsset.new(upload: self, file: filename, source_url: name)
        end
      else
        UploadMediaAsset.new(upload: self, file: file, source_url: "file://#{original_filename}")
      end
    end

    # Save the asset count and the assets together, so a failure leaves neither behind.
    transaction do
      update!(media_asset_count: upload_media_assets.size)
      upload_media_assets.each(&:save!)
    end
  ensure
    tmpdirs.each { |tmpdir| FileUtils.rm_rf(tmpdir) }
  end

  # The list of files uploaded from disk, with their filenames.
  #
  # @return [Array<Array>] pairs of [file, original filename], where the file is a Danbooru::Archive
  #   if its detected type is in ARCHIVE_FILE_TYPES, or whatever MediaFile.open returns otherwise
  def uploaded_files
    files.map do |_index, file|
      if FileTypeDetector.new(file.tempfile).file_ext.in?(ARCHIVE_FILE_TYPES)
        [Danbooru::Archive.open!(file.tempfile), file.original_filename]
      else
        [MediaFile.open(file.tempfile), file.original_filename]
      end
    end
  end

  # The list of archive files uploaded from disk, with their filenames.
  #
  # @return [Array<Array>] the [archive, original filename] pairs from #uploaded_files
  def archive_files
    uploaded_files.select do |file, _original_filename|
      file.is_a?(Danbooru::Archive)
    end
  end

  # The extractor used to find the images at the upload's source URL.
  #
  # @return [Object, nil] the result of Source::Extractor.find, or nil if the upload has no source
  def source_extractor
    return nil if source.blank?
    Source::Extractor.find(source, referer_url)
  end

  # The associations listed as available for inclusion with an upload.
  # NOTE(review): presumably consumed by the API's include handling -- confirm against the caller.
  def self.available_includes
    [:uploader, :upload_media_assets, :media_assets, :posts]
  end

  # Must come after the method definitions it wraps. Memoizing #uploaded_files means each uploaded
  # file is only opened once per Upload instance.
  memoize :source_extractor, :archive_files, :uploaded_files
end