Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions app/models/upload.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
require "digest/sha1"

class Upload < ActiveRecord::Base
self.ignored_columns = [
"verified" # TODO(2020-12-10): remove
]

include ActionView::Helpers::NumberHelper
include HasUrl

Expand Down Expand Up @@ -51,6 +55,14 @@ def access_control_post

scope :by_users, -> { where("uploads.id > ?", SEEDED_ID_THRESHOLD) }

def self.verification_statuses
@verification_statuses ||= Enum.new(
unchecked: 1,
verified: 2,
invalid_etag: 3
)
end
Copy link
Contributor

@lis2 lis2 Sep 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am curious if here wouldn't be beneficial to use rails enums.

That would by default define scopes so for example later instead of
Upload.where(verification_status: Upload.verification_statuses[:invalid_etag])
We would be able to simplify it to
Upload.invalid_etag

I grepped and we are not using enums too often so maybe there is a reason for that. I only found one in ApplicationRequest model

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure, we have lib/enum.rb so I am just being consistent. IMO we could easily make the API easier so at least we could do Upload.verification_statuses.invalid_etag


def to_s
self.url
end
Expand Down Expand Up @@ -449,6 +461,7 @@ def short_url_basename
# access_control_post_id :bigint
# original_sha1 :string
# verified :boolean
# verification_status :integer default(1), not null
#
# Indexes
#
Expand Down
21 changes: 21 additions & 0 deletions db/migrate/20200910051633_change_uploads_verified_to_integer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# frozen_string_literal: true

class ChangeUploadsVerifiedToInteger < ActiveRecord::Migration[6.0]
def up
add_column :uploads, :verification_status, :integer, null: false, default: 1
Migration::ColumnDropper.mark_readonly(:uploads, :verified)

DB.exec(
<<~SQL
UPDATE uploads SET verification_status = CASE WHEN
verified THEN 2
WHEN NOT verified THEN 3
END
SQL
)
end

def down
remove_column :uploads, :verification_status
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# frozen_string_literal: true

class AddIndexToUploadsVerificationStatus < ActiveRecord::Migration[6.0]
disable_ddl_transaction!

def up
execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
idx_uploads_on_verification_status ON uploads USING btree (verification_status)
SQL
end

def down
execute "DROP INDEX IF EXISTS idx_uploads_on_verification_status"
end
end
30 changes: 21 additions & 9 deletions lib/s3_inventory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,22 +71,34 @@ def backfill_etags_and_list_missing
.joins("LEFT JOIN #{table_name} inventory2 ON inventory2.url = #{model.table_name}.url")
.where("inventory2.etag IS NOT NULL").pluck(:id)

# marking as verified/not verified
if model == Upload
# marking as verified/not verified
DB.exec(<<~SQL, inventory_date
sql_params = {
inventory_date: inventory_date,
unchecked: Upload.verification_statuses[:unchecked],
invalid_etag: Upload.verification_statuses[:invalid_etag],
verified: Upload.verification_statuses[:verified]
}
DB.exec(<<~SQL, sql_params)
UPDATE #{model.table_name}
SET verified = CASE when table_name_alias.etag IS NULL THEN false ELSE true END
SET verification_status = CASE WHEN table_name_alias.etag IS NULL
THEN :invalid_etag
ELSE :verified
END
FROM #{model.table_name} AS model_table
LEFT JOIN #{table_name} AS table_name_alias ON model_table.etag = table_name_alias.etag
LEFT JOIN #{table_name} AS table_name_alias ON
model_table.etag = table_name_alias.etag
WHERE model_table.id = #{model.table_name}.id
AND model_table.updated_at < ?
AND model_table.updated_at < :inventory_date
AND (
model_table.verified IS NULL OR
model_table.verified <> CASE when table_name_alias.etag IS NULL THEN false ELSE true END
model_table.verification_status = :unchecked OR
model_table.verification_status <> CASE WHEN table_name_alias.etag IS NULL
THEN :invalid_etag
ELSE :verified
END
)
AND model_table.id > #{model::SEEDED_ID_THRESHOLD}
SQL
)
SQL
end

if (missing_count = missing_uploads.count) > 0
Expand Down
18 changes: 11 additions & 7 deletions lib/tasks/uploads.rake
Original file line number Diff line number Diff line change
Expand Up @@ -1001,15 +1001,15 @@ def analyze_missing_s3
SELECT post_id, url, sha1, extension, uploads.id
FROM post_uploads pu
RIGHT JOIN uploads on uploads.id = pu.upload_id
WHERE NOT verified
WHERE verification_status = :invalid_etag
ORDER BY created_at
SQL

lookup = {}
other = []
all = []

DB.query(sql).each do |r|
DB.query(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |r|
all << r
if r.post_id
lookup[r.post_id] ||= []
Expand All @@ -1029,7 +1029,7 @@ def analyze_missing_s3
puts
end

missing_uploads = Upload.where(verified: false)
missing_uploads = Upload.where(verification_status: Upload.verification_statuses[:invalid_etag])
puts "Total missing uploads: #{missing_uploads.count}, newest is #{missing_uploads.maximum(:created_at)}"
puts "Total problem posts: #{lookup.keys.count} with #{lookup.values.sum { |a| a.length } } missing uploads"
puts "Other missing uploads count: #{other.count}"
Expand Down Expand Up @@ -1067,7 +1067,9 @@ def analyze_missing_s3
end

def delete_missing_s3
missing = Upload.where(verified: false).order(:created_at)
missing = Upload.where(
verification_status: Upload.verification_statuses[:invalid_etag]
).order(:created_at)
count = missing.count
if count > 0
puts "The following uploads will be deleted from the database"
Expand Down Expand Up @@ -1110,7 +1112,9 @@ def fix_missing_s3
Jobs.run_immediately!

puts "Attempting to download missing uploads and recreate"
ids = Upload.where(verified: false).pluck(:id)
ids = Upload.where(
verification_status: Upload.verification_statuses[:invalid_etag]
).pluck(:id)
ids.each do |id|
upload = Upload.find(id)

Expand Down Expand Up @@ -1165,11 +1169,11 @@ def fix_missing_s3
SELECT post_id
FROM post_uploads pu
JOIN uploads on uploads.id = pu.upload_id
WHERE NOT verified
WHERE verification_status = :invalid_etag
ORDER BY post_id DESC
SQL

DB.query_single(sql).each do |post_id|
DB.query_single(sql, invalid_etag: Upload.verification_statuses[:invalid_etag]).each do |post_id|
post = Post.find_by(id: post_id)
if post
post.rebake!
Expand Down
10 changes: 5 additions & 5 deletions spec/components/s3_inventory_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,15 @@
end

it "marks missing uploads as not verified and found uploads as verified. uploads not checked will be verified nil" do
expect(Upload.where(verified: nil).count).to eq(12)
expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(12)
output = capture_stdout do
inventory.backfill_etags_and_list_missing
end

verified = Upload.pluck(:verified)
expect(Upload.where(verified: true).count).to eq(3)
expect(Upload.where(verified: false).count).to eq(2)
expect(Upload.where(verified: nil).count).to eq(7)
verification_status = Upload.pluck(:verification_status)
expect(Upload.where(verification_status: Upload.verification_statuses[:verified]).count).to eq(3)
expect(Upload.where(verification_status: Upload.verification_statuses[:invalid_etag]).count).to eq(2)
expect(Upload.where(verification_status: Upload.verification_statuses[:unchecked]).count).to eq(7)
end

it "does not affect the updated_at date of uploads" do
Expand Down