plugin.rb
# name: discourse-json-s3-exporter
# about: Export your Discourse data to S3 in JSON format
# version: 0.0.1
# authors: Samer Masry <samer.masry@gmail.com>
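# The plugin relies on the site settings referenced below. They are expected
# to be declared in the plugin's config/settings.yml; a minimal sketch
# (the defaults here are illustrative, not taken from the original repository):
#
#   plugins:
#     json_s3_export_enabled:
#       default: false
#     json_s3_export_clear_files_before_upload:
#       default: false
#     json_s3_export_region:
#       default: ''
#     json_s3_export_access_key:
#       default: ''
#     json_s3_export_secret_key:
#       default: ''
#     json_s3_export_bucket:
#       default: ''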
enabled_site_setting :json_s3_export_enabled

after_initialize do
  require_dependency 'jobs/base'

  module ::Jobs
    class ExportTableToS3 < Jobs::Base
      BATCH_SIZE = 1000

      # Columns that must never leave the database. record.attributes is keyed
      # by column name as a String, so the blacklist uses String keys as well.
      BLACK_LISTED_COLUMNS = {
        'users' => %w[
          encrypted_password reset_password_token
          old_password password_salt
        ]
      }.freeze

      sidekiq_options queue: 'low'
      def execute(args)
        ar_class   = args[:class_name].constantize
        start_id   = args[:start_id] || 1
        table_name = ar_class.table_name

        # On the first batch, optionally clear any previous export for this table.
        if SiteSetting.json_s3_export_clear_files_before_upload && start_id == 1
          delete_existing_files!(table_name)
        end

        ar_class.connection.transaction do
          ar_class.connection.execute('SET TRANSACTION ISOLATION LEVEL SERIALIZABLE')

          # Only the first batch is processed here; the job then re-enqueues
          # itself with the next start_id and exits, so each Sidekiq job
          # handles exactly one batch.
          ar_class.all.find_in_batches(start: start_id, batch_size: BATCH_SIZE) do |group|
            Tempfile.open("#{table_name}-#{start_id}") do |f|
              # Write one newline-delimited JSON record per row, gzip-compressed.
              Zlib::GzipWriter.open(f) do |gz|
                group.each do |record|
                  gz.puts record.attributes.except(*BLACK_LISTED_COLUMNS[table_name]).to_json
                end
              end
              # Upload the compressed batch to S3.
              upload_to_s3("#{table_name}/data-#{start_id}.gz", f)
            end

            # Schedule the next batch and stop.
            next_id = group.last[ar_class.primary_key] + 1
            Jobs.enqueue(:export_table_to_s3, class_name: ar_class.to_s, start_id: next_id)
            break
          end
        end
      end
      private

      def upload_to_s3(file_name, data)
        obj = s3_bucket.object(file_name)
        obj.upload_file(data, server_side_encryption: 'AES256')
      end

      def aws_client
        Aws::S3::Client.new(
          region: SiteSetting.json_s3_export_region,
          access_key_id: SiteSetting.json_s3_export_access_key,
          secret_access_key: SiteSetting.json_s3_export_secret_key
        )
      end

      def s3_bucket
        s3 = Aws::S3::Resource.new(client: aws_client)
        s3.bucket(SiteSetting.json_s3_export_bucket)
      end

      def delete_existing_files!(prefix)
        s3_bucket.objects(prefix: prefix).batch_delete!
      end
    end
  end
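  # Usage note (illustrative, not part of the plugin itself): a single table
  # can also be exported on demand from the Rails console, since the job
  # re-enqueues itself until the table is exhausted:
  #
  #   Jobs.enqueue(:export_table_to_s3, class_name: 'Topic')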
  module ::Jobs
    class JsonS3Export < Jobs::Scheduled
      sidekiq_options queue: 'low'

      CLASSES_FOR_EXPORT = [
        BadgeGrouping, BadgeType, Badge, Category, CategoryFeaturedTopic,
        CategoryGroup, CategoryTagGroup, CategoryTag,
        CategoryUser, GroupMention, GroupUser, Group, PostActionType,
        PostDetail, Post, TagGroup, Tag, TagUser, TopTopic, TopicTag,
        TopicUser, Topic, UserAction, UserBadge, UserVisit, User
      ].freeze

      every 1.day

      def execute(args)
        return unless SiteSetting.json_s3_export_enabled

        CLASSES_FOR_EXPORT.each do |ar_class|
          Jobs.enqueue(:export_table_to_s3, class_name: ar_class.to_s)
        end
      end
    end
  end
end
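# Output format note (a sketch, not shipped code): every object uploaded above
# is a gzipped stream of newline-delimited JSON, one row per line, stored under
# "<table_name>/data-<start_id>.gz". A consumer could read a downloaded file
# back with something like:
#
#   require 'zlib'
#   require 'json'
#
#   Zlib::GzipReader.open('users/data-1.gz') do |gz|
#     gz.each_line { |line| record = JSON.parse(line) }
#   end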