Support for private S3 buckets #25

Merged
merged 2 commits on May 7, 2020
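The diff below adds two config options to S3MetaSync::Syncer: :acl (defaulting to "public-read"; passing "private" switches downloads to authenticated get_object calls) and :credentials_path (an AWS shared credentials file read via Aws::SharedCredentials). A minimal usage sketch in Ruby — the option names come from this diff, while the bucket, folder names, and credential values are placeholders:

require "s3_meta_sync"

# Upload a local folder to a private bucket using explicit credentials.
# :acl defaults to "public-read"; "private" makes downloads use
# authenticated S3 get_object calls instead of public HTTP reads.
syncer = S3MetaSync::Syncer.new(
  key: ENV["AWS_ACCESS_KEY_ID"],
  secret: ENV["AWS_SECRET_ACCESS_KEY"],
  region: "us-east-1",
  acl: "private"
)
syncer.sync("local-folder", "my-bucket:remote-folder")

# Alternatively, authenticate via an AWS shared credentials file
# ("default" profile) instead of passing :key and :secret directly.
syncer = S3MetaSync::Syncer.new(
  credentials_path: "/path/to/aws/credentials",
  region: "us-east-1",
  acl: "private"
)
syncer.sync("my-bucket:remote-folder", "local-copy")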
3 changes: 2 additions & 1 deletion Gemfile
@@ -2,8 +2,9 @@ source "https://rubygems.org"
gemspec

gem "bump"
gem "byebug"
gem 'mime-types'
gem "rake"
gem "rspec", "~>3"
gem "byebug"
gem "single_cov"
gem "stub_server"
6 changes: 5 additions & 1 deletion Gemfile.lock
@@ -30,6 +30,9 @@ GEM
debugger-linecache (1.2.0)
diff-lcs (1.2.5)
jmespath (1.4.0)
mime-types (3.3.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2020.0425)
rack (2.0.1)
rake (10.0.3)
rspec (3.5.0)
@@ -55,11 +58,12 @@ PLATFORMS
DEPENDENCIES
bump
byebug
mime-types
rake
rspec (~> 3)
s3_meta_sync!
single_cov
stub_server

BUNDLED WITH
1.16.3
1.17.3
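
The new mime-types dependency is what upload_file (below) uses to set content_type on uploaded objects via MIME::Types.of(path).first.to_s. A quick illustrative sketch of that lookup — the file names are made up; an unknown extension yields an empty string, so S3 falls back to its default content type:

require "mime/types"

MIME::Types.of("report.json").first.to_s      # => "application/json"
MIME::Types.of("notes.txt").first.to_s        # => "text/plain"
MIME::Types.of("blob.unknownext").first.to_s  # => "" (no match found)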
102 changes: 80 additions & 22 deletions lib/s3_meta_sync/syncer.rb
@@ -7,17 +7,24 @@
require "fileutils"
require "tmpdir"
require "openssl"
require "mime/types"

require "aws-sdk-s3"
require "s3_meta_sync/zip"

module S3MetaSync
class Syncer
DEFAULT_REGION = 'us-east-1'
DEFAULT_REGION = "us-east-1"
STAGING_AREA_PREFIX = "s3ms_"

AWS_PUBLIC_ACCESS = "public-read"
AWS_PRIVATE_ACCESS = "private"

def initialize(config)
@config = config
@config = {
acl: AWS_PUBLIC_ACCESS,
region: DEFAULT_REGION
}.merge(config)
end

def sync(source, destination)
@@ -95,7 +102,7 @@ def download(source, destination)
# Sometimes SIGTERM causes Dir.mktmpdir to not properly delete the temp folder
# Remove 1 day old folders
def delete_old_temp_folders
path = File.join(Dir.tmpdir, STAGING_AREA_PREFIX + '*')
path = File.join(Dir.tmpdir, STAGING_AREA_PREFIX + "*")

day = 24 * 60 * 60
dirs = Dir.glob(path)
@@ -163,10 +170,12 @@ def upload_file(source, path, destination)
content = Zip.zip(content) if @config[:zip] && path != META_FILE

object = {
acl: @config[:acl],
bucket: @bucket,
body: content,
key: "#{destination}/#{path}",
acl: 'public-read'
content_encoding: content.encoding.to_s,
content_type: MIME::Types.of(path).first.to_s,
Owner commented: neat!
key: "#{destination}/#{path}"
}

object[:server_side_encryption] = @config[:server_side_encryption] if @config[:server_side_encryption]
@@ -197,11 +206,19 @@ def delete_local_files(local, paths)
end

def s3
@s3 ||= Aws::S3::Client.new(
access_key_id: @config[:key],
secret_access_key: @config[:secret],
region: @config[:region] || 'us-west-2'
)
@s3 ||= begin
config = { region: @config[:region] }

if @config[:credentials_path]
config[:credentials] = Aws::SharedCredentials.new(path: @config[:credentials_path], profile_name: "default")
else
config[:access_key_id] = @config[:key]
config[:secret_access_key] = @config[:secret]
config[:session_token] = @config[:session_token] if @config[:session_token]
end

Aws::S3::Client.new(config)
end
end

def generate_meta(source)
@@ -239,19 +256,43 @@ def read_meta(source)
end

def download_meta(destination)
if private?
private_access_download_meta(destination)
else
public_access_download_meta(destination)
end
end

def private_access_download_meta(destination)
content = private_content_download(destination, META_FILE).string

raise S3MetaSync::RemoteWithoutMeta if content.empty? # if missing, upload everything

parse_yaml_content(content)
rescue Aws::S3::Errors::NoSuchKey, Aws::S3::Errors::AccessDenied # if requesting a file that doesn't exist AccessDenied is raised
retries ||= 0

raise S3MetaSync::RemoteWithoutMeta if retries >= 1

retries += 1
sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
retry
end

def public_access_download_meta(destination)
content = download_content("#{destination}/#{META_FILE}") { |io| io.read }
raise OpenURI::HTTPError.new('Content is empty', nil) unless content.size > 0

raise OpenURI::HTTPError.new("Content is empty", nil) if content.size == 0

parse_yaml_content(content)
rescue OpenURI::HTTPError
retries ||= 0

raise S3MetaSync::RemoteWithoutMeta if retries >= 1

retries += 1
if retries <= 1
sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
retry
else
raise RemoteWithoutMeta
end
sleep 1 # maybe the remote meta was just updated ... give aws a second chance ...
retry
end

def parse_yaml_content(content)
@@ -260,16 +301,29 @@ def parse_yaml_content(content)
end

def download_file(source, path, destination, zip)
download = download_content("#{source}/#{path}") # warning: using block form consumes more ram
download = Zip.unzip(download) if zip
path = "#{destination}/#{path}"
FileUtils.mkdir_p(File.dirname(path))
download = if private?
private_content_download(source, path)
else
public_content_download(source, path)
end

download = S3MetaSync::Zip.unzip(download) if zip
FileUtils.mkdir_p(File.dirname("#{destination}/#{path}"))

# consumes less RAM than File.write(path, content), possibly also faster
File.open(path, 'wb') { |f| IO.copy_stream(download, f) }
File.open("#{destination}/#{path}", "wb") { |f| IO.copy_stream(download, f) }
download.close
end

def private_content_download(source, path)
obj = s3.get_object(bucket: @bucket, key: "#{source}/#{path}")
obj.body
end

def public_content_download(source, path)
download_content("#{source}/#{path}") # warning: using block form consumes more ram
end

def download_content(path)
log "Downloading #{path}"
url =
@@ -346,5 +400,9 @@ def url?(source)
def log(text, important=false)
$stderr.puts text if @config[:verbose] or important
end

def private?
@config[:acl] == AWS_PRIVATE_ACCESS
end
end
end
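
When :credentials_path is set, the s3 client above builds its credentials with Aws::SharedCredentials.new(path: ..., profile_name: "default"), so the file it points to should be a standard AWS shared credentials file with a [default] section. A sketch of creating one, mirroring the spec's write_credentials_file helper further down — the path and values are placeholders:

require "fileutils"

# Minimal shared credentials file with a [default] profile, as expected
# by Aws::SharedCredentials.new(path: ..., profile_name: "default").
FileUtils.mkdir_p("aws")
File.write("aws/credentials", <<~CREDENTIALS)
  [default]
  aws_access_key_id=REPLACE_WITH_ACCESS_KEY
  aws_secret_access_key=REPLACE_WITH_SECRET_KEY
CREDENTIALS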
98 changes: 93 additions & 5 deletions spec/s3_meta_sync_spec.rb
@@ -76,6 +76,8 @@ def upload(file, content)
expect(syncer.send(:download_content, "bar/#{file}").read).to eq(content)
end

before { allow($stderr).to receive(:puts) }

around do |test|
Dir.mktmpdir do |dir|
Dir.chdir(dir, &test)
@@ -101,10 +103,8 @@ def upload(file, content)
end

describe "#sync" do
before do
allow($stderr).to receive(:puts)
upload_simple_structure
end
before { upload_simple_structure }

after { cleanup_s3 }

context "sync local to remote" do
Expand Down Expand Up @@ -567,7 +567,9 @@ def call(*args)
end

describe "with retries option" do
before { config[:max_retries] = 3 }
def config
super.merge(max_retries: 3)
end

it "retries more than 3 times on a HTTP error" do
expect(syncer).to receive(:open).exactly(4).and_raise OpenURI::HTTPError.new('http error', nil)
@@ -656,4 +658,90 @@ def sync(command, options={})
end
end
end

describe "private S3 objects" do
let(:private_acl) { 'private' }
let(:bar_md5) { "---\n:files:\n xxx: f5271ace09a56600e1cef7663d932807\n" }

def syncer
config[:acl] = private_acl
S3MetaSync::Syncer.new(config)
end

def upload_simple_structure
sh "mkdir foo && echo こんにちは > foo/xxx"
syncer.sync("foo", "#{bucket}:bar")
end

def download_private(file)
s3.get_object(bucket: bucket, key: file)
end

before { upload_simple_structure }

after { cleanup_s3 }

it "uploads files" do
expect(download_private("bar/xxx").body.read).to eq("こんにちは\n")
expect(download_private("bar/.s3-meta-sync").body.read).to eq(bar_md5)
end

it "uploads file to S3" do
expect { download_private("boo/xxx") }.to raise_error(Aws::S3::Errors::NoSuchKey)

syncer.sync("foo", "#{bucket}:boo")

expect(download_private("boo/xxx").body.read).to eq("こんにちは\n")
end

it "downloads from S3 bucket" do
expect(File.exists?("boo/xxx")).to be false

syncer.sync("#{bucket}:bar", "boo")

expect(File.exists?("boo/xxx")).to be true
end

it "sets content encoding" do
sh "mkdir baz && echo さようなら > baz/japanese"
syncer.sync("baz", "#{bucket}:baz")

expect(download_private("baz/japanese").content_encoding).to eq("UTF-8")
end

it "sets content type" do
sh "mkdir baz && echo こんにちは > baz/japanese.json"
syncer.sync("baz", "#{bucket}:baz")

expect(download_private("baz/japanese.json").content_type).to eq("application/json")
end

context "using aws credentials file" do
def write_credentials_file
FileUtils.mkdir_p("aws")
File.write("aws/credentials", "[default]\naws_access_key_id=#{key}\naws_secret_access_key=#{secret}")
end

def syncer
write_credentials_file

config[:acl] = private_acl
config[:credentials_path] = "aws/credentials"

S3MetaSync::Syncer.new(config)
end

it "uploads to S3" do
expect { download_private("boo/xxx") }.to raise_error(Aws::S3::Errors::NoSuchKey)
syncer.sync("foo", "#{bucket}:boo")
expect(download_private("boo/xxx").body.read).to eq("こんにちは\n")
end

it "downloads from S3 bucket" do
expect(File.exists?("boo/xxx")).to be false
syncer.sync("#{bucket}:bar", "boo")
expect(File.exists?("boo/xxx")).to be true
end
end
end
end