From 9d1a532fbdc47afbf85e73603acd0bdd26ae357a Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Fri, 1 Mar 2024 15:22:21 +0100 Subject: [PATCH 1/2] Replace zip_tricks with zip_kit zip_kit is a friendly fork of zip_tricks under MIT license. It includes new features and bug fixes. Use its header/body generation, and use `write_file` for heuristic storage mode (pick the storage mode which is optimal for the content) --- README.md | 6 +-- lib/zipline.rb | 31 +++------------ lib/zipline/chunked_body.rb | 31 --------------- lib/zipline/tempfile_body.rb | 52 -------------------------- lib/zipline/version.rb | 2 +- lib/zipline/zip_generator.rb | 40 +++++++++++++------- spec/lib/zipline/zip_generator_spec.rb | 2 +- spec/lib/zipline/zipline_spec.rb | 8 ++-- zipline.gemspec | 2 +- 9 files changed, 41 insertions(+), 133 deletions(-) delete mode 100644 lib/zipline/chunked_body.rb delete mode 100644 lib/zipline/tempfile_body.rb diff --git a/README.md b/README.md index 6924ee8..4da4686 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A gem to stream dynamically generated zip files from a rails application. Unlike - Removes need for large disk space or memory allocation to generate zips, even huge zips. So it works on Heroku. - The user begins downloading immediately, which decreaceses latency, download time, and timeouts on Heroku. -Zipline now depends on [zip tricks](https://github.com/WeTransfer/zip_tricks), and you might want to just use that directly if you have more advanced use cases. +Zipline now depends on [zip_kit](https://github.com/julik/zip_kit), and you might want to just use that directly if you have more advanced use cases. ## Installation @@ -43,7 +43,7 @@ class MyController < ApplicationController files = users.map{ |user| [user.avatar, "#{user.username}.png", modification_time: 1.day.ago] } # we can force duplicate file names to be renamed, or raise an error - # we can also pass in our own writer if required to conform with the Delegated [ZipTricks::Streamer object](https://github.com/WeTransfer/zip_tricks/blob/main/lib/zip_tricks/streamer.rb#L147) object. + # we can also pass in our own writer if required to conform with the delegated [ZipKit::Streamer object](https://github.com/julik/zip_kit/blob/main/lib/zip_kit/streamer.rb#L147) object. zipline(files, 'avatars.zip', auto_rename_duplicate_filenames: true) end end @@ -93,7 +93,7 @@ For directories, just give the files names like "directory/file". ```Ruby avatars = [ - # remote_url zip_path zip_tricks_options + # remote_url zip_path write_file options for Streamer [ 'http://www.example.com/user1.png', 'avatars/user1.png', modification_time: Time.now.utc ] [ 'http://www.example.com/user2.png', 'avatars/user2.png', modification_time: 1.day.ago ] [ 'http://www.example.com/user3.png', 'avatars/user3.png' ] diff --git a/lib/zipline.rb b/lib/zipline.rb index 5d2bc49..594e4c4 100644 --- a/lib/zipline.rb +++ b/lib/zipline.rb @@ -1,9 +1,7 @@ require 'content_disposition' +require 'zip_kit' require 'zipline/version' -require 'zip_tricks' require 'zipline/zip_generator' -require 'zipline/chunked_body' -require 'zipline/tempfile_body' # class MyController < ApplicationController # include Zipline @@ -15,7 +13,6 @@ # end module Zipline def zipline(files, zipname = 'zipline.zip', **kwargs_for_new) - zip_generator = ZipGenerator.new(files, **kwargs_for_new) headers['Content-Disposition'] = ContentDisposition.format(disposition: 'attachment', filename: zipname) headers['Content-Type'] = Mime::Type.lookup_by_extension('zip').to_s response.sending_file = true @@ -24,32 +21,14 @@ def zipline(files, zipname = 'zipline.zip', **kwargs_for_new) # Disables Rack::ETag if it is enabled (prevent buffering) # see https://github.com/rack/rack/issues/1619#issuecomment-606315714 self.response.headers['Last-Modified'] = Time.now.httpdate - if request.get_header("HTTP_VERSION") == "HTTP/1.0" # If HTTP/1.0 is used it is not possible to stream, and if that happens it usually will be # unclear why buffering is happening. Some info in the log is the least one can do. logger.warn { "The downstream HTTP proxy/LB insists on HTTP/1.0 protocol, ZIP response will be buffered." } if logger - - # If we use Rack::ContentLength it would run through our ZIP block twice - once to calculate the content length - # of the response, and once - to serve. We can trade performance for disk space and buffer the response into a Tempfile - # since we are already buffering. - tempfile_body = TempfileBody.new(request.env, zip_generator) - headers["Content-Length"] = tempfile_body.size.to_s - headers["X-Zipline-Output"] = "buffered" - self.response_body = tempfile_body - else - # Disable buffering for both nginx and Google Load Balancer, see - # https://cloud.google.com/appengine/docs/flexible/how-requests-are-handled?tab=python#x-accel-buffering - response.headers["X-Accel-Buffering"] = "no" - - # Make sure Rack::ContentLength does not try to compute a content length, - # and remove the one already set - headers.delete("Content-Length") - - # and send out in chunked encoding - headers["Transfer-Encoding"] = "chunked" - headers["X-Zipline-Output"] = "streamed" - self.response_body = Chunked.new(zip_generator) end + + zip_generator = ZipGenerator.new(request.env, files, **kwargs_for_new) + response.headers.merge!(zip_generator.headers) + self.response_body = zip_generator end end diff --git a/lib/zipline/chunked_body.rb b/lib/zipline/chunked_body.rb deleted file mode 100644 index f226b4c..0000000 --- a/lib/zipline/chunked_body.rb +++ /dev/null @@ -1,31 +0,0 @@ -module Zipline - # A body wrapper that emits chunked responses, creating valid - # "Transfer-Encoding: chunked" HTTP response body. This is copied from Rack::Chunked::Body, - # because Rack is not going to include that class after version 3.x - # Rails has a substitute class for this inside ActionController::Streaming, - # but that module is a private constant in the Rails codebase, and is thus - # considered "private" from the Rails standpoint. It is not that much code to - # carry, so we copy it into our code. - class Chunked - TERM = "\r\n" - TAIL = "0#{TERM}" - - def initialize(body) - @body = body - end - - # For each string yielded by the response body, yield - # the element in chunked encoding - and finish off with a terminator - def each - term = TERM - @body.each do |chunk| - size = chunk.bytesize - next if size == 0 - - yield [size.to_s(16), term, chunk.b, term].join - end - yield TAIL - yield term - end - end -end diff --git a/lib/zipline/tempfile_body.rb b/lib/zipline/tempfile_body.rb deleted file mode 100644 index 7723d05..0000000 --- a/lib/zipline/tempfile_body.rb +++ /dev/null @@ -1,52 +0,0 @@ -module Zipline - # Contains a file handle which can be closed once the response finishes sending. - # It supports `to_path` so that `Rack::Sendfile` can intercept it - class TempfileBody - TEMPFILE_NAME_PREFIX = "zipline-tf-body-" - attr_reader :tempfile - - # @param body[#each] the enumerable that yields bytes, usually a `RackBody`. - # The `body` will be read in full immediately and closed. - def initialize(env, body) - @tempfile = Tempfile.new(TEMPFILE_NAME_PREFIX) - # Rack::TempfileReaper calls close! on tempfiles which get buffered - # We wil assume that it works fine with Rack::Sendfile (i.e. the path - # to the file getting served gets used before we unlink the tempfile) - env['rack.tempfiles'] ||= [] - env['rack.tempfiles'] << @tempfile - - @tempfile.binmode - - body.each { |bytes| @tempfile << bytes } - body.close if body.respond_to?(:close) - - @tempfile.flush - end - - # Returns the size of the contained `Tempfile` so that a correct - # Content-Length header can be set - # - # @return [Integer] - def size - @tempfile.size - end - - # Returns the path to the `Tempfile`, so that Rack::Sendfile can send this response - # using the downstream webserver - # - # @return [String] - def to_path - @tempfile.to_path - end - - # Stream the file's contents if `Rack::Sendfile` isn't present. - # - # @return [void] - def each - @tempfile.rewind - while chunk = @tempfile.read(16384) - yield chunk - end - end - end -end diff --git a/lib/zipline/version.rb b/lib/zipline/version.rb index dbfe957..b62df75 100644 --- a/lib/zipline/version.rb +++ b/lib/zipline/version.rb @@ -1,3 +1,3 @@ module Zipline - VERSION = "1.6.0" + VERSION = "1.7.0" end diff --git a/lib/zipline/zip_generator.rb b/lib/zipline/zip_generator.rb index 61fd861..cd15801 100644 --- a/lib/zipline/zip_generator.rb +++ b/lib/zipline/zip_generator.rb @@ -3,25 +3,22 @@ module Zipline class ZipGenerator # takes an array of pairs [[uploader, filename], ... ] - def initialize(files, **kwargs_for_streamer) - # Use RackBody as it has buffering built-in in zip_tricks 5.x+ - @body = ZipTricks::RackBody.new(**kwargs_for_streamer) do |streamer| + def initialize(env, files, **kwargs_for_streamer) + zip_enumerator = ZipKit::OutputEnumerator.new(**kwargs_for_streamer) do |streamer| files.each do |file, name, options = {}| handle_file(streamer, file, name.to_s, options) end end + + @headers, @body = recording_stream_exceptions do + # If buffering is used, the streaming block will be executed immediately to compute Content-Length + zip_enumerator.to_headers_and_rack_response_body(env) + end end def each(&block) return to_enum(:each) unless block_given? - @body.each(&block) - rescue => e - # Since most APM packages do not trace errors occurring within streaming - # Rack bodies, it can be helpful to print the error to the Rails log at least - error_message = "zipline: an exception (#{e.inspect}) was raised when serving the ZIP body." - error_message += " The error occurred when handling #{@filename.inspect}" if @filename - logger.error(error_message) - raise + recording_stream_exceptions { @body.each(&block) } end def handle_file(streamer, file, name, options) @@ -72,7 +69,7 @@ def normalize(file) end def write_file(streamer, file, name, options) - streamer.write_deflated_file(name, **options.slice(:modification_time)) do |writer_for_file| + streamer.write_file(name, **options.slice(:modification_time)) do |writer_for_file| if file[:url] the_remote_uri = URI(file[:url]) @@ -92,12 +89,27 @@ def write_file(streamer, file, name, options) end end - def is_io?(io_ish) - io_ish.respond_to? :read + def headers + @headers end private + def recording_stream_exceptions + yield + rescue => e + # Since most APM packages do not trace errors occurring within streaming + # Rack bodies, it can be helpful to print the error to the Rails log at least + error_message = "zipline: an exception (#{e.inspect}) was raised when serving the ZIP body." + error_message += " The error occurred when handling #{@filename.inspect}" if @filename + logger.error(error_message) + raise + end + + def is_io?(io_ish) + io_ish.respond_to? :read + end + def logger # Rails is not defined in our tests, and might as well not be defined # elsewhere - or the logger might not be configured correctly diff --git a/spec/lib/zipline/zip_generator_spec.rb b/spec/lib/zipline/zip_generator_spec.rb index 442d243..cc6c355 100644 --- a/spec/lib/zipline/zip_generator_spec.rb +++ b/spec/lib/zipline/zip_generator_spec.rb @@ -38,7 +38,7 @@ def to_s let(:file){ directory.files.create(file_attributes) } describe '.normalize' do - let(:generator){ Zipline::ZipGenerator.new([])} + let(:generator){ Zipline::ZipGenerator.new(_files = [], _env = {})} context "CarrierWave" do context "Remote" do let(:file){ CarrierWave::Storage::Fog::File.new(nil,nil,nil) } diff --git a/spec/lib/zipline/zipline_spec.rb b/spec/lib/zipline/zipline_spec.rb index b828a39..cc88cc3 100644 --- a/spec/lib/zipline/zipline_spec.rb +++ b/spec/lib/zipline/zipline_spec.rb @@ -29,7 +29,7 @@ def download_zip_with_error_during_streaming end end - it 'passes keyword parameters to ZipTricks::Streamer' do + it 'passes keyword parameters to ZipKit::Streamer' do fake_rack_env = { "HTTP_VERSION" => "HTTP/1.0", "REQUEST_METHOD" => "GET", @@ -39,7 +39,7 @@ def download_zip_with_error_during_streaming "SERVER_NAME" => "host.example", "rack.input" => StringIO.new, } - expect(ZipTricks::Streamer).to receive(:new).with(anything, auto_rename_duplicate_filenames: false).and_call_original + expect(ZipKit::Streamer).to receive(:new).with(anything, auto_rename_duplicate_filenames: false).and_call_original status, headers, body = FakeController.action(:download_zip).call(fake_rack_env) @@ -56,13 +56,13 @@ def download_zip_with_error_during_streaming "SERVER_NAME" => "host.example", "rack.input" => StringIO.new, } - expect(ZipTricks::Streamer).to receive(:new).with(anything, auto_rename_duplicate_filenames: false).and_call_original fake_logger = double() expect(Logger).to receive(:new).and_return(fake_logger) expect(fake_logger).to receive(:error).with(instance_of(String)) expect { - FakeController.action(:download_zip_with_error_during_streaming).call(fake_rack_env) + status, headers, body = FakeController.action(:download_zip_with_error_during_streaming).call(fake_rack_env) + body.each { } }.to raise_error(/Something wonky/) end end diff --git a/zipline.gemspec b/zipline.gemspec index 3946713..85577bb 100644 --- a/zipline.gemspec +++ b/zipline.gemspec @@ -20,7 +20,7 @@ Gem::Specification.new do |gem| gem.add_dependency 'actionpack', ['>= 6.0', '< 8.0'] gem.add_dependency 'content_disposition', '~> 1.0' - gem.add_dependency 'zip_tricks', ['>= 4.8.3', '< 6'] # Minimum to 4.8.3 which is the last-released MIT version + gem.add_dependency 'zip_kit', ['~> 6', '>= 6.0.1', '< 7'] gem.add_development_dependency 'rspec', '~> 3' gem.add_development_dependency 'fog-aws' From a8d903aae8f07fb5dff1454c7376de95dc3069c4 Mon Sep 17 00:00:00 2001 From: Julik Tarkhanov Date: Fri, 1 Mar 2024 15:58:20 +0100 Subject: [PATCH 2/2] Swapped args in mock --- spec/lib/zipline/zip_generator_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/lib/zipline/zip_generator_spec.rb b/spec/lib/zipline/zip_generator_spec.rb index cc6c355..c56db71 100644 --- a/spec/lib/zipline/zip_generator_spec.rb +++ b/spec/lib/zipline/zip_generator_spec.rb @@ -38,7 +38,7 @@ def to_s let(:file){ directory.files.create(file_attributes) } describe '.normalize' do - let(:generator){ Zipline::ZipGenerator.new(_files = [], _env = {})} + let(:generator){ Zipline::ZipGenerator.new(_env = {}, _files = [])} context "CarrierWave" do context "Remote" do let(:file){ CarrierWave::Storage::Fog::File.new(nil,nil,nil) }