Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Mechanize to parse Content-Disposition. #5646

Merged
merged 1 commit into from
Jan 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
**/vendor/bundle
**/vendor/ruby
**/vendor/bundle-standalone/ruby/*/bin
**/vendor/bundle-standalone/ruby/*/build_info/
**/vendor/bundle-standalone/ruby/*/cache
**/vendor/bundle-standalone/ruby/*/extensions
**/vendor/bundle-standalone/ruby/*/gems/*/*
Expand Down Expand Up @@ -93,15 +94,33 @@

# Ignore rubocop's (and other) dependencies we don't wish to vendor
**/vendor/bundle-standalone/ruby/*/gems/ast-*/
**/vendor/bundle-standalone/ruby/*/gems/connection_pool-*/lib
**/vendor/bundle-standalone/ruby/*/gems/domain_name-*/lib
**/vendor/bundle-standalone/ruby/*/gems/http-cookie-*/lib
**/vendor/bundle-standalone/ruby/*/gems/jaro_winkler-*/
**/vendor/bundle-standalone/ruby/*/gems/mime-types-data-*/lib
**/vendor/bundle-standalone/ruby/*/gems/mime-types-*/lib
**/vendor/bundle-standalone/ruby/*/gems/mini_portile2-*/lib
**/vendor/bundle-standalone/ruby/*/gems/minitest-*/lib
**/vendor/bundle-standalone/ruby/*/gems/net-http-digest_auth-*/lib
**/vendor/bundle-standalone/ruby/*/gems/net-http-persistent-*/lib
**/vendor/bundle-standalone/ruby/*/gems/nokogiri-*/lib
**/vendor/bundle-standalone/ruby/*/gems/ntlm-http-*/lib
**/vendor/bundle-standalone/ruby/*/gems/parallel-*/
**/vendor/bundle-standalone/ruby/*/gems/parser-*/
**/vendor/bundle-standalone/ruby/*/gems/powerpack-*/
**/vendor/bundle-standalone/ruby/*/gems/rainbow-*/
**/vendor/bundle-standalone/ruby/*/gems/rubocop-0*/
**/vendor/bundle-standalone/ruby/*/gems/ruby-progressbar-*/
**/vendor/bundle-standalone/ruby/*/gems/unf_ext-*/lib
**/vendor/bundle-standalone/ruby/*/gems/unf-*/lib
**/vendor/bundle-standalone/ruby/*/gems/unicode-display_width-*/
**/vendor/bundle-standalone/ruby/*/gems/webrobots-*/lib

# Only include the `Mechanize::HTTP::ContentDispositionParser`.
**/vendor/bundle-standalone/ruby/*/gems/mechanize-*/lib
!**/vendor/bundle-standalone/ruby/*/gems/mechanize-*/lib/mechanize/http/content_disposition_parser.rb
!**/vendor/bundle-standalone/ruby/*/gems/mechanize-*/lib/mechanize/version.rb

# Ignore `bin` contents (again).
/bin
Expand Down
20 changes: 17 additions & 3 deletions Library/Homebrew/download_strategy.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
require "lazy_object"
require "cgi"

require "mechanize/version"
require "mechanize/http/content_disposition_parser"

class AbstractDownloadStrategy
extend Forwardable
include FileUtils
Expand Down Expand Up @@ -363,9 +366,20 @@ def resolve_url_basename_time(url)
end
end

filenames =
lines.map { |line| line[/^Content\-Disposition:\s*(?:inline|attachment);\s*filename=(["']?)([^;]+)\1/i, 2] }
.compact
content_disposition_parser = Mechanize::HTTP::ContentDispositionParser.new

# Extracts the download filename from a single response header line.
#
# Returns nil when the parser does not recognise the line as a
# Content-Disposition header. An extended `filename*` parameter of the form
# <charset>''<percent-encoded name> takes precedence over the plain
# `filename`. When `filename*` is malformed (no `''` separator or an empty
# charset) the plain `filename` is returned instead of raising.
parse_content_disposition = lambda do |line|
  content_disposition = content_disposition_parser.parse(line, true)
  next unless content_disposition

  # `to_s` turns a missing parameter into "", which splits into no parts,
  # leaving both locals nil.
  encoding, encoded_filename =
    content_disposition.parameters["filename*"].to_s.split("''", 2)

  if encoding.nil? || encoding.empty? || encoded_filename.nil?
    next content_disposition.filename
  end

  URI.decode_www_form_component(encoded_filename).encode(encoding)
end

filenames = lines.map(&parse_content_disposition).compact

time =
lines.map { |line| line[/^Last\-Modified:\s*(.+)/i, 1] }
Expand Down
1 change: 1 addition & 0 deletions Library/Homebrew/vendor/Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ source "https://rubygems.org"
gem "activesupport"
gem "concurrent-ruby"
gem "backports"
gem "mechanize"
gem "plist"
gem "ruby-macho"
gem "rubocop-rspec"
Expand Down
29 changes: 29 additions & 0 deletions Library/Homebrew/vendor/Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,34 @@ GEM
ast (2.4.0)
backports (3.11.4)
concurrent-ruby (1.1.4)
connection_pool (2.2.2)
domain_name (0.5.20180417)
unf (>= 0.0.5, < 1.0.0)
http-cookie (1.0.3)
domain_name (~> 0.5)
i18n (1.5.3)
concurrent-ruby (~> 1.0)
jaro_winkler (1.5.2)
mechanize (2.7.6)
domain_name (~> 0.5, >= 0.5.1)
http-cookie (~> 1.0)
mime-types (>= 1.17.2)
net-http-digest_auth (~> 1.1, >= 1.1.1)
net-http-persistent (>= 2.5.2)
nokogiri (~> 1.6)
ntlm-http (~> 0.1, >= 0.1.1)
webrobots (>= 0.0.9, < 0.2)
mime-types (3.2.2)
mime-types-data (~> 3.2015)
mime-types-data (3.2018.0812)
mini_portile2 (2.4.0)
minitest (5.11.3)
net-http-digest_auth (1.4.1)
net-http-persistent (3.0.0)
connection_pool (~> 2.2)
nokogiri (1.10.1)
mini_portile2 (~> 2.4.0)
ntlm-http (0.1.1)
parallel (1.13.0)
parser (2.6.0.0)
ast (~> 2.4.0)
Expand All @@ -34,7 +58,11 @@ GEM
thread_safe (0.3.6)
tzinfo (1.2.5)
thread_safe (~> 0.1)
unf (0.1.4)
unf_ext
unf_ext (0.0.7.5)
unicode-display_width (1.4.1)
webrobots (0.1.2)

PLATFORMS
ruby
Expand All @@ -43,6 +71,7 @@ DEPENDENCIES
activesupport
backports
concurrent-ruby
mechanize
plist
rubocop
rubocop-rspec
Expand Down
16 changes: 16 additions & 0 deletions Library/Homebrew/vendor/bundle-standalone/bundler/setup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,24 @@
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/ast-2.4.0/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/backports-3.11.4/lib"
$:.unshift "#{path}/"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/connection_pool-2.2.2/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/unf_ext-0.0.7.5"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/unf_ext-0.0.7.5/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/unf-0.1.4/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/domain_name-0.5.20180417/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/http-cookie-1.0.3/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/jaro_winkler-1.5.2"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/jaro_winkler-1.5.2/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mime-types-data-3.2018.0812/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mime-types-3.2.2/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/net-http-digest_auth-1.4.1/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/net-http-persistent-3.0.0/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mini_portile2-2.4.0/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/extensions/universal-darwin-18/2.3.0/nokogiri-1.10.1"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/nokogiri-1.10.1/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/ntlm-http-0.1.1/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/webrobots-0.1.2/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/mechanize-2.7.6/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/parallel-1.13.0/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/parser-2.6.0.0/lib"
$:.unshift "#{path}/../#{ruby_engine}/#{ruby_version}/gems/plist-3.5.0/lib"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
# coding: BINARY

require 'strscan'
require 'time'

class Mechanize::HTTP
  ##
  # Result of parsing a Content-Disposition header.  The parser in this file
  # fills +type+ with the disposition type (or nil when it is missing), moves
  # the well-known parameters into their own members and leaves every other
  # parameter in the +parameters+ Hash.

  ContentDisposition = Struct.new(
    :type,
    :filename,
    :creation_date,
    :modification_date,
    :read_date,
    :size,
    :parameters
  )
end

##
# Parses Content-Disposition headers, loosely following RFC 2183.
#
# Beyond RFC 2183, this parser allows:
#
# * Missing disposition-type
# * Multiple semicolons
# * Whitespace around semicolons

class Mechanize::HTTP::ContentDispositionParser

  attr_accessor :scanner # :nodoc:

  @parser = nil

  ##
  # Parses the disposition type and params in the +content_disposition+
  # string.  The "Content-Disposition:" must be removed.
  #
  # A single parser instance is created on first use and reused afterwards.

  def self.parse content_disposition
    @parser ||= self.new
    @parser.parse content_disposition
  end

  ##
  # Creates a new parser for Content-Disposition headers

  def initialize
    @scanner = nil
  end

  ##
  # Parses the +content_disposition+ header.  If +header+ is set to true the
  # "Content-Disposition:" portion will be parsed as well.
  #
  # Returns a Mechanize::HTTP::ContentDisposition, or nil when the input is
  # nil, empty, lacks the expected header name, or has no parseable
  # parameters.

  def parse content_disposition, header = false
    return nil if content_disposition.nil? || content_disposition.empty?

    @scanner = StringScanner.new content_disposition

    if header then
      return nil unless @scanner.scan(/Content-Disposition/i)
      return nil unless @scanner.scan(/:/)
      spaces
    end

    # Remember where the header value begins.  When the disposition-type is
    # missing, the token just read was really a parameter name and must be
    # re-read from here, not from the very start of the string (which would
    # land on the "Content-Disposition:" prefix when +header+ is true).
    value_start = @scanner.pos

    type = rfc_2045_token
    @scanner.scan(/;+/)

    if @scanner.peek(1) == '=' then
      @scanner.pos = value_start
      type = nil
    end

    disposition = Mechanize::HTTP::ContentDisposition.new type

    spaces

    parameters = parse_parameters
    return nil unless parameters

    disposition.filename          = parameters.delete 'filename'
    disposition.creation_date     = parameters.delete 'creation-date'
    disposition.modification_date = parameters.delete 'modification-date'
    disposition.read_date         = parameters.delete 'read-date'
    disposition.size              = parameters.delete 'size'
    disposition.parameters        = parameters

    disposition
  end

  ##
  # Extracts disposition-parm and returns a Hash keyed by the lower-cased
  # parameter name.  Date parameters become Time objects and size becomes an
  # Integer; every other value is kept as a String.
  #
  # Returns nil as soon as a parameter name, the "=" or a value cannot be
  # parsed.

  def parse_parameters
    parameters = {}

    loop do
      param = rfc_2045_token
      return nil unless param
      param.downcase!
      return nil unless @scanner.scan(/=/)

      value = case param
              when 'creation-date', 'modification-date', 'read-date' then
                parse_date rfc_2045_quoted_string
              when 'size' then
                # A missing value must fail the parse, not raise on nil.
                size = rfc_2045_value
                size && size.to_i(10)
              else
                rfc_2045_value
              end

      return nil unless value

      parameters[param] = value

      spaces

      # Stop at the end of input or when no further ";" separator follows.
      break if @scanner.eos? || !@scanner.scan(/;+/)

      spaces
    end

    parameters
  end

  ##
  #   quoted-string = <"> *(qtext/quoted-pair) <">
  #   qtext         = <any CHAR excepting <">, "\" & CR,
  #                    and including linear-white-space
  #   quoted-pair   = "\" CHAR
  #
  # Parses an RFC 2045 quoted-string and returns its unescaped content.  A
  # folded line (CRLF followed by tabs or spaces) is replaced by one space.
  # Returns nil when the opening or closing quote is missing or a lone
  # backslash ends the input.

  def rfc_2045_quoted_string
    return nil unless @scanner.scan(/"/)

    text = String.new

    loop do
      if chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/) then
        # run of qtext: anything except CR, the double quote and backslash
        text << chunk
      elsif @scanner.scan(/\\/) then
        # quoted-pair: the byte after the backslash is taken literally
        return nil if @scanner.eos?
        text << @scanner.get_byte
      elsif @scanner.scan(/\r\n[\t ]+/) then
        text << " "
      elsif @scanner.scan(/"/) then
        break
      else
        return nil
      end
    end

    text
  end

  ##
  #   token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, or tspecials>
  #
  # Parses an RFC 2045 token.  Returns nil when no token starts at the
  # current position.

  def rfc_2045_token
    @scanner.scan(/[^\000-\037\177()<>@,;:\\"\/\[\]?= ]+/)
  end

  ##
  #   value := token / quoted-string
  #
  # Parses an RFC 2045 value

  def rfc_2045_value
    if @scanner.peek(1) == '"' then
      rfc_2045_quoted_string
    else
      rfc_2045_token
    end
  end

  ##
  #   1*SP
  #
  # Parses spaces

  def spaces
    @scanner.scan(/ +/)
  end

  private

  ##
  # Converts a date parameter value into a Time, trying RFC 822 first and
  # ISO 8601 second.  Returns nil for a missing or unparseable +date+ so the
  # caller can fail the parse instead of raising.

  def parse_date date
    return nil unless date

    begin
      Time.rfc822 date
    rescue ArgumentError
      begin
        Time.iso8601 date
      rescue ArgumentError
        nil
      end
    end
  end

end

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
class Mechanize
  # Version of the vendored Mechanize gem.  Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = "2.7.6".freeze
end