Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance: specify string sizes in advance #11592

Merged

Conversation

BlobCodes
Copy link
Contributor

@BlobCodes BlobCodes commented Dec 15, 2021

Improves performance of multiple methods by specifying String.build capacity, or using interpolation.

Benchmark Code
require "benchmark"

puts "Benchmark 2: URI#request_target (9095f9bc670b693bc3eabb665ddfd31eefbdfea1)"

require "uri"

class URI
  def new_request_target : String
    string_size = 1
    string_size += @query ? @query.not_nil!.bytesize : -1
    string_size += @path.empty? ? 1 : @path.bytesize

    String.build(string_size) do |str|
      if @path.empty?
        str << "/" unless opaque?
      else
        str << @path
      end

      if (query = @query) && !query.empty?
        str << '?' << query
      end
    end
  end
end

uri = URI.parse "http://example.com/posts?id=30&limit=5#time=1305298413"

Benchmark.ips do |x|
  x.report("old") { uri.request_target }
  x.report("new") { uri.new_request_target }
end

puts "Benchmark 3: URI#resolve_path (e91ac9742385260fbdbc02b61ba24720792be775)"

require "uri"

class URI
  def new_resolve(uri : URI | String) : URI
    if uri.is_a?(URI)
      target = uri.dup
    else
      target = URI.parse(uri)
    end

    if target.absolute? || opaque?
      return target
    end

    target.scheme = scheme

    unless target.host || target.user
      target.host = host
      target.port = port
      target.user = user
      target.password = password
      if target.path.empty?
        target.path = remove_dot_segments(path)
        target.query ||= query
      else
        base = path
        if base.empty? && target.absolute?
          base = "/"
        end
        target.path = new_resolve_path(target.path, base: base)
      end
    end

    target
  end

  private def new_resolve_path(path : String, base : String) : String
    unless path.starts_with?('/')
      if path.empty?
        path = base
      elsif !base.empty?
        out_base = base.ends_with?('/') ? base : base[0..base.rindex('/')]
        path = String.interpolation(out_base, path)
      end
    end
    remove_dot_segments(path)
  end
end

uri_base = URI.parse "http://example.com/posts?id=30&limit=5#time=1305298413"
uri_target = URI.parse "../posts/comments/12?id=30&limit=5#time=1305298413"

Benchmark.ips do |x|
  x.report("old") { uri_base.resolve(uri_target) }
  x.report("new") { uri_base.new_resolve(uri_target) }
end

puts "Benchmark 4: URI#relativize_path (341d74943ab0b0f363284a395bb385f87e92d0a5)"

require "uri"

class URI
  def new_relativize(uri : URI | String) : URI
    if uri.is_a?(URI)
      uri = uri.dup
    else
      uri = URI.parse(uri)
    end

    if uri.opaque? || opaque? || uri.scheme.try &.downcase != @scheme.try &.downcase ||
       uri.host.try &.downcase != @host.try &.downcase || uri.port != @port || uri.user != @user ||
       uri.password != @password
      return uri
    end

    query = uri.query
    query = nil if query == @query
    fragment = uri.fragment

    path = new_relativize_path(@path, uri.path)

    URI.new(path: path, query: query, fragment: uri.fragment)
  end

  private def new_relativize_path(base : String, dst : String) : String
    return "" if base == dst

    if base =~ %r{(?:\A|/)\.\.?(?:/|\z)} && dst.starts_with?('/')
      # dst has abnormal absolute path,
      # like "/./", "/../", "/x/../", ...
      return dst
    end

    base_path = base.split('/', remove_empty: true)
    dst_path = dst.split('/', remove_empty: true)

    dst_path << "" if dst.ends_with?('/')
    base_path.pop? unless base.ends_with?('/')

    # discard same parts
    while !dst_path.empty? && !base_path.empty?
      dst = dst_path.first
      base = base_path.first
      if dst == base
        base_path.shift
        dst_path.shift
      elsif dst == "."
        dst_path.shift
      elsif base == "."
        base_path.shift
      else
        break
      end
    end

    # calculate
    if base_path.empty?
      if dst_path.empty?
        "./"
      elsif dst_path.first.includes?(':') # (see RFC2396 Section 5)
        string_size = 1 + dst_path.sum { |path| path.bytesize } + dst_path.size
        String.build(string_size) do |io|
          io << "./"
          dst_path.join(io, '/')
        end
      else
        if dst_path.empty? || dst_path.first == ""
          "./"
        else
          dst_path.join('/')
        end
      end
    else
      string_size = 3 * base_path.size + dst_path.sum { |path| path.bytesize } + dst_path.size - 1
      String.build(string_size) do |io|
        base_path.size.times { io << "../" }
        dst_path.join(io, '/')
      end
    end
  end
end

uri_base = URI.parse "http://example.com/posts?id=30&limit=5#time=1305298413"
uri_target = URI.parse "http://example.com/posts/my_good_post/my_title/comments?hi=true"

Benchmark.ips do |x|
  x.report("old") { uri_base.relativize(uri_target) }
  x.report("new") { uri_base.new_relativize(uri_target) }
end

puts "Benchmark 6: HTTP::Cookie#to_cookie_header (0877acf7c72c7c17a3818ae37a9ebfeb6f7037dc)"

require "http/cookie"

class HTTP::Cookie
  def new_to_cookie_header : String
    String.build(@name.bytesize + @value.bytesize + 1) do |io|
      to_cookie_header(io)
    end
  end
end

cookie = HTTP::Cookie.new("my_key", "my_value")

Benchmark.ips do |x|
  x.report("old") { cookie.to_cookie_header }
  x.report("new") { cookie.new_to_cookie_header }
end

Improvement: ~20-30% (for most Strings, less if output string size close to 64)
Improvement (URI#resolve): Up to 11% (if !target.host && !target.user)
Removed an unused variable (tmp), specified String.build string sizes.
Improvement (URI#relativize): Up to ~11% (if method even reaches #relativize_path)
Improvement: Up to 50% (dependant on key- and value string sizes)
src/uri.cr Outdated Show resolved Hide resolved
src/uri.cr Show resolved Hide resolved
src/uri.cr Outdated Show resolved Hide resolved
Co-authored-by: Johannes Müller <straightshoota@gmail.com>
src/uri.cr Outdated Show resolved Hide resolved
src/uri.cr Outdated Show resolved Hide resolved
Co-authored-by: Sijawusz Pur Rahnama <sija@sija.pl>
@oprypin oprypin changed the title Performance/specify string sizes Performance: specify string sizes in advance Dec 15, 2021
Copy link
Member

@sdogruyol sdogruyol left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @BlobCodes 🙏

@straight-shoota straight-shoota added this to the 1.3.0 milestone Dec 16, 2021
@straight-shoota straight-shoota merged commit f13b235 into crystal-lang:master Dec 17, 2021
@BlobCodes BlobCodes deleted the performance/specify-string-sizes branch January 29, 2022 22:40
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants