diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..46e8cea --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,15 @@ +AllCops: + TargetRubyVersion: 2.0 + +LineLength: + Description: 'Limit lines to 100 characters.' + Max: 100 + Enabled: true + +Metrics/ModuleLength: + Exclude: + - "**/*_test.rb" + +Metrics/BlockLength: + Exclude: + - "**/*_test.rb" \ No newline at end of file diff --git a/Gemfile b/Gemfile index 38ff725..bdbdcea 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,5 @@ +# frozen_string_literal: true + source 'https://rubygems.org' # Specify your gem's dependencies in tdigest.gemspec diff --git a/Rakefile b/Rakefile index d6c5113..2d96143 100644 --- a/Rakefile +++ b/Rakefile @@ -1,9 +1,11 @@ -require "bundler/gem_tasks" -require "rake/testtask" +# frozen_string_literal: true + +require 'bundler/gem_tasks' +require 'rake/testtask' Rake::TestTask.new(:test) do |t| - t.libs << "test" - t.libs << "lib" + t.libs << 'test' + t.libs << 'lib' t.test_files = FileList['test/**/*_test.rb'] end diff --git a/bin/console b/bin/console index 7e467e0..09eca5f 100755 --- a/bin/console +++ b/bin/console @@ -1,7 +1,8 @@ #!/usr/bin/env ruby +# frozen_string_literal: true -require "bundler/setup" -require "tdigest" +require 'bundler/setup' +require 'tdigest' # You can add fixtures and/or initialization code here to make experimenting # with your gem easier. You can also use a different console, if you like. @@ -10,5 +11,5 @@ require "tdigest" # require "pry" # Pry.start -require "irb" +require 'irb' IRB.start diff --git a/lib/tdigest.rb b/lib/tdigest.rb index fcd9cfa..288fce4 100644 --- a/lib/tdigest.rb +++ b/lib/tdigest.rb @@ -1,5 +1,7 @@ -require "tdigest/version" -require "tdigest/tdigest" +# frozen_string_literal: true + +require 'tdigest/version' +require 'tdigest/tdigest' module TDigest # Your code goes here... diff --git a/lib/tdigest/centroid.rb b/lib/tdigest/centroid.rb index 3910253..f98aa47 100644 --- a/lib/tdigest/centroid.rb +++ b/lib/tdigest/centroid.rb @@ -1,4 +1,7 @@ +# frozen_string_literal: true + module TDigest + # Representation of a Centroid class Centroid attr_accessor :mean, :n, :cumn, :mean_cumn def initialize(mean, n, cumn, mean_cumn = nil) diff --git a/lib/tdigest/tdigest.rb b/lib/tdigest/tdigest.rb index 5ec1a0a..caeba9b 100644 --- a/lib/tdigest/tdigest.rb +++ b/lib/tdigest/tdigest.rb @@ -1,7 +1,10 @@ +# frozen_string_literal: true + require 'rbtree' require 'tdigest/centroid' module TDigest + # Main TDigest class class TDigest VERBOSE_ENCODING = 1 SMALL_ENCODING = 2 @@ -20,10 +23,9 @@ def initialize(delta = 0.01, k = 25, cx = 1.1) def +(other) # Uses delta, k and cx from the caller t = self.class.new(@delta, @k, @cx) - data = self.centroids.values + other.centroids.values - while data.length > 0 - t.push_centroid(data.delete_at(rand(data.length))) - end + data = centroids.values + other.centroids.values + t.push_centroid(data.delete_at(rand(data.length))) until data.empty? + t end @@ -56,7 +58,7 @@ def as_small_bytes arr << b n = n >> 7 k += 1 - fail 'Unreasonable large number' if k > 6 + raise 'Unreasonable large number' if k > 6 end arr << n end @@ -77,7 +79,7 @@ def bound_mean(x) def bound_mean_cumn(cumn) last_c = nil bounds = [] - matches = @centroids.each do |k, v| + @centroids.each do |_, v| if v.mean_cumn == cumn bounds << v break @@ -108,7 +110,7 @@ def compression end def find_nearest(x) - return nil if size == 0 + return nil if size.zero? ceil = @centroids.upper_bound(x) floor = @centroids.lower_bound(x) @@ -139,7 +141,7 @@ def p_rank(x) max = @centroids.last x.map! do |item| - if size == 0 + if size.zero? nil elsif item < min[1].mean 0.0 @@ -163,10 +165,10 @@ def percentile(p) is_array = p.is_a? Array p = [p] unless is_array p.map! do |item| - unless (0..1).include? item - fail ArgumentError, "p should be in [0,1], got #{item}" + unless (0..1).cover? item + raise ArgumentError, "p should be in [0,1], got #{item}" end - if size == 0 + if size.zero? nil else _cumulate(true) @@ -219,7 +221,7 @@ def self.from_bytes(bytes) case format when VERBOSE_ENCODING array = bytes[start_idx..-1].unpack("d#{size}L#{size}") - means, counts = array.each_slice(size).to_a if array.size > 0 + means, counts = array.each_slice(size).to_a unless array.empty? when SMALL_ENCODING means = bytes[start_idx..(start_idx + 4 * size)].unpack("f#{size}") # Decode delta encoding of means @@ -237,7 +239,7 @@ def self.from_bytes(bytes) z = 0x7f & v shift = 7 while (v & 0x80) != 0 - fail 'Shift too large in decode' if shift > 28 + raise 'Shift too large in decode' if shift > 28 v = counts_bytes.shift || 0 z += (v & 0x7f) << shift shift += 7 @@ -245,9 +247,9 @@ def self.from_bytes(bytes) counts << z end # This shouldn't happen - fail 'Mismatch' unless counts.size == means.size + raise 'Mismatch' unless counts.size == means.size else - fail 'Unknown compression format' + raise 'Unknown compression format' end if means && counts means.zip(counts).each { |val| tdigest.push(val[0], val[1]) } @@ -280,12 +282,12 @@ def _add_weight(nearest, x, n) def _cumulate(exact = false, force = false) unless force - factor = if @last_cumulate == 0 - Float::INFINITY - else - (@n.to_f / @last_cumulate) - end - return if @n == @last_cumulate || (!exact && @cx && @cx > (factor)) + factor = if @last_cumulate.zero? + Float::INFINITY + else + (@n.to_f / @last_cumulate) + end + return if @n == @last_cumulate || (!exact && @cx && @cx > factor) end cumn = 0 @@ -318,7 +320,7 @@ def _digest(x, n) else p = nearest.mean_cumn.to_f / @n max_n = (4 * @n * @delta * p * (1 - p)).floor - if (max_n - nearest.n >= n) + if max_n - nearest.n >= n _add_weight(nearest, x, n) else _new_centroid(x, n, nearest.cumn) @@ -331,9 +333,7 @@ def _digest(x, n) # it may be due to values being inserted in sorted order. # We combat that by replaying the centroids in random order, # which is what compress! does - if @centroids.size > (@k / @delta) - compress! - end + compress! if @centroids.size > (@k / @delta) nil end diff --git a/lib/tdigest/version.rb b/lib/tdigest/version.rb index 70974df..303a247 100644 --- a/lib/tdigest/version.rb +++ b/lib/tdigest/version.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module TDigest - VERSION = "0.1.1" + VERSION = '0.1.1' end diff --git a/test/tdigest_test.rb b/test/tdigest_test.rb index 76508d4..fb6b70e 100644 --- a/test/tdigest_test.rb +++ b/test/tdigest_test.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'test_helper' class TDigestTest < Minitest::Test @@ -15,7 +17,7 @@ def test_that_it_has_a_version_number 10.times { tdigest.push(rand * 100) } bytes = tdigest.as_bytes new_tdigest = ::TDigest::TDigest.from_bytes(bytes) - new_tdigest.percentile(0.9).must_equal tdigest.percentile(0.9) + new_tdigest.percentile(0.9).must_equal(tdigest.percentile(0.9)) new_tdigest.as_bytes.must_equal bytes end @@ -40,7 +42,8 @@ def test_that_it_has_a_version_number new_tdigest = ::TDigest::TDigest.from_bytes(bytes) # Expect some rounding error due to compression new_tdigest.percentile(0.9).round(5).must_equal( - tdigest.percentile(0.9).round(5)) + tdigest.percentile(0.9).round(5) + ) new_tdigest.as_small_bytes.must_equal bytes end @@ -91,9 +94,9 @@ def test_that_it_has_a_version_number tdigest.push(values) tdigest.compress! - 0.step(1,0.1).each do |i| + 0.step(1, 0.1).each do |i| q = tdigest.percentile(i) - maxerr = [maxerr, (i-q).abs].max + maxerr = [maxerr, (i - q).abs].max end assert_operator maxerr, :<, 0.01 @@ -104,7 +107,8 @@ def test_that_it_has_a_version_number describe '#push' do it "calls _cumulate so won't crash because of uninitialized mean_cumn" do td = TDigest::TDigest.new - td.push [125000000.0, + td.push [ + 125000000.0, 104166666.66666666, 135416666.66666666, 104166666.66666666, @@ -135,7 +139,8 @@ def test_that_it_has_a_version_number 113270270.27027026, 154459459.45945945, 123829787.23404256, - 103191489.36170213] + 103191489.36170213 + ] end it 'does not blow up if data comes in sorted' do @@ -184,7 +189,7 @@ def test_that_it_has_a_version_number it 'has the size of the two digests combined' do new_tdigest = tdigest + @other - new_tdigest.size.must_equal (tdigest.size + @other.size) + new_tdigest.size.must_equal(tdigest.size + @other.size) end end end @@ -193,7 +198,7 @@ def test_that_it_has_a_version_number it 'works with empty tdigests' do other = ::TDigest::TDigest.new(0.001, 50, 1.2) tdigest.merge!(other) - (tdigest).centroids.size.must_equal 0 + tdigest.centroids.size.must_equal(0) end describe 'with populated tdigests' do @@ -206,7 +211,7 @@ def test_that_it_has_a_version_number end it 'has the parameters of the calling tdigest' do - vars = [:@delta, :@k, :@cs] + vars = %i[@delta @k @cs] expected = Hash[vars.map { |v| [v, tdigest.instance_variable_get(v)] }] tdigest.merge!(@other) vars.each do |v| diff --git a/test/test_helper.rb b/test/test_helper.rb index c680b50..736cfbc 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) require 'simplecov'