Skip to content

Commit

Permalink
Fix more violations
Browse files Browse the repository at this point in the history
  • Loading branch information
wallin committed Jul 13, 2017
1 parent 71a2e26 commit 07f85d7
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 36 deletions.
15 changes: 14 additions & 1 deletion .rubocop.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,15 @@
AllCops:
TargetRubyVersion: 2.3
TargetRubyVersion: 2.3

LineLength:
Description: 'Limit lines to 100 characters.'
Max: 100
Enabled: true

Metrics/ModuleLength:
Exclude:
- "**/*_test.rb"

Metrics/BlockLength:
Exclude:
- "**/*_test.rb"
1 change: 1 addition & 0 deletions lib/tdigest/centroid.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# frozen_string_literal: true

module TDigest
# Representation of a Centroid
class Centroid
attr_accessor :mean, :n, :cumn, :mean_cumn
def initialize(mean, n, cumn, mean_cumn = nil)
Expand Down
50 changes: 24 additions & 26 deletions lib/tdigest/tdigest.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
require 'tdigest/centroid'

module TDigest
# Main TDigest class
class TDigest
VERBOSE_ENCODING = 1
SMALL_ENCODING = 2
Expand All @@ -22,10 +23,9 @@ def initialize(delta = 0.01, k = 25, cx = 1.1)
def +(other)
# Uses delta, k and cx from the caller
t = self.class.new(@delta, @k, @cx)
data = self.centroids.values + other.centroids.values
while data.length > 0
t.push_centroid(data.delete_at(rand(data.length)))
end
data = centroids.values + other.centroids.values
t.push_centroid(data.delete_at(rand(data.length))) until data.empty?

t
end

Expand Down Expand Up @@ -53,12 +53,12 @@ def as_small_bytes
c_arr = @centroids.each_with_object([]) do |(_, c), arr|
k = 0
n = c.n
while n < 0 || n > 0x7f
while n.negative? || n > 0x7f
b = 0x80 | (0x7f & n)
arr << b
n = n >> 7
k += 1
fail 'Unreasonable large number' if k > 6
raise 'Unreasonable large number' if k > 6
end
arr << n
end
Expand All @@ -79,7 +79,7 @@ def bound_mean(x)
def bound_mean_cumn(cumn)
last_c = nil
bounds = []
matches = @centroids.each do |k, v|
@centroids.each do |_, v|
if v.mean_cumn == cumn
bounds << v
break
Expand Down Expand Up @@ -110,7 +110,7 @@ def compression
end

def find_nearest(x)
return nil if size == 0
return nil if size.zero?

ceil = @centroids.upper_bound(x)
floor = @centroids.lower_bound(x)
Expand Down Expand Up @@ -141,7 +141,7 @@ def p_rank(x)
max = @centroids.last

x.map! do |item|
if size == 0
if size.zero?
nil
elsif item < min[1].mean
0.0
Expand All @@ -165,10 +165,10 @@ def percentile(p)
is_array = p.is_a? Array
p = [p] unless is_array
p.map! do |item|
unless (0..1).include? item
fail ArgumentError, "p should be in [0,1], got #{item}"
unless (0..1).cover? item
raise ArgumentError, "p should be in [0,1], got #{item}"
end
if size == 0
if size.zero?
nil
else
_cumulate(true)
Expand Down Expand Up @@ -221,7 +221,7 @@ def self.from_bytes(bytes)
case format
when VERBOSE_ENCODING
array = bytes[start_idx..-1].unpack("d#{size}L#{size}")
means, counts = array.each_slice(size).to_a if array.size > 0
means, counts = array.each_slice(size).to_a unless array.empty?
when SMALL_ENCODING
means = bytes[start_idx..(start_idx + 4 * size)].unpack("f#{size}")
# Decode delta encoding of means
Expand All @@ -239,17 +239,17 @@ def self.from_bytes(bytes)
z = 0x7f & v
shift = 7
while (v & 0x80) != 0
fail 'Shift too large in decode' if shift > 28
raise 'Shift too large in decode' if shift > 28
v = counts_bytes.shift || 0
z += (v & 0x7f) << shift
shift += 7
end
counts << z
end
# This shouldn't happen
fail 'Mismatch' unless counts.size == means.size
raise 'Mismatch' unless counts.size == means.size
else
fail 'Unknown compression format'
raise 'Unknown compression format'
end
if means && counts
means.zip(counts).each { |val| tdigest.push(val[0], val[1]) }
Expand Down Expand Up @@ -282,12 +282,12 @@ def _add_weight(nearest, x, n)

def _cumulate(exact = false, force = false)
unless force
factor = if @last_cumulate == 0
Float::INFINITY
else
(@n.to_f / @last_cumulate)
end
return if @n == @last_cumulate || (!exact && @cx && @cx > (factor))
factor = if @last_cumulate.zero?
Float::INFINITY
else
(@n.to_f / @last_cumulate)
end
return if @n == @last_cumulate || (!exact && @cx && @cx > factor)
end

cumn = 0
Expand Down Expand Up @@ -320,7 +320,7 @@ def _digest(x, n)
else
p = nearest.mean_cumn.to_f / @n
max_n = (4 * @n * @delta * p * (1 - p)).floor
if (max_n - nearest.n >= n)
if max_n - nearest.n >= n
_add_weight(nearest, x, n)
else
_new_centroid(x, n, nearest.cumn)
Expand All @@ -333,9 +333,7 @@ def _digest(x, n)
# it may be due to values being inserted in sorted order.
# We combat that by replaying the centroids in random order,
# which is what compress! does
if @centroids.size > (@k / @delta)
compress!
end
compress! if @centroids.size > (@k / @delta)

nil
end
Expand Down
21 changes: 12 additions & 9 deletions test/tdigest_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_that_it_has_a_version_number
10.times { tdigest.push(rand * 100) }
bytes = tdigest.as_bytes
new_tdigest = ::TDigest::TDigest.from_bytes(bytes)
new_tdigest.percentile(0.9).must_equal tdigest.percentile(0.9)
new_tdigest.percentile(0.9).must_equal(tdigest.percentile(0.9))
new_tdigest.as_bytes.must_equal bytes
end

Expand All @@ -42,7 +42,8 @@ def test_that_it_has_a_version_number
new_tdigest = ::TDigest::TDigest.from_bytes(bytes)
# Expect some rounding error due to compression
new_tdigest.percentile(0.9).round(5).must_equal(
tdigest.percentile(0.9).round(5))
tdigest.percentile(0.9).round(5)
)
new_tdigest.as_small_bytes.must_equal bytes
end

Expand Down Expand Up @@ -93,9 +94,9 @@ def test_that_it_has_a_version_number
tdigest.push(values)
tdigest.compress!

0.step(1,0.1).each do |i|
0.step(1, 0.1).each do |i|
q = tdigest.percentile(i)
maxerr = [maxerr, (i-q).abs].max
maxerr = [maxerr, (i - q).abs].max
end

assert_operator maxerr, :<, 0.01
Expand All @@ -106,7 +107,8 @@ def test_that_it_has_a_version_number
describe '#push' do
it "calls _cumulate so won't crash because of uninitialized mean_cumn" do
td = TDigest::TDigest.new
td.push [125000000.0,
td.push [
125000000.0,
104166666.66666666,
135416666.66666666,
104166666.66666666,
Expand Down Expand Up @@ -137,7 +139,8 @@ def test_that_it_has_a_version_number
113270270.27027026,
154459459.45945945,
123829787.23404256,
103191489.36170213]
103191489.36170213
]
end

it 'does not blow up if data comes in sorted' do
Expand Down Expand Up @@ -186,7 +189,7 @@ def test_that_it_has_a_version_number

it 'has the size of the two digests combined' do
new_tdigest = tdigest + @other
new_tdigest.size.must_equal (tdigest.size + @other.size)
new_tdigest.size.must_equal(tdigest.size + @other.size)
end
end
end
Expand All @@ -195,7 +198,7 @@ def test_that_it_has_a_version_number
it 'works with empty tdigests' do
other = ::TDigest::TDigest.new(0.001, 50, 1.2)
tdigest.merge!(other)
(tdigest).centroids.size.must_equal 0
tdigest.centroids.size.must_equal(0)
end

describe 'with populated tdigests' do
Expand All @@ -208,7 +211,7 @@ def test_that_it_has_a_version_number
end

it 'has the parameters of the calling tdigest' do
vars = [:@delta, :@k, :@cs]
vars = %i[@delta @k @cs]
expected = Hash[vars.map { |v| [v, tdigest.instance_variable_get(v)] }]
tdigest.merge!(@other)
vars.each do |v|
Expand Down

0 comments on commit 07f85d7

Please sign in to comment.