Skip to content

Commit

Permalink
Merge d86e16f into 94a0dbb
Browse files Browse the repository at this point in the history
  • Loading branch information
wallin committed Jul 21, 2017
2 parents 94a0dbb + d86e16f commit 94d7390
Show file tree
Hide file tree
Showing 10 changed files with 78 additions and 44 deletions.
15 changes: 15 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
AllCops:
TargetRubyVersion: 2.0

LineLength:
Description: 'Limit lines to 100 characters.'
Max: 100
Enabled: true

Metrics/ModuleLength:
Exclude:
- "**/*_test.rb"

Metrics/BlockLength:
Exclude:
- "**/*_test.rb"
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

source 'https://rubygems.org'

# Specify your gem's dependencies in tdigest.gemspec
Expand Down
10 changes: 6 additions & 4 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
require "bundler/gem_tasks"
require "rake/testtask"
# frozen_string_literal: true

require 'bundler/gem_tasks'
require 'rake/testtask'

Rake::TestTask.new(:test) do |t|
t.libs << "test"
t.libs << "lib"
t.libs << 'test'
t.libs << 'lib'
t.test_files = FileList['test/**/*_test.rb']
end

Expand Down
7 changes: 4 additions & 3 deletions bin/console
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require "bundler/setup"
require "tdigest"
require 'bundler/setup'
require 'tdigest'

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.
Expand All @@ -10,5 +11,5 @@ require "tdigest"
# require "pry"
# Pry.start

require "irb"
require 'irb'
IRB.start
6 changes: 4 additions & 2 deletions lib/tdigest.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
require "tdigest/version"
require "tdigest/tdigest"
# frozen_string_literal: true

require 'tdigest/version'
require 'tdigest/tdigest'

module TDigest
# Your code goes here...
Expand Down
3 changes: 3 additions & 0 deletions lib/tdigest/centroid.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# frozen_string_literal: true

module TDigest
# Representation of a Centroid
class Centroid
attr_accessor :mean, :n, :cumn, :mean_cumn
def initialize(mean, n, cumn, mean_cumn = nil)
Expand Down
50 changes: 25 additions & 25 deletions lib/tdigest/tdigest.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# frozen_string_literal: true

require 'rbtree'
require 'tdigest/centroid'

module TDigest
# Main TDigest class
class TDigest
VERBOSE_ENCODING = 1
SMALL_ENCODING = 2
Expand All @@ -20,10 +23,9 @@ def initialize(delta = 0.01, k = 25, cx = 1.1)
def +(other)
# Uses delta, k and cx from the caller
t = self.class.new(@delta, @k, @cx)
data = self.centroids.values + other.centroids.values
while data.length > 0
t.push_centroid(data.delete_at(rand(data.length)))
end
data = centroids.values + other.centroids.values
t.push_centroid(data.delete_at(rand(data.length))) until data.empty?

t
end

Expand Down Expand Up @@ -56,7 +58,7 @@ def as_small_bytes
arr << b
n = n >> 7
k += 1
fail 'Unreasonable large number' if k > 6
raise 'Unreasonable large number' if k > 6
end
arr << n
end
Expand All @@ -77,7 +79,7 @@ def bound_mean(x)
def bound_mean_cumn(cumn)
last_c = nil
bounds = []
matches = @centroids.each do |k, v|
@centroids.each do |_, v|
if v.mean_cumn == cumn
bounds << v
break
Expand Down Expand Up @@ -108,7 +110,7 @@ def compression
end

def find_nearest(x)
return nil if size == 0
return nil if size.zero?

ceil = @centroids.upper_bound(x)
floor = @centroids.lower_bound(x)
Expand Down Expand Up @@ -139,7 +141,7 @@ def p_rank(x)
max = @centroids.last

x.map! do |item|
if size == 0
if size.zero?
nil
elsif item < min[1].mean
0.0
Expand All @@ -163,10 +165,10 @@ def percentile(p)
is_array = p.is_a? Array
p = [p] unless is_array
p.map! do |item|
unless (0..1).include? item
fail ArgumentError, "p should be in [0,1], got #{item}"
unless (0..1).cover? item
raise ArgumentError, "p should be in [0,1], got #{item}"
end
if size == 0
if size.zero?
nil
else
_cumulate(true)
Expand Down Expand Up @@ -219,7 +221,7 @@ def self.from_bytes(bytes)
case format
when VERBOSE_ENCODING
array = bytes[start_idx..-1].unpack("d#{size}L#{size}")
means, counts = array.each_slice(size).to_a if array.size > 0
means, counts = array.each_slice(size).to_a unless array.empty?
when SMALL_ENCODING
means = bytes[start_idx..(start_idx + 4 * size)].unpack("f#{size}")
# Decode delta encoding of means
Expand All @@ -237,17 +239,17 @@ def self.from_bytes(bytes)
z = 0x7f & v
shift = 7
while (v & 0x80) != 0
fail 'Shift too large in decode' if shift > 28
raise 'Shift too large in decode' if shift > 28
v = counts_bytes.shift || 0
z += (v & 0x7f) << shift
shift += 7
end
counts << z
end
# This shouldn't happen
fail 'Mismatch' unless counts.size == means.size
raise 'Mismatch' unless counts.size == means.size
else
fail 'Unknown compression format'
raise 'Unknown compression format'
end
if means && counts
means.zip(counts).each { |val| tdigest.push(val[0], val[1]) }
Expand Down Expand Up @@ -280,12 +282,12 @@ def _add_weight(nearest, x, n)

def _cumulate(exact = false, force = false)
unless force
factor = if @last_cumulate == 0
Float::INFINITY
else
(@n.to_f / @last_cumulate)
end
return if @n == @last_cumulate || (!exact && @cx && @cx > (factor))
factor = if @last_cumulate.zero?
Float::INFINITY
else
(@n.to_f / @last_cumulate)
end
return if @n == @last_cumulate || (!exact && @cx && @cx > factor)
end

cumn = 0
Expand Down Expand Up @@ -318,7 +320,7 @@ def _digest(x, n)
else
p = nearest.mean_cumn.to_f / @n
max_n = (4 * @n * @delta * p * (1 - p)).floor
if (max_n - nearest.n >= n)
if max_n - nearest.n >= n
_add_weight(nearest, x, n)
else
_new_centroid(x, n, nearest.cumn)
Expand All @@ -331,9 +333,7 @@ def _digest(x, n)
# it may be due to values being inserted in sorted order.
# We combat that by replaying the centroids in random order,
# which is what compress! does
if @centroids.size > (@k / @delta)
compress!
end
compress! if @centroids.size > (@k / @delta)

nil
end
Expand Down
4 changes: 3 additions & 1 deletion lib/tdigest/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

module TDigest
VERSION = "0.1.1"
VERSION = '0.1.1'
end
23 changes: 14 additions & 9 deletions test/tdigest_test.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

require 'test_helper'

class TDigestTest < Minitest::Test
Expand All @@ -15,7 +17,7 @@ def test_that_it_has_a_version_number
10.times { tdigest.push(rand * 100) }
bytes = tdigest.as_bytes
new_tdigest = ::TDigest::TDigest.from_bytes(bytes)
new_tdigest.percentile(0.9).must_equal tdigest.percentile(0.9)
new_tdigest.percentile(0.9).must_equal(tdigest.percentile(0.9))
new_tdigest.as_bytes.must_equal bytes
end

Expand All @@ -40,7 +42,8 @@ def test_that_it_has_a_version_number
new_tdigest = ::TDigest::TDigest.from_bytes(bytes)
# Expect some rounding error due to compression
new_tdigest.percentile(0.9).round(5).must_equal(
tdigest.percentile(0.9).round(5))
tdigest.percentile(0.9).round(5)
)
new_tdigest.as_small_bytes.must_equal bytes
end

Expand Down Expand Up @@ -91,9 +94,9 @@ def test_that_it_has_a_version_number
tdigest.push(values)
tdigest.compress!

0.step(1,0.1).each do |i|
0.step(1, 0.1).each do |i|
q = tdigest.percentile(i)
maxerr = [maxerr, (i-q).abs].max
maxerr = [maxerr, (i - q).abs].max
end

assert_operator maxerr, :<, 0.01
Expand All @@ -104,7 +107,8 @@ def test_that_it_has_a_version_number
describe '#push' do
it "calls _cumulate so won't crash because of uninitialized mean_cumn" do
td = TDigest::TDigest.new
td.push [125000000.0,
td.push [
125000000.0,
104166666.66666666,
135416666.66666666,
104166666.66666666,
Expand Down Expand Up @@ -135,7 +139,8 @@ def test_that_it_has_a_version_number
113270270.27027026,
154459459.45945945,
123829787.23404256,
103191489.36170213]
103191489.36170213
]
end

it 'does not blow up if data comes in sorted' do
Expand Down Expand Up @@ -184,7 +189,7 @@ def test_that_it_has_a_version_number

it 'has the size of the two digests combined' do
new_tdigest = tdigest + @other
new_tdigest.size.must_equal (tdigest.size + @other.size)
new_tdigest.size.must_equal(tdigest.size + @other.size)
end
end
end
Expand All @@ -193,7 +198,7 @@ def test_that_it_has_a_version_number
it 'works with empty tdigests' do
other = ::TDigest::TDigest.new(0.001, 50, 1.2)
tdigest.merge!(other)
(tdigest).centroids.size.must_equal 0
tdigest.centroids.size.must_equal(0)
end

describe 'with populated tdigests' do
Expand All @@ -206,7 +211,7 @@ def test_that_it_has_a_version_number
end

it 'has the parameters of the calling tdigest' do
vars = [:@delta, :@k, :@cs]
vars = %i[@delta @k @cs]
expected = Hash[vars.map { |v| [v, tdigest.instance_variable_get(v)] }]
tdigest.merge!(@other)
vars.each do |v|
Expand Down
2 changes: 2 additions & 0 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)

require 'simplecov'
Expand Down

0 comments on commit 94d7390

Please sign in to comment.