Skip to content

Commit

Permalink
Merge a3b6598 into 6ae0f3d
Browse files Browse the repository at this point in the history
  • Loading branch information
clok committed Jun 15, 2022
2 parents 6ae0f3d + a3b6598 commit 2ca8b46
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 150 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ bin/
dist/
.idea/
tmp/
benchmark
219 changes: 130 additions & 89 deletions commands/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ package commands
import (
"fmt"
"github.com/clok/kemba"
"github.com/montanaflynn/stats"
"golang.org/x/text/language"
"golang.org/x/text/message"
"io"
"math"
"os"
"os/exec"
"runtime"
"sort"
"strconv"
"strings"
)
Expand Down Expand Up @@ -181,118 +182,158 @@ func processLine(opts *processLineInput) (float64, error) {
return f, nil
}

func processSample(data []float64) (ResultSet, error) {
var res ResultSet
var err error
func processSample(data []float64) (res ResultSet, err error) {
// Sort list
sort.Float64s(data)

// set data based on sorted slice
res.n = len(data)
res.min = data[0]
res.max = data[res.n-1]

// sum
res.sum = calcSum(data)

// unstable mean
res.mean = calcMean(data)

// median
res.median = calcMedian(data)

// mode
res.mode = calcMode(data)

// variance
res.variance = calcVariance(data, res.mean)

// standard deviation
res.stdev = math.Sqrt(res.variance)

// Percentiles
res.p50 = calcPercentile(data, 50)
res.p75 = calcPercentile(data, 75)
res.p90 = calcPercentile(data, 90)
res.p95 = calcPercentile(data, 95)
res.p99 = calcPercentile(data, 99)

// quartiles
var c1 int
var c2 int
if res.n%2 == 0 {
c1 = res.n / 2
c2 = res.n / 2
} else {
c1 = (res.n - 1) / 2
c2 = c1 + 1
}
res.q1 = calcMedian(data[:c1])
res.q2 = res.median
res.q3 = calcMedian(data[c2:])

var min float64
min, err = stats.Min(data)
if err != nil {
return res, err
return res, nil
}

func calcSum(data []float64) (sum float64) {
for _, v := range data {
sum += v
}
res.min = min
return sum
}

var max float64
max, err = stats.Max(data)
if err != nil {
return res, err
func calcMode(data []float64) (mode []float64) {
// Return the data if there's only one number
l := len(data)
if l == 1 {
return data
} else if l == 0 {
return nil
}
res.max = max

var mean float64
mean, err = stats.Mean(data)
if err != nil {
return res, err
// Traverse sorted array,
// tracking the longest repeating sequence
mode = make([]float64, 5)
cnt, maxCnt := 1, 1
for i := 1; i < l; i++ {
switch {
case data[i] == data[i-1]:
cnt++
case cnt == maxCnt && maxCnt != 1:
mode = append(mode, data[i-1])
cnt = 1
case cnt > maxCnt:
mode = append(mode[:0], data[i-1])
maxCnt, cnt = cnt, 1
default:
cnt = 1
}
}
switch {
case cnt == maxCnt:
mode = append(mode, data[l-1])
case cnt > maxCnt:
mode = append(mode[:0], data[l-1])
maxCnt = cnt
}
res.mean = mean

var mode []float64
mode, err = stats.Mode(data)
if err != nil {
return res, err
// Since length must be greater than 1,
// check for slices of distinct values
if maxCnt == 1 || len(mode)*maxCnt == l && maxCnt != l {
return []float64{}
}
res.mode = mode

var median float64
median, err = stats.Median(data)
if err != nil {
return res, err
return mode
}

func calcVariance(data []float64, mean float64) (variance float64) {
// Sum the square of the mean subtracted from each number
for _, n := range data {
variance += (n - mean) * (n - mean)
}
res.median = median

// mean of the squared differences
return variance / float64(len(data))
}

func calcMean(data []float64) float64 {
var sum float64
sum, err = stats.Sum(data)
if err != nil {
return res, err
for _, v := range data {
sum += v
}
res.sum = sum
return sum / float64(len(data))
}

var variance float64
variance, err = stats.Variance(data)
if err != nil {
return res, err
func calcMedian(data []float64) (median float64) {
n := len(data)
if n%2 == 0 {
median = calcMean(data[n/2-1 : n/2+1])
} else {
median = data[n/2]
}
res.variance = variance
return median
}

var stdev float64
stdev, err = stats.StandardDeviation(data)
if err != nil {
return res, err
}
res.stdev = stdev
func calcPercentile(data []float64, percentile float64) (result float64) {
// generate predicted index
index := (percentile / 100) * float64(len(data))

var p50 float64
p50, err = stats.Percentile(data, 50)
if err != nil {
return res, err
}
res.p50 = p50
// Check if the index is a whole number
if index == float64(int64(index)) {

var p75 float64
p75, err = stats.Percentile(data, 75)
if err != nil {
return res, err
}
res.p75 = p75
// Convert float to int
idx := int(index)

var p90 float64
p90, err = stats.Percentile(data, 90)
if err != nil {
return res, err
}
res.p90 = p90
// Find the value at the index
result = data[idx-1]

var p95 float64
p95, err = stats.Percentile(data, 95)
if err != nil {
return res, err
}
res.p95 = p95
} else {

var p99 float64
p99, err = stats.Percentile(data, 99)
if err != nil {
return res, err
}
res.p99 = p99
// Convert float to int via truncation
idx := int(index)

var qs stats.Quartiles
qs, err = stats.Quartile(data)
if err != nil {
return res, err
}
res.q1 = qs.Q1
res.q2 = qs.Q2
res.q3 = qs.Q3
// Find the average of the index and following values
result = calcMean([]float64{data[idx-1], data[idx]})

var outliers stats.Outliers
outliers, err = stats.QuartileOutliers(data)
if err != nil {
return res, err
}
res.mildOutliers = outliers.Mild
res.extremeOutliers = outliers.Extreme

return res, nil
return result
}
32 changes: 32 additions & 0 deletions commands/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,28 @@ package commands

import (
"bufio"
"fmt"
"math"
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

// withinTolerance reports whether a is acceptably close to the reference
// value b, given the tolerance e.
//
// Exactly equal values always match. When b is zero a relative error is
// undefined, so the absolute difference |a-b| is compared against e instead.
// In every other case the relative error |a-b|/|b| must be strictly less
// than e.
func withinTolerance(a, b, e float64) bool {
	switch diff := math.Abs(a - b); {
	case a == b:
		// Identical values match regardless of the tolerance supplied.
		return true
	case b == 0:
		// Avoid dividing by zero: fall back to an absolute comparison.
		return diff < e
	default:
		return diff/math.Abs(b) < e
	}
}

func Test_processLine(t *testing.T) {
is := assert.New(t)

Expand Down Expand Up @@ -80,9 +96,25 @@ func Test_processSample(t *testing.T) {
t.Run("processes lines and generate sample", func(t *testing.T) {
data := []float64{1, 2, 3, 4}
result, err := processSample(data)
fmt.Printf("%# v\n", result)
is.NoError(err)
is.IsType(ResultSet{}, result)
is.Equal(4, result.n)
is.Equal(float64(1), result.min)
is.Equal(float64(4), result.max)
is.Equal(2.5, result.mean)
is.Equal([]float64{}, result.mode)
is.Equal(2.5, result.median)
is.Equal(float64(10), result.sum)
is.Equal(1.118033988749895, result.stdev)
is.Equal(1.25, result.variance)
is.Equal(float64(2), result.p50)
is.Equal(float64(3), result.p75)
is.Equal(3.5, result.p90)
is.Equal(3.5, result.p95)
is.Equal(3.5, result.p99)
is.Equal(1.5, result.q1)
is.Equal(2.5, result.q2)
is.Equal(3.5, result.q3)
})
}

0 comments on commit 2ca8b46

Please sign in to comment.