feat(simple): add simple mode for generating statistical distribution info

Signed-off-by: Derek Smith <drsmith.phys@gmail.com>
clok · Feb 8, 2022 · 196d6da
1 parent c338310
commit 196d6da
Show file tree

Hide file tree

Showing 9 changed files with 560 additions and 164 deletions.
diff --git a/commands/common.go b/commands/common.go
@@ -1,12 +1,13 @@
 package commands
 
 import (
-	"bufio"
 	"fmt"
 	"io"
+	"strconv"
 	"strings"
 
 	"github.com/clok/kemba"
+	"github.com/montanaflynn/stats"
 )
 
 var (
@@ -19,54 +20,195 @@ var (
 	kfpld = kc.Extend("processLine:debug")
 )
 
-type processReaderInput struct {
-	reader *bufio.Reader
-}
-
-func processReader(opts *processReaderInput) error {
-	var output []rune
+func processReader(opts *processReaderInput) ([]float64, error) {
+	var data []rune
 	var lines int64
+	var fails int64
+	var sample []float64
 
 	for {
 		input, _, err := opts.reader.ReadRune()
 		if err != nil && err == io.EOF {
 			break
 		}
 		kfpd.Printf("%c", input)
-		output = append(output, input)
+		data = append(data, input)
 		if input == '\n' {
-			err := processLine(&processLineInput{
-				output: output,
+			value, err := processLine(&processLineInput{
+				line: data,
 			})
-			if err != nil {
-				return err
+			if err != nil && value == 0 {
+				fails++
+			} else {
+				sample = append(sample, value)
 			}
 			lines++
-			output = []rune{}
+
+			// TODO: Add support for refresh rate
+			// if lines%100 == 0 {
+			//	res, err := processSample(sample)
+			//	if err != nil {
+			//		return nil, err
+			//	}
+			//	var row string
+			//	for _, field := range res.ListFields() {
+			//		if row == "" {
+			//			row = fmt.Sprintf(res.GetFormat(field), res.Get(field))
+			//		} else {
+			//			format := "%s\t" + res.GetFormat(field)
+			//			row = fmt.Sprintf(format, row, res.Get(field))
+			//		}
+			//	}
+			//	fmt.Printf("%s\r", row)
+			// }
+
+			// reset
+			data = []rune{}
 			kfpd.Println("-- RESET OUTPUT --")
 		}
 	}
 
-	if len(output) > 0 {
-		err := processLine(&processLineInput{
-			output: output,
+	if len(data) > 0 {
+		value, err := processLine(&processLineInput{
+			line: data,
 		})
-		if err != nil {
-			return err
+		if err != nil && value == 0 {
+			fails++
 		}
 		lines++
+		sample = append(sample, value)
 	}
-	kfpl.Printf("%d lines processed", lines)
-	return nil
+	kfpl.Printf("%d / %d lines processed failed to parse", fails, lines)
+	return sample, nil
 }
 
-type processLineInput struct {
-	output []rune
+func processLine(opts *processLineInput) (float64, error) {
+	line := strings.ReplaceAll(strings.ReplaceAll(string(opts.line), "\r", ""), "\n", "")
+	kfpld.Printf("stringify %s", line)
+
+	// convert to float64
+	f, err := strconv.ParseFloat(line, 64)
+	if err != nil {
+		// return error if not valid
+		return 0, fmt.Errorf("could not parse '%s' to float64", line)
+	}
+	kfpd.Printf("parse result: %v\n", f)
+
+	return f, nil
 }
 
-func processLine(opts *processLineInput) error {
-	line := string(opts.output)
-	kfpld.Printf("stringify %s", line)
-	fmt.Printf("hello: %s\n", strings.ReplaceAll(strings.ReplaceAll(line, "\r\n", ""), "\n", ""))
-	return nil
+func processSample(data []float64) (ResultSet, error) {
+	var res ResultSet
+	var err error
+	res.n = len(data)
+
+	var min float64
+	min, err = stats.Min(data)
+	if err != nil {
+		return res, err
+	}
+	res.min = min
+
+	var max float64
+	max, err = stats.Max(data)
+	if err != nil {
+		return res, err
+	}
+	res.max = max
+
+	var mean float64
+	mean, err = stats.Mean(data)
+	if err != nil {
+		return res, err
+	}
+	res.mean = mean
+
+	var mode []float64
+	mode, err = stats.Mode(data)
+	if err != nil {
+		return res, err
+	}
+	res.mode = mode
+
+	var median float64
+	median, err = stats.Median(data)
+	if err != nil {
+		return res, err
+	}
+	res.median = median
+
+	var sum float64
+	sum, err = stats.Sum(data)
+	if err != nil {
+		return res, err
+	}
+	res.sum = sum
+
+	var variance float64
+	variance, err = stats.Variance(data)
+	if err != nil {
+		return res, err
+	}
+	res.variance = variance
+
+	var stdev float64
+	stdev, err = stats.StandardDeviation(data)
+	if err != nil {
+		return res, err
+	}
+	res.stdev = stdev
+
+	var p50 float64
+	p50, err = stats.Percentile(data, 50)
+	if err != nil {
+		return res, err
+	}
+	res.p50 = p50
+
+	var p75 float64
+	p75, err = stats.Percentile(data, 75)
+	if err != nil {
+		return res, err
+	}
+	res.p75 = p75
+
+	var p90 float64
+	p90, err = stats.Percentile(data, 90)
+	if err != nil {
+		return res, err
+	}
+	res.p90 = p90
+
+	var p95 float64
+	p95, err = stats.Percentile(data, 95)
+	if err != nil {
+		return res, err
+	}
+	res.p95 = p95
+
+	var p99 float64
+	p99, err = stats.Percentile(data, 99)
+	if err != nil {
+		return res, err
+	}
+	res.p99 = p99
+
+	var qs stats.Quartiles
+	qs, err = stats.Quartile(data)
+	if err != nil {
+		return res, err
+	}
+	res.q1 = qs.Q1
+	res.q2 = qs.Q2
+	res.q3 = qs.Q3
+
+	var outliers stats.Outliers
+	outliers, err = stats.QuartileOutliers(data)
+	if err != nil {
+		return res, err
+	}
+	res.mildOutliers = outliers.Mild
+	res.extremeOutliers = outliers.Extreme
+
+	return res, nil
 }
diff --git a/commands/common_test.go b/commands/common_test.go
@@ -2,9 +2,6 @@ package commands
 
 import (
 	"bufio"
-	"fmt"
-	"io/ioutil"
-	"os"
 	"strings"
 	"testing"
 
@@ -14,61 +11,78 @@ import (
 func Test_processLine(t *testing.T) {
 	is := assert.New(t)
 
-	t.Run("prepend hello [single]", func(t *testing.T) {
-		rescueStdout := os.Stdout
-		r, w, _ := os.Pipe()
-		os.Stdout = w
-
-		input := "Test"
-		err := processLine(&processLineInput{
-			output: []rune(input),
+	t.Run("parses number [simple]", func(t *testing.T) {
+		input := "1.234"
+		f, err := processLine(&processLineInput{
+			line: []rune(input),
 		})
 		is.NoError(err)
-
-		_ = w.Close()
-		out, _ := ioutil.ReadAll(r)
-		os.Stdout = rescueStdout
-		is.Equal(fmt.Sprintf("hello: %s\n", input), string(out))
+		is.Equal(1.234, f)
 	})
 
-	t.Run("prepend hello [carriage return]", func(t *testing.T) {
-		rescueStdout := os.Stdout
-		r, w, _ := os.Pipe()
-		os.Stdout = w
+	t.Run("parses number [scientific]", func(t *testing.T) {
+		input := "1.234560e+02"
+		f, err := processLine(&processLineInput{
+			line: []rune(input),
+		})
+		is.NoError(err)
+		is.Equal(123.456, f)
+	})
 
-		input := "Test\r\n"
-		err := processLine(&processLineInput{
-			output: []rune(input),
+	t.Run("parses number [carriage return]", func(t *testing.T) {
+		input := "1.234\r\n"
+		f, err := processLine(&processLineInput{
+			line: []rune(input),
 		})
 		is.NoError(err)
+		is.Equal(1.234, f)
+	})
 
-		_ = w.Close()
-		out, _ := ioutil.ReadAll(r)
-		os.Stdout = rescueStdout
-		is.Equal(fmt.Sprintf("hello: %s\n", "Test"), string(out))
+	t.Run("parses number [new line]", func(t *testing.T) {
+		input := "1.234\n"
+		f, err := processLine(&processLineInput{
+			line: []rune(input),
+		})
+		is.NoError(err)
+		is.Equal(1.234, f)
 	})
 }
 
 func Test_processReader(t *testing.T) {
 	is := assert.New(t)
 
-	sample := `test1
-test2
-test3`
-
-	t.Run("processes many lines and outputs matches", func(t *testing.T) {
-		rescueStdout := os.Stdout
-		r, w, _ := os.Pipe()
-		os.Stdout = w
+	t.Run("processes lines and generate sample", func(t *testing.T) {
+		data := `1.234
+test bad line
+1.234560e+02`
+		sample, err := processReader(&processReaderInput{
+			reader: bufio.NewReader(strings.NewReader(data)),
+		})
+		is.NoError(err)
+		is.Len(sample, 2)
+		is.Equal([]float64{1.234, 123.456}, sample)
+	})
 
-		err := processReader(&processReaderInput{
-			reader: bufio.NewReader(strings.NewReader(sample)),
+	t.Run("processes lines and generate sample [carriage return]", func(t *testing.T) {
+		data := "1.234\r\ntest bad line\r\n1.234560e+02\r\n"
+		sample, err := processReader(&processReaderInput{
+			reader: bufio.NewReader(strings.NewReader(data)),
 		})
 		is.NoError(err)
+		is.Len(sample, 2)
+		is.Equal([]float64{1.234, 123.456}, sample)
+	})
+}
+
+func Test_processSample(t *testing.T) {
+	is := assert.New(t)
 
-		_ = w.Close()
-		out, _ := ioutil.ReadAll(r)
-		os.Stdout = rescueStdout
-		is.Equal("hello: test1\nhello: test2\nhello: test3\n", string(out))
+	t.Run("processes lines and generate sample", func(t *testing.T) {
+		data := []float64{1, 2, 3, 4}
+		result, err := processSample(data)
+		is.NoError(err)
+		is.IsType(ResultSet{}, result)
+		is.Equal(float64(1), result.min)
+		is.Equal(float64(4), result.max)
 	})
 }