diff --git a/internal/natsort/LICENSE b/internal/natsort/LICENSE new file mode 100644 index 0000000..5c695fb --- /dev/null +++ b/internal/natsort/LICENSE @@ -0,0 +1,17 @@ +The MIT License (MIT) +Copyright (c) 2015 Frits van Bommel +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/internal/natsort/README.md b/internal/natsort/README.md new file mode 100644 index 0000000..8bc32e4 --- /dev/null +++ b/internal/natsort/README.md @@ -0,0 +1,10 @@ +# natsort + +This is a fork of the `sortorder` package of [github.com/fvbommel/util](https://github.com/fvbommel/util). + +## License + +* [MIT](./LICENSE) + - the original implementation of `sortorder` was released by [Frits van Bommel](https://github.com/fvbommel) under an MIT license. +* [Public domain](../../UNLICENSE) + - any changes made in this fork are released into the public domain. 
// Strings sorts a in place so that its elements end up in natural order.
func Strings(a []string) {
	natural := Order(a)
	sort.Sort(natural)
}

// Order is a string slice that satisfies sort.Interface using natural
// ordering, so that e.g. "abc2" sorts before "abc12".
//
// Runs of digits and runs of other bytes are handled separately: non-digit
// bytes are compared by value, digit runs are compared as numbers. When two
// numbers are equal, the one with fewer leading zeros sorts first, so
// "2" < "02".
//
// Limitation: only the ASCII digits 0-9 are recognised as digits.
type Order []string

// Len returns the number of strings held by n.
func (n Order) Len() int {
	return len(n)
}

// Swap exchanges the strings at positions i and j.
func (n Order) Swap(i, j int) {
	n[j], n[i] = n[i], n[j]
}

// Less reports whether the string at position i sorts before the string at
// position j in natural order.
func (n Order) Less(i, j int) bool {
	return Less(n[i], n[j])
}

// isdigit reports whether b is one of the ASCII digits '0' through '9'.
func isdigit(b byte) bool {
	return b >= '0' && b <= '9'
}

// Less reports whether str1 sorts before str2 in natural order, so that e.g.
// "abc2" < "abc12".
//
// Runs of digits and runs of other bytes are handled separately: non-digit
// bytes are compared by value, digit runs are compared as numbers. When two
// numbers are equal, the one with fewer leading zeros sorts first, so
// "2" < "02".
//
// Limitation: only the ASCII digits 0-9 are recognised as digits.
func Less(str1, str2 string) bool {
	i, j := 0, 0
	for i < len(str1) && j < len(str2) {
		a, b := str1[i], str2[j]

		if !isdigit(a) || !isdigit(b) {
			// At least one side is not a digit. Comparing raw bytes is enough:
			// UTF-8 orders bytewise the same way it orders by code point.
			if a != b {
				return a < b
			}
			i++
			j++
			continue
		}

		// Both sides are at the start of a number. Skip the leading zeros...
		for i < len(str1) && str1[i] == '0' {
			i++
		}
		for j < len(str2) && str2[j] == '0' {
			j++
		}
		// ...then consume the remaining digits of each number.
		start1, start2 := i, j
		for i < len(str1) && isdigit(str1[i]) {
			i++
		}
		for j < len(str2) && isdigit(str2[j]) {
			j++
		}
		num1, num2 := str1[start1:i], str2[start2:j]

		switch {
		case len(num1) != len(num2):
			// Without leading zeros, the number with fewer digits is smaller.
			return len(num1) < len(num2)
		case num1 != num2:
			// Same digit count: lexical order matches numeric order.
			return num1 < num2
		case start1 != start2:
			// Same value. Everything before the numbers was identical, so the
			// offset just past the zeros tells which side had fewer of them.
			return start1 < start2
		}
		// The numbers are indistinguishable; keep scanning.
	}
	// One string is a prefix of the other (or they are equal); the shorter
	// one sorts first.
	return len(str1) < len(str2)
}
%#q to %#q: expected %v, got %v", + v.s1, v.s2, v.less, res) + } + } +} + +func BenchmarkStdStrings(b *testing.B) { + set := testSet(300) + arr := make([]string, len(set[0])) + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, list := range set { + b.StopTimer() + copy(arr, list) + b.StartTimer() + + sort.Strings(arr) + } + } +} + +func BenchmarkStrings(b *testing.B) { + set := testSet(300) + arr := make([]string, len(set[0])) + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, list := range set { + b.StopTimer() + copy(arr, list) + b.StartTimer() + + Strings(arr) + } + } +} + +func BenchmarkStdLess(b *testing.B) { + set := testSet(300) + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := range set[0] { + k := (j + 1) % len(set[0]) + _ = set[0][j] < set[0][k] + } + } +} + +func BenchmarkLess(b *testing.B) { + set := testSet(300) + b.ResetTimer() + for i := 0; i < b.N; i++ { + for j := range set[0] { + k := (j + 1) % len(set[0]) + _ = Less(set[0][j], set[0][k]) + } + } +} + +// Get 1000 arrays of 10000-string-arrays (less if -short is specified). +func testSet(seed int) [][]string { + gen := &generator{ + src: rand.New(rand.NewSource( + int64(seed), + )), + } + n := 1000 + if testing.Short() { + n = 1 + } + set := make([][]string, n) + for i := range set { + strings := make([]string, 10000) + for idx := range strings { + // Generate a random string + strings[idx] = gen.NextString() + } + set[i] = strings + } + return set +} + +type generator struct { + src *rand.Rand +} + +func (g *generator) NextInt(max int) int { + return g.src.Intn(max) +} + +// Gets random random-length alphanumeric string. 
+func (g *generator) NextString() (str string) { + // Random-length 3-8 chars part + strlen := g.src.Intn(6) + 3 + // Random-length 1-3 num + numlen := g.src.Intn(3) + 1 + // Random position for num in string + numpos := g.src.Intn(strlen + 1) + // Generate the number + var num string + for i := 0; i < numlen; i++ { + num += strconv.Itoa(g.src.Intn(10)) + } + // Put it all together + for i := 0; i < strlen+1; i++ { + if i == numpos { + str += num + } else { + str += string('a' + g.src.Intn(16)) + } + } + return str +} diff --git a/messagediff.go b/messagediff.go index b6f6437..a2cf299 100644 --- a/messagediff.go +++ b/messagediff.go @@ -3,10 +3,11 @@ package messagediff import ( "fmt" "reflect" - "sort" "strings" "time" "unsafe" + + "github.com/d4l3k/messagediff/internal/natsort" ) // PrettyDiff does a deep comparison and returns the nicely formated results. @@ -23,7 +24,7 @@ func PrettyDiff(a, b interface{}, options ...Option) (string, bool) { for path, modified := range d.Modified { dstr = append(dstr, fmt.Sprintf("modified: %s = %#v\n", path.String(), modified)) } - sort.Strings(dstr) + natsort.Strings(dstr) return strings.Join(dstr, ""), equal } diff --git a/messagediff_test.go b/messagediff_test.go index a8908a3..587b1e1 100644 --- a/messagediff_test.go +++ b/messagediff_test.go @@ -98,6 +98,12 @@ func TestPrettyDiff(t *testing.T) { "added: .C[1] = 2\nmodified: .b = 3\n", false, }, + { + testStruct{1, 3, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, [3]int{4, 5, 6}}, + testStruct{1, 3, []int{42, 43, 44, 3, 4, 5, 6, 7, 8, 9, 45, 46, 12}, [3]int{4, 5, 6}}, + "modified: .C[0] = 42\nmodified: .C[1] = 43\nmodified: .C[2] = 44\nmodified: .C[10] = 45\nmodified: .C[11] = 46\n", + false, + }, { nil, nil,