-
Notifications
You must be signed in to change notification settings - Fork 0
/
inspect.go
131 lines (105 loc) · 3.16 KB
/
inspect.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Copyright 2020 Adam Chalkley
//
// https://github.com/atc0005/check-mail
//
// Licensed under the MIT License. See LICENSE file in the project root for
// full license information.
package textutils
import (
"fmt"
"io"
"text/tabwriter"
"unicode"
)
// inspectString is a shared helper function that generates a summary table
// from a provided string to help identify Unicode characters incompatible
// with older database character sets (e.g., UTF8MB3). One row is produced per
// rune in s, listing the rune's byte offset within s, the rune itself, its
// decimal value, whether it is a Unicode symbol, whether it is UTF8MB3 safe,
// its code point, its quoted rune literal and its UTF-8 bytes in hex.
//
// The summary table is written to the provided io.Writer interface. An error
// is returned if a row cannot be written, if the blank line cannot be written
// or if flushing the table fails.
func inspectString(s string, w io.Writer) error {
	tw := tabwriter.NewWriter(w, 8, 8, 4, '\t', 0)

	// Ranging over a string yields the starting byte offset of each rune, so
	// the "char" number may skip values after multi-byte characters.
	for i, c := range s {
		// Assume the rune is unsafe (no-entry sign) until it is shown to sit
		// at or below the end of the UTF8MB3 range (check mark).
		status := "\xE2\x9B\x94 (no)"
		if c <= UTF8MB3RangeEndRune {
			status = "\xE2\x9C\x85 (yes)"
		}

		_, err := fmt.Fprintf(
			tw,
			"char %d: %c\t"+
				"Decimal: %d\t"+
				"IsSymbol: %t\t"+
				"UTF8MB3 safe: %v\t"+
				"code point: %U\t"+
				"rune literal: %+q\t"+
				// literal bytes in hex format roughly equivalent to what
				// MySQL/MariaDB uses in their error messages.
				// MariaDB [testing]> insert into unicode values ("Win a golden ticket to WooConf in Seattle😍");
				// ERROR 1366 (22007): Incorrect string value: '\xF0\x9F\x98\x8D' for column `testing`.`unicode`.`string` at row 1
				"Hex: % X\n",
			i,
			c,
			c,
			unicode.IsSymbol(c),
			status,
			c,
			c,
			// convert rune to string, then to byte slice
			[]byte(string(c)),
		)
		if err != nil {
			return fmt.Errorf(
				"error occurred writing to tabwriter: %w",
				err,
			)
		}
	}

	// The rows above are normally still buffered inside the tabwriter at this
	// point, so this newline reaches w ahead of the table and acts as a
	// spacer between any preceding header and the table itself.
	if _, err := fmt.Fprintln(w); err != nil {
		return fmt.Errorf(
			"error occurred writing blank line: %w",
			err,
		)
	}

	if err := tw.Flush(); err != nil {
		return fmt.Errorf(
			"error occurred flushing tabwriter: %w",
			err,
		)
	}

	return nil
}
// InspectStrings generates a summary table from a provided slice of strings
// to help identify Unicode characters incompatible with older database
// character sets (e.g., UTF8MB3). For each string a header line (index and
// quoted value), the summary table and a separator line are written, in that
// order, to the provided io.Writer interface.
//
// Processing stops at the first failure; the error from writing the header,
// generating the table or writing the separator is returned. A nil or empty
// slice produces no output and a nil error.
func InspectStrings(ss []string, w io.Writer) error {
	for i, s := range ss {
		if _, err := fmt.Fprintf(w, "\nstring %d: %q\n", i, s); err != nil {
			return fmt.Errorf(
				"error occurred writing string header: %w",
				err,
			)
		}

		if err := inspectString(s, w); err != nil {
			return err
		}

		// Emit the separator to the caller's writer (not stdout) so that all
		// output for a string ends up in the same place.
		if _, err := fmt.Fprint(w, "\n\n**************************************************\n\n"); err != nil {
			return fmt.Errorf(
				"error occurred writing separator: %w",
				err,
			)
		}
	}

	return nil
}
// InspectString generates a summary table from a provided string to help
// identify Unicode characters incompatible with older database character sets
// (e.g., UTF8MB3). A header line (the quoted string), the summary table and a
// separator line are written, in that order, to the provided io.Writer
// interface.
//
// The error from writing the header, generating the table or writing the
// separator is returned; nil is returned when all output was written.
func InspectString(s string, w io.Writer) error {
	if _, err := fmt.Fprintf(w, "\nstring: %q\n", s); err != nil {
		return fmt.Errorf(
			"error occurred writing string header: %w",
			err,
		)
	}

	if err := inspectString(s, w); err != nil {
		return err
	}

	// Emit the separator to the caller's writer (not stdout) so that all
	// output for the string ends up in the same place.
	if _, err := fmt.Fprint(w, "\n\n**************************************************\n\n"); err != nil {
		return fmt.Errorf(
			"error occurred writing separator: %w",
			err,
		)
	}

	return nil
}
// CharsWithinRange reports whether every character in the provided string
// falls within the inclusive range bounded by start and end. It returns false
// as soon as a character below start or above end is encountered. An empty
// string yields true, as it holds no characters outside of the range.
func CharsWithinRange(s string, start rune, end rune) bool {
	for _, r := range s {
		if start <= r && r <= end {
			continue
		}

		// r lies below start or above end.
		return false
	}

	return true
}
// WithinUTF8MB3Range indicates whether a provided string contains any
// characters outside of the UTF8MB3 character set range.
func WithinUTF8MB3Range(s string) bool {
return CharsWithinRange(s, UTF8MB3RangeStartRune, UTF8MB3RangeEndRune)
}