Skip to content

Commit

Permalink
Merge pull request #1224 from dolthub/daylon/more-coll-fixes
Browse files Browse the repository at this point in the history
SET NAMES collation fix, results have proper charset
  • Loading branch information
Hydrocharged committed Aug 29, 2022
2 parents 7ab9c75 + 2e64181 commit fe6fcf9
Show file tree
Hide file tree
Showing 39 changed files with 252 additions and 66 deletions.
25 changes: 12 additions & 13 deletions enginetest/enginetests.go
Original file line number Diff line number Diff line change
Expand Up @@ -5868,10 +5868,6 @@ func TestCharsetCollationWire(t *testing.T, h Harness, sessionBuilder server.Ses
port := getEmptyPort(t)
for _, script := range queries.CharsetCollationWireTests {
t.Run(script.Name, func(t *testing.T) {
ctx := NewContextWithClient(harness, sql.Client{
User: "root",
Address: "localhost",
})
serverConfig := server.Config{
Protocol: "tcp",
Address: fmt.Sprintf("localhost:%d", port),
Expand All @@ -5881,14 +5877,6 @@ func TestCharsetCollationWire(t *testing.T, h Harness, sessionBuilder server.Ses
engine := mustNewEngine(t, harness)
defer engine.Close()
engine.Analyzer.Catalog.MySQLDb.AddRootAccount()
for _, statement := range script.SetUpScript {
if sh, ok := harness.(SkippingHarness); ok {
if sh.SkipQueryTest(statement) {
t.Skip()
}
}
RunQueryWithContext(t, engine, harness, ctx, statement)
}

s, err := server.NewServer(serverConfig, engine, sessionBuilder, nil)
require.NoError(t, err)
Expand All @@ -5904,6 +5892,17 @@ func TestCharsetCollationWire(t *testing.T, h Harness, sessionBuilder server.Ses
require.NoError(t, err)
_, err = conn.Exec("USE mydb;")
require.NoError(t, err)

for _, statement := range script.SetUpScript {
if sh, ok := harness.(SkippingHarness); ok {
if sh.SkipQueryTest(statement) {
t.Skip()
}
}
_, err = conn.Exec(statement)
require.NoError(t, err)
}

for _, query := range script.Queries {
t.Run(query.Query, func(t *testing.T) {
r, err := conn.Query(query.Query)
Expand Down Expand Up @@ -6009,7 +6008,7 @@ func TestTypesOverWire(t *testing.T, h Harness, sessionBuilder server.SessionBui
}
expectedEngineRow := make([]*string, len(engineRow))
for i := range engineRow {
sqlVal, err := sch[i].Type.SQL(nil, engineRow[i])
sqlVal, err := sch[i].Type.SQL(ctx, nil, engineRow[i])
if !assert.NoError(t, err) {
break
}
Expand Down
26 changes: 26 additions & 0 deletions enginetest/queries/charset_collation_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,32 @@ var CharsetCollationEngineTests = []CharsetCollationEngineTest{
},
},
},
{
Name: "SET NAMES does not interfere with column charset",
SetUpScript: []string{
"SET NAMES utf8mb3;",
"CREATE TABLE test(pk BIGINT PRIMARY KEY, v1 VARCHAR(100) COLLATE utf8mb4_0900_bin);",
"INSERT INTO test VALUES (1, 'a'), (2, 'b');",
},
Queries: []CharsetCollationEngineTestQuery{
{
Query: "SELECT * FROM test ORDER BY v1 COLLATE utf8mb4_bin ASC;",
Expected: []sql.Row{{int64(1), "a"}, {int64(2), "b"}},
},
{
Query: "SELECT * FROM test ORDER BY v1 COLLATE utf8mb3_bin ASC;",
ErrKind: sql.ErrCollationInvalidForCharSet,
},
{
Query: "SELECT 'a' COLLATE utf8mb3_bin;",
Expected: []sql.Row{{"a"}},
},
{
Query: "SELECT 'a' COLLATE utf8mb4_bin;",
ErrKind: sql.ErrCollationInvalidForCharSet,
},
},
},
{
Name: "ENUM collation handling",
SetUpScript: []string{
Expand Down
80 changes: 80 additions & 0 deletions enginetest/queries/charset_collation_wire.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
{
Name: "Insert multiple character sets",
SetUpScript: []string{
"SET character_set_results = 'binary';",
"CREATE TABLE test (v1 VARCHAR(255) COLLATE utf16_unicode_ci);",
},
Queries: []CharsetCollationWireTestQuery{
Expand All @@ -60,6 +61,7 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
{
Name: "Sorting differences",
SetUpScript: []string{
"SET character_set_results = 'binary';",
"CREATE TABLE test1 (v1 VARCHAR(255) COLLATE utf8mb4_0900_bin);",
"CREATE TABLE test2 (v1 VARCHAR(255) COLLATE utf16_unicode_ci);",
},
Expand All @@ -85,6 +87,7 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
{
Name: "Order by behaves differently according to case-sensitivity",
SetUpScript: []string{
"SET character_set_results = 'binary';",
"CREATE TABLE test1 (pk BIGINT PRIMARY KEY, v1 VARCHAR(255) COLLATE utf16_unicode_ci, INDEX(v1));",
"CREATE TABLE test2 (pk BIGINT PRIMARY KEY, v1 VARCHAR(255) COLLATE utf8mb4_0900_bin, INDEX(v1));",
"INSERT INTO test1 VALUES (1, 'abc'), (2, 'ABC'), (3, 'aBc'), (4, 'AbC');",
Expand Down Expand Up @@ -120,6 +123,7 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
{
Name: "Proper index access",
SetUpScript: []string{
"SET character_set_results = 'binary';",
"CREATE TABLE test1 (pk BIGINT PRIMARY KEY, v1 VARCHAR(255) COLLATE utf16_unicode_ci, INDEX(v1));",
"CREATE TABLE test2 (pk BIGINT PRIMARY KEY, v1 VARCHAR(255) COLLATE utf8mb4_0900_bin, INDEX(v1));",
"INSERT INTO test1 VALUES (1, 'abc'), (2, 'ABC'), (3, 'aBc'), (4, 'AbC');",
Expand Down Expand Up @@ -198,9 +202,36 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
},
},
},
{
Name: "SET NAMES does not interfere with column charset",
SetUpScript: []string{
"SET NAMES utf8mb3;",
"CREATE TABLE test(pk BIGINT PRIMARY KEY, v1 VARCHAR(100) COLLATE utf8mb4_0900_bin);",
"INSERT INTO test VALUES (1, 'a'), (2, 'b');",
},
Queries: []CharsetCollationWireTestQuery{
{
Query: "SELECT * FROM test ORDER BY v1 COLLATE utf8mb4_bin ASC;",
Expected: []sql.Row{{"1", "a"}, {"2", "b"}},
},
{
Query: "SELECT * FROM test ORDER BY v1 COLLATE utf8mb3_bin ASC;",
Error: true,
},
{
Query: "SELECT 'a' COLLATE utf8mb3_bin;",
Expected: []sql.Row{{"a"}},
},
{
Query: "SELECT 'a' COLLATE utf8mb4_bin;",
Error: true,
},
},
},
{
Name: "ENUM collation handling",
SetUpScript: []string{
"SET character_set_results = 'binary';",
"CREATE TABLE test1 (pk BIGINT PRIMARY KEY, v1 ENUM('abc','def','ghi') COLLATE utf16_unicode_ci);",
"CREATE TABLE test2 (pk BIGINT PRIMARY KEY, v1 ENUM('abc','def','ghi') COLLATE utf8mb4_0900_bin);",
},
Expand Down Expand Up @@ -244,6 +275,7 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
{
Name: "SET collation handling",
SetUpScript: []string{
"SET character_set_results = 'binary';",
"CREATE TABLE test1 (pk BIGINT PRIMARY KEY, v1 SET('a','b','c') COLLATE utf16_unicode_ci);",
"CREATE TABLE test2 (pk BIGINT PRIMARY KEY, v1 SET('a','b','c') COLLATE utf8mb4_0900_bin);",
},
Expand Down Expand Up @@ -284,6 +316,36 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
},
},
},
{
Name: "Correct behavior with `character_set_results`",
SetUpScript: []string{
"SET character_set_results = 'binary';",
"CREATE TABLE test (v1 VARCHAR(255) COLLATE utf16_unicode_ci);",
"INSERT INTO test VALUES (_utf8mb4'hey');",
},
Queries: []CharsetCollationWireTestQuery{
{
Query: "SELECT * FROM test;",
Expected: []sql.Row{{"\x00h\x00e\x00y"}},
},
{
Query: "SET character_set_results = 'utf8mb4';",
Expected: []sql.Row{{sql.NewOkResult(0)}},
},
{
Query: "SELECT * FROM test;",
Expected: []sql.Row{{"hey"}},
},
{
Query: "SET character_set_results = 'utf32';",
Expected: []sql.Row{{sql.NewOkResult(0)}},
},
{
Query: "SELECT * FROM test;",
Expected: []sql.Row{{"\x00\x00\x00h\x00\x00\x00e\x00\x00\x00y"}},
},
},
},
{
Name: "LENGTH() function",
Queries: []CharsetCollationWireTestQuery{
Expand Down Expand Up @@ -332,6 +394,9 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
},
{
Name: "UPPER() function",
SetUpScript: []string{
"SET character_set_results = 'binary';",
},
Queries: []CharsetCollationWireTestQuery{
{
Query: "SELECT UPPER(_utf16'\x00a\x00B\x00c' COLLATE utf16_unicode_ci);",
Expand All @@ -355,6 +420,9 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
},
{
Name: "LOWER() function",
SetUpScript: []string{
"SET character_set_results = 'binary';",
},
Queries: []CharsetCollationWireTestQuery{
{
Query: "SELECT LOWER(_utf16'\x00A\x00b\x00C' COLLATE utf16_unicode_ci);",
Expand Down Expand Up @@ -512,6 +580,9 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
},
{
Name: "SUBSTRING() function",
SetUpScript: []string{
"SET character_set_results = 'binary';",
},
Queries: []CharsetCollationWireTestQuery{
{
Query: "SELECT SUBSTRING(_utf16'\x00a\x00b\x00c\x00d' COLLATE utf16_unicode_ci, 2, 2);",
Expand Down Expand Up @@ -575,6 +646,9 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
},
{
Name: "TRIM() function",
SetUpScript: []string{
"SET character_set_results = 'binary';",
},
Queries: []CharsetCollationWireTestQuery{
{
Query: "SELECT TRIM(_utf16'\x00 \x00a\x00b\x00c\x00 ' COLLATE utf16_unicode_ci);",
Expand All @@ -598,6 +672,9 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
},
{
Name: "RTRIM() function",
SetUpScript: []string{
"SET character_set_results = 'binary';",
},
Queries: []CharsetCollationWireTestQuery{
{
Query: "SELECT RTRIM(_utf16'\x00 \x00a\x00b\x00c\x00 ' COLLATE utf16_unicode_ci);",
Expand All @@ -621,6 +698,9 @@ var CharsetCollationWireTests = []CharsetCollationWireTest{
},
{
Name: "LTRIM() function",
SetUpScript: []string{
"SET character_set_results = 'binary';",
},
Queries: []CharsetCollationWireTestQuery{
{
Query: "SELECT LTRIM(_utf16'\x00 \x00a\x00b\x00c\x00 ' COLLATE utf16_unicode_ci);",
Expand Down
8 changes: 8 additions & 0 deletions enginetest/queries/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -3819,6 +3819,14 @@ var QueryTests = []QueryTest{
{},
},
},
{
Query: "SET collation_connection = '" +
sql.Collation_Default.String() +
"';",
Expected: []sql.Row{
{},
},
},
{
Query: `SHOW DATABASES`,
Expected: []sql.Row{{"mydb"}, {"foo"}, {"information_schema"}, {"mysql"}},
Expand Down
4 changes: 2 additions & 2 deletions enginetest/queries/variable_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,11 @@ var VariableQueries = []ScriptTest{
{
Name: "set names quoted",
SetUpScript: []string{
`set NAMES "charset"`,
`set NAMES "utf8mb3"`,
},
Query: "SELECT @@character_set_client, @@character_set_connection, @@character_set_results",
Expected: []sql.Row{
{"charset", "charset", "charset"},
{"utf8mb3", "utf8mb3", "utf8mb3"},
},
},
{
Expand Down
6 changes: 3 additions & 3 deletions server/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ func (h *Handler) doQuery(
continue
}

outputRow, err := rowToSQL(schema, row)
outputRow, err := rowToSQL(ctx, schema, row)
if err != nil {
return err
}
Expand Down Expand Up @@ -711,7 +711,7 @@ func (h *Handler) WarningCount(c *mysql.Conn) uint16 {
return 0
}

func rowToSQL(s sql.Schema, row sql.Row) ([]sqltypes.Value, error) {
func rowToSQL(ctx *sql.Context, s sql.Schema, row sql.Row) ([]sqltypes.Value, error) {
o := make([]sqltypes.Value, len(row))
var err error
for i, v := range row {
Expand All @@ -720,7 +720,7 @@ func rowToSQL(s sql.Schema, row sql.Row) ([]sqltypes.Value, error) {
continue
}

o[i], err = s[i].Type.SQL(nil, v)
o[i], err = s[i].Type.SQL(ctx, nil, v)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion sql/bit.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ func (t bitType) Promote() Type {
}

// SQL implements Type interface.
func (t bitType) SQL(dest []byte, v interface{}) (sqltypes.Value, error) {
func (t bitType) SQL(ctx *Context, dest []byte, v interface{}) (sqltypes.Value, error) {
if v == nil {
return sqltypes.NULL, nil
}
Expand Down
2 changes: 1 addition & 1 deletion sql/charactersets.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ func ParseCharacterSet(str string) (CharacterSetID, error) {
return cs, nil
}
// It is valid to parse the invalid charset, as some analyzer steps may temporarily use the invalid charset
if str == CharacterSet_Invalid.Name() {
if str == CharacterSet_Invalid.Name() || str == "" {
return CharacterSet_Invalid, nil
}
return CharacterSet_Invalid, ErrCharSetUnknown.New(str)
Expand Down
2 changes: 1 addition & 1 deletion sql/datetimetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ func (t datetimeType) Promote() Type {
}

// SQL implements Type interface.
func (t datetimeType) SQL(dest []byte, v interface{}) (sqltypes.Value, error) {
func (t datetimeType) SQL(ctx *Context, dest []byte, v interface{}) (sqltypes.Value, error) {
if v == nil {
return sqltypes.NULL, nil
}
Expand Down
2 changes: 1 addition & 1 deletion sql/decimal.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ func (t decimalType) Promote() Type {
}

// SQL implements Type interface.
func (t decimalType) SQL(dest []byte, v interface{}) (sqltypes.Value, error) {
func (t decimalType) SQL(ctx *Context, dest []byte, v interface{}) (sqltypes.Value, error) {
if v == nil {
return sqltypes.NULL, nil
}
Expand Down
2 changes: 1 addition & 1 deletion sql/deferredtype.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func (t deferredType) Promote() Type {
}

// SQL implements Type interface.
func (t deferredType) SQL(dest []byte, v interface{}) (sqltypes.Value, error) {
func (t deferredType) SQL(ctx *Context, dest []byte, v interface{}) (sqltypes.Value, error) {
return sqltypes.NULL, nil
}

Expand Down
8 changes: 6 additions & 2 deletions sql/enumtype.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ func (t enumType) Promote() Type {
}

// SQL implements Type interface.
func (t enumType) SQL(dest []byte, v interface{}) (sqltypes.Value, error) {
func (t enumType) SQL(ctx *Context, dest []byte, v interface{}) (sqltypes.Value, error) {
if v == nil {
return sqltypes.NULL, nil
}
Expand All @@ -245,7 +245,11 @@ func (t enumType) SQL(dest []byte, v interface{}) (sqltypes.Value, error) {
}
value, _ := t.At(int(convertedValue.(uint16)))

encodedBytes, ok := t.collation.CharacterSet().Encoder().Encode(encodings.StringToBytes(value))
resultCharset := ctx.GetCharacterSetResults()
if resultCharset == CharacterSet_Invalid || resultCharset == CharacterSet_binary {
resultCharset = t.collation.CharacterSet()
}
encodedBytes, ok := resultCharset.Encoder().Encode(encodings.StringToBytes(value))
if !ok {
return sqltypes.Value{}, ErrCharSetFailedToEncode.New(t.collation.CharacterSet().Name())
}
Expand Down

0 comments on commit fe6fcf9

Please sign in to comment.