Skip to content

Commit

Permalink
Bug fix issue 147: BUG with reading table that contains copied map (#149
Browse files Browse the repository at this point in the history
)

Fixes an issue reading a table with a MAP column that occurred only when
the table was created by copying an existing table. The bug was caused by
an indexing error when reading map values from an Arrow batch. Whether the
bug occurred depended on how the result set was split into batches. Added
a new test using the Arrow batch returned in the error scenario. Also
fixed an existing test that had been masking this bug.
  • Loading branch information
rcypher-databricks committed Jul 27, 2023
2 parents f88afbe + 3b7fbc2 commit 45520d9
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 4 deletions.
48 changes: 47 additions & 1 deletion internal/rows/arrowbased/arrowRows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1221,7 +1221,7 @@ func TestArrowRowScanner(t *testing.T) {
"[\"2021-07-01 05:43:28 +0000 UTC\",\"-2022-08-13 14:01:01 +0000 UTC\",null]",
"[\"Gr8=\",\"D/8=\",null]",
"[[1,2,3],[4,5,6],null]",
"[{\"key1\":1,\"key2\":2},{\"key1\":1,\"key2\":2},null]",
"[{\"key1\":1,\"key2\":2},{\"key3\":3,\"key4\":4},null]",
"[{\"Field1\":77,\"Field2\":\"2020-12-31 00:00:00 +0000 UTC\"},{\"Field1\":13,\"Field2\":\"-2020-12-31 00:00:00 +0000 UTC\"},{\"Field1\":null,\"Field2\":null}]",
"[5.15,123.45,null]",
"[\"2020-12-31 00:00:00 +0000 UTC\",\"-2020-12-31 00:00:00 +0000 UTC\",null]",
Expand Down Expand Up @@ -1287,6 +1287,52 @@ func TestArrowRowScanner(t *testing.T) {

})

t.Run("Retrieve values - maps issue 147", func(t *testing.T) {
	// Regression test for GitHub issue 147.
	// After copying a table with a column of type 'MAP<STRING, STRING>', querying
	// the copy would return the map value of the first row for every row.
	// The cause was an indexing bug when retrieving map values; whether it showed
	// up depended on how the result set was broken into arrow batches.
	expected := [][]driver.Value{
		{1, map[string]string{"name": "alice2"}},
		{2, map[string]string{"name": "bob2"}},
		{3, map[string]string{"name": "jon2"}},
	}

	// issue147.json is the fixture this test replays through the row scanner.
	executeStatementResp := cli_service.TExecuteStatementResp{}
	loadTestData(t, "issue147.json", &executeStatementResp)

	// Named cfg so the local variable does not shadow the config package.
	cfg := config.WithDefaults()
	cfg.UseArrowNativeTimestamp = true
	cfg.UseArrowNativeComplexTypes = true
	cfg.UseArrowNativeDecimal = false
	cfg.UseArrowNativeIntervalTypes = false

	directResults := executeStatementResp.DirectResults
	d, err := NewArrowRowScanner(directResults.ResultSetMetadata, directResults.ResultSet.Results, cfg, nil, context.Background())
	if err != nil {
		// Nothing below can run without a scanner; stop instead of panicking
		// on the type assertion that follows.
		t.Fatalf("creating arrow row scanner: %v", err)
	}

	ars, ok := d.(*arrowRowScanner)
	if !ok {
		t.Fatalf("unexpected row scanner type %T, want *arrowRowScanner", d)
	}

	dest := make([]driver.Value, len(directResults.ResultSetMetadata.Schema.Columns))

	for i := range expected {
		// A failed scan would leave the previous row's values in dest, so the
		// comparisons below would be meaningless; stop at the first failure.
		if scanErr := ars.ScanRow(dest, int64(i)); scanErr != nil {
			t.Fatalf("row %d: ScanRow failed: %v", i, scanErr)
		}

		// Column 0 (id) is scanned as a string and decoded as JSON into an int.
		idJSON, ok := dest[0].(string)
		if !ok {
			t.Fatalf("row %d: id column has type %T, want string", i, dest[0])
		}
		var id int
		assert.Nil(t, json.Unmarshal([]byte(idJSON), &id))
		assert.Equal(t, expected[i][0], id)

		// Column 1 (myMap) is scanned as a JSON object string; each row must
		// decode to its own map rather than repeating the first row's map.
		mapJSON, ok := dest[1].(string)
		if !ok {
			t.Fatalf("row %d: map column has type %T, want string", i, dest[1])
		}
		var gotMap map[string]string
		assert.Nil(t, json.Unmarshal([]byte(mapJSON), &gotMap))
		assert.Equal(t, expected[i][1], gotMap)
	}

})

t.Run("Retrieve values - structs", func(t *testing.T) {
expected := []driver.Value{
"{\"f1\":1,\"f2\":\"-0450-11-13 00:00:00 +0000 UTC\",\"f3\":\"-2022-08-13 14:01:01 +0000 UTC\",\"f4\":{\"5\":5,\"6\":7},\"f5\":{\"ield1\":7,\"Field2\":\"-0450-11-13 00:00:00 +0000 UTC\"}}",
Expand Down
6 changes: 3 additions & 3 deletions internal/rows/arrowbased/columnValues.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ func (mvc *mapValueContainer) Value(i int) (any, error) {
len := e - s
r := "{"
for i := int64(0); i < len; i++ {
k, err := mvc.keys.Value(int(i))
k, err := mvc.keys.Value(int(i + s))
if err != nil {
return nil, err
}
Expand All @@ -214,13 +214,13 @@ func (mvc *mapValueContainer) Value(i int) (any, error) {
return nil, err
}

v, err := mvc.values.Value(int(i))
v, err := mvc.values.Value(int(i + s))
if err != nil {
return nil, err
}

var b string
if mvc.values.IsNull(int(i)) {
if mvc.values.IsNull(int(i + s)) {
b = "null"
} else if mvc.complexValue {
b = v.(string)
Expand Down
92 changes: 92 additions & 0 deletions internal/rows/arrowbased/testdata/issue147.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{
"status": {
"statusCode": "SUCCESS_STATUS"
},
"operationHandle": {
"operationId": {
"guid": "Ae4qWOAZGPWte+TeoMZC2w==",
"secret": "M41SnYJyRuuEgstBlGaDnQ=="
},
"operationType": "EXECUTE_STATEMENT",
"hasResultSet": true
},
"directResults": {
"operationStatus": {
"status": {
"statusCode": "SUCCESS_STATUS"
},
"operationState": "FINISHED_STATE",
"operationStarted": 1690227170051,
"operationCompleted": 1690227170417
},
"resultSetMetadata": {
"status": {
"statusCode": "SUCCESS_STATUS"
},
"schema": {
"columns": [
{
"columnName": "id",
"typeDesc": {
"types": [
{
"primitiveEntry": {
"type": "STRING_TYPE"
}
}
]
},
"position": 1,
"comment": ""
},
{
"columnName": "myMap",
"typeDesc": {
"types": [
{
"primitiveEntry": {
"type": "MAP_TYPE"
}
}
]
},
"position": 2,
"comment": ""
}
]
},
"resultFormat": "ARROW_BASED_SET",
"lz4Compressed": false,
"arrowSchema": "/////6ACAAAQAAAAAAAKAA4ABgANAAgACgAAAAAABAAQAAAAAAEKAAwAAAAIAAQACgAAAAgAAAAIAAAAAAAAAAIAAAC0AQAABAAAAGb+//8UAAAA2AAAAIABAAAAABEBfAEAAAIAAACEAAAABAAAABz+//8IAAAAWAAAAE8AAAB7InR5cGUiOiJtYXAiLCJrZXlUeXBlIjoic3RyaW5nIiwidmFsdWVUeXBlIjoic3RyaW5nIiwidmFsdWVDb250YWluc051bGwiOnRydWV9ABcAAABTcGFyazpEYXRhVHlwZTpKc29uVHlwZQCY/v//CAAAABwAAAATAAAATUFQPFNUUklORywgU1RSSU5HPgAWAAAAU3Bhcms6RGF0YVR5cGU6U3FsTmFtZQAAAQAAAAQAAACq////FAAAABQAAACIAAAAAAAADYQAAAAAAAAAAgAAAEgAAAAEAAAAdv///xQAAAAUAAAAFAAAAAAABQEQAAAAAAAAAAAAAADo/v//BQAAAHZhbHVlABIAGAAUAAAAEwAMAAAACAAEABIAAAAUAAAAFAAAABQAAAAAAAAFEAAAAAAAAAAAAAAAKP///wMAAABrZXkANP///wcAAABlbnRyaWVzAET///8FAAAAbXlNYXAAEgAYABQAEwASAAwAAAAIAAQAEgAAABQAAACQAAAAlAAAAAAABQGQAAAAAgAAAEgAAAAEAAAAyP///wgAAAAUAAAACAAAACJzdHJpbmciAAAAABcAAABTcGFyazpEYXRhVHlwZTpKc29uVHlwZQAIAAwACAAEAAgAAAAIAAAAEAAAAAYAAABTVFJJTkcAABYAAABTcGFyazpEYXRhVHlwZTpTcWxOYW1lAAAAAAAABAAEAAQAAAACAAAAaWQAAAAAAAA=",
"cacheLookupResult": "LOCAL_CACHE_HIT",
"uncompressedBytes": 512,
"compressedBytes": 512
},
"resultSet": {
"status": {
"statusCode": "SUCCESS_STATUS"
},
"hasMoreRows": false,
"results": {
"startRowOffset": 0,
"rows": [],
"arrowBatches": [
{
"batch": "/////2gBAAAUAAAAAAAAAAwAFgAOABUAEAAEAAwAAACQAAAAAAAAAAAABAAQAAAAAAMKABgADAAIAAQACgAAABQAAADYAAAAAwAAAAAAAAAAAAAADAAAAAAAAAAAAAAAAQAAAAAAAAAIAAAAAAAAABAAAAAAAAAAGAAAAAAAAAADAAAAAAAAACAAAAAAAAAAAQAAAAAAAAAoAAAAAAAAABAAAAAAAAAAOAAAAAAAAAABAAAAAAAAAEAAAAAAAAAAAQAAAAAAAABIAAAAAAAAABAAAAAAAAAAWAAAAAAAAAAMAAAAAAAAAGgAAAAAAAAAAQAAAAAAAABwAAAAAAAAABAAAAAAAAAAgAAAAAAAAAAOAAAAAAAAAAAAAAAFAAAAAwAAAAAAAAAAAAAAAAAAAAMAAAAAAAAAAAAAAAAAAAADAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAAAAAAAAAAAMAAAAAAAAAAAAAAAAAAAAHAAAAAAAAAAAAAAABAAAAAgAAAAMAAAAxMjMAAAAAAAcAAAAAAAAAAAAAAAEAAAACAAAAAwAAAAcAAAAAAAAABwAAAAAAAAAAAAAABAAAAAgAAAAMAAAAbmFtZW5hbWVuYW1lAAAAAAcAAAAAAAAAAAAAAAYAAAAKAAAADgAAAGFsaWNlMmJvYjJqb24yAAA=",
"rowCount": 3
}
]
}
},
"closeOperation": {
"status": {
"statusCode": "SUCCESS_STATUS"
}
}
},
"executionRejected": false,
"maxClusterCapacity": 10,
"queryCost": 0.5,
"currentClusterLoad": 1,
"idempotencyType": "IDEMPOTENT"
}

0 comments on commit 45520d9

Please sign in to comment.