diff --git a/query/common_test.go b/query/common_test.go index 703ca796330..f5e0c6c9af1 100644 --- a/query/common_test.go +++ b/query/common_test.go @@ -324,7 +324,10 @@ noindex_salary : float . language : [string] . score : [int] @index(int) . average : [float] @index(float) . -gender : string . +gender : string . +indexpred : string @index(exact) . +pred : string . +pname : string . ` func populateCluster() { @@ -758,6 +761,31 @@ func populateCluster() { <20001> "49.33" . <20001> "mahi" . <20001> "ms" . + + # data for testing consistency of sort + <61> "A" . + <62> "B" . + <63> "C" . + <64> "D" . + <65> "E" . + + <61> "A" . + <62> "B" . + <63> "C" . + <64> "D" . + <65> "E" . + + <61> "nameA" . + <62> "nameB" . + <63> "nameC" . + <64> "nameD" . + <65> "nameE" . + <66> "nameF" . + <67> "nameG" . + <68> "nameH" . + <69> "nameI" . + <70> "nameJ" . + `) if err != nil { panic(fmt.Sprintf("Could not able add triple to the cluster. Got error %v", err.Error())) diff --git a/query/query1_test.go b/query/query1_test.go index 80cf6ad044e..0ab4888a6c8 100644 --- a/query/query1_test.go +++ b/query/query1_test.go @@ -19,6 +19,7 @@ package query import ( "context" "encoding/json" + "fmt" "io/ioutil" "strings" "testing" @@ -1915,6 +1916,158 @@ func TestMultiSort7Paginate(t *testing.T) { require.JSONEq(t, `{"data": {"me":[{"name":"Alice","age":25},{"name":"Alice","age":75},{"name":"Alice","age":75},{"name":"Bob","age":25},{"name":"Bob","age":75},{"name":"Colin","age":25},{"name":"Elizabeth","age":25}]}}`, js) } +func TestSortWithNulls(t *testing.T) { + tests := []struct { + index int32 + offset int32 + first int32 + desc bool + result string + }{ + {0, -1, -1, false, `{"data": {"me":[ + {"pname":"nameA","pred":"A"}, + {"pname":"nameB","pred":"B"}, + {"pname":"nameC","pred":"C"}, + {"pname":"nameD","pred":"D"}, + {"pname":"nameE","pred":"E"}, + {"pname":"nameF"}, + {"pname":"nameG"}, + {"pname":"nameH"}, + {"pname":"nameI"}, + {"pname":"nameJ"}]}}`, + }, + {1, -1, -1, true, `{"data": {"me":[ + {"pname":"nameE","pred":"E"}, + {"pname":"nameD","pred":"D"}, + {"pname":"nameC","pred":"C"}, + {"pname":"nameB","pred":"B"}, + {"pname":"nameA","pred":"A"}, + {"pname":"nameF"}, + {"pname":"nameG"}, + {"pname":"nameH"}, + {"pname":"nameI"}, + {"pname":"nameJ"}]}}`, + }, + {2, -1, 2, false, `{"data": {"me":[ + {"pname":"nameA", "pred": "A"}, + {"pname":"nameB","pred":"B"}]}}`, + }, + {4, -1, 2, true, `{"data": {"me":[ + {"pname":"nameE", "pred":"E"}, + {"pname":"nameD", "pred": "D"}]}}`, + }, + {5, -1, 7, false, `{"data": {"me":[ + {"pname":"nameA","pred":"A"}, + {"pname":"nameB","pred":"B"}, + {"pname":"nameC","pred":"C"}, + {"pname":"nameD","pred":"D"}, + {"pname":"nameE","pred":"E"}, + {"pname":"nameF"}, + {"pname":"nameG"}]}}`, + }, + {6, -1, 7, true, `{"data": {"me":[ + {"pname":"nameE","pred":"E"}, + {"pname":"nameD","pred":"D"}, + {"pname":"nameC","pred":"C"}, + {"pname":"nameB","pred":"B"}, + {"pname":"nameA","pred":"A"}, + {"pname":"nameF"}, + {"pname":"nameG"}]}}`, + }, + {7, 2, 7, false, `{"data": {"me":[ + {"pname":"nameC","pred":"C"}, + {"pname":"nameD","pred":"D"}, + {"pname":"nameE","pred":"E"}, + {"pname":"nameF"}, + {"pname":"nameG"}, + {"pname":"nameH"}, + {"pname":"nameI"}]}}`, + }, + {8, 2, 7, true, `{"data": {"me":[ + {"pname":"nameC","pred":"C"}, + {"pname":"nameB","pred":"B"}, + {"pname":"nameA","pred":"A"}, + {"pname":"nameF"}, + {"pname":"nameG"}, + {"pname":"nameH"}, + {"pname":"nameI"}]}}`, + }, + {9, 2, 100, false, `{"data": {"me":[ + {"pname":"nameC","pred":"C"}, + {"pname":"nameD","pred":"D"}, + {"pname":"nameE","pred":"E"}, + {"pname":"nameF"}, + {"pname":"nameG"}, + {"pname":"nameH"}, + {"pname":"nameI"}, + {"pname":"nameJ"}]}}`, + }, + {10, 2, 100, true, `{"data": {"me":[ + {"pname":"nameC","pred":"C"}, + {"pname":"nameB","pred":"B"}, + {"pname":"nameA","pred":"A"}, + {"pname":"nameF"}, + {"pname":"nameG"}, + {"pname":"nameH"}, + {"pname":"nameI"}, + {"pname":"nameJ"}]}}`, + }, + {11, 5, 5, false, `{"data": {"me":[ + {"pname":"nameF"}, + {"pname":"nameG"}, + {"pname":"nameH"}, + {"pname":"nameI"}, + {"pname":"nameJ"}]}}`, + }, + {12, 5, 5, true, `{"data": {"me":[ + {"pname":"nameF"}, + {"pname":"nameG"}, + {"pname":"nameH"}, + {"pname":"nameI"}, + {"pname":"nameJ"}]}}`, + }, + {13, 9, 5, false, `{"data": {"me":[ + {"pname":"nameJ"}]}}`, + }, + {14, 9, 5, true, `{"data": {"me":[ + {"pname":"nameJ"}]}}`, + }, + {15, 12, 5, false, `{"data": {"me":[]}}`}, + {16, 12, 5, true, `{"data": {"me":[]}}`}, + } + + makeQuery := func(offset, first int32, desc, index bool) string { + pred := "pred" + if index { + pred = "indexpred" + } + order := "orderasc: " + pred + if desc { + order = "orderdesc: " + pred + } + qfunc := "me(func: uid(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), " + qfunc += order + if offset != -1 { + qfunc += fmt.Sprintf(", offset: %d", offset) + } + if first != -1 { + qfunc += fmt.Sprintf(", first: %d", first) + } + query := "{" + qfunc + ") { pname pred:" + pred + " } }" + return processQueryNoErr(t, query) + } + + for _, tc := range tests { + // Case of sort with Index. + actual := makeQuery(tc.offset, tc.first, tc.desc, true) + require.JSONEqf(t, tc.result, actual, "Failed on index-testcase: %d\n", tc.index) + + // Case of sort without index + actual = makeQuery(tc.offset, tc.first, tc.desc, false) + require.JSONEqf(t, tc.result, actual, "Failed on testcase: %d\n", tc.index) + } +} + func TestMultiSortPaginateWithOffset(t *testing.T) { t.Parallel() tests := []struct { diff --git a/query/query2_test.go b/query/query2_test.go index 9f7f6eaa7e9..3c05914643e 100644 --- a/query/query2_test.go +++ b/query/query2_test.go @@ -973,7 +973,13 @@ func TestLanguageOrderIndexed3(t *testing.T) { require.JSONEq(t, `{ "data": { - "q": [] + "q": [{ + "name_lang_index@de": "öffnen", + "name_lang_index@sv": "zon" + }, { + "name_lang_index@de": "zumachen", + "name_lang_index@sv": "öppna" + }] } }`, js) @@ -993,7 +999,13 @@ func TestLanguageOrderIndexed4(t *testing.T) { require.JSONEq(t, `{ "data": { - "q": [] + "q": [{ + "name_lang_index@de": "öffnen", + "name_lang_index@sv": "zon" + }, { + "name_lang_index@de": "zumachen", + "name_lang_index@sv": "öppna" + }] } }`, js) diff --git a/query/rdf_result_test.go b/query/rdf_result_test.go index eb908b423c0..322e28d2a1f 100644 --- a/query/rdf_result_test.go +++ b/query/rdf_result_test.go @@ -219,12 +219,13 @@ func TestDateRDF(t *testing.T) { ` rdf, err := processQueryRDF(context.Background(), t, query) require.NoError(t, err) - require.Equal(t, rdf, `<0x1> "Michonne" . + expected := `<0x1> "Michonne" . <0x1> "female" . <0x1> <0x19> . <0x1> <0x18> . <0x1> <0x17> . <0x1> <0x1f> . +<0x1> <0x65> . <0x17> "Rick Grimes" . <0x18> "Glenn Rhee" . <0x19> "Daryl Dixon" . @@ -233,5 +234,6 @@ func TestDateRDF(t *testing.T) { <0x18> "1909-05-05T00:00:00Z" . <0x19> "1929-01-10T00:00:00Z" . <0x1f> "1801-01-15T00:00:00Z" . -`) +` + require.Equal(t, expected, rdf) } diff --git a/worker/sort.go b/worker/sort.go index dfb5a469fbd..3d893bd4b7c 100644 --- a/worker/sort.go +++ b/worker/sort.go @@ -189,8 +189,8 @@ func sortWithIndex(ctx context.Context, ts *pb.SortMessage) *sortresult { // offsets[i] is the offset for i-th posting list. It gets decremented as we // iterate over buckets. out[i].offset = int(ts.Offset) - var emptyList pb.List - out[i].ulist = &emptyList + out[i].ulist = &pb.List{} + out[i].skippedUids = &pb.List{} out[i].uset = map[uint64]struct{}{} } @@ -303,6 +303,39 @@ BUCKETS: } } + for i, ul := range ts.UidMatrix { + // nullNodes is list of UIDs for which the value of the sort predicate is null. + var nullNodes []uint64 + // present is a map[uid]->bool to keep track of the UIDs containing the sort predicate. + present := make(map[uint64]bool) + + // Add the UIDs to the map, which are in the resultant intersected list and the UIDs which + // have been skipped because of offset while intersection. + for _, uid := range out[i].ulist.Uids { + present[uid] = true + } + for _, uid := range out[i].skippedUids.Uids { + present[uid] = true + } + + // nullPreds is a list of UIDs which doesn't contain the sort predicate. + for _, uid := range ul.Uids { + if _, ok := present[uid]; !ok { + nullNodes = append(nullNodes, uid) + } + } + + // Apply the offset on null nodes, if the nodes with value were not enough. + if out[i].offset < len(nullNodes) { + nullNodes = nullNodes[out[i].offset:] + } else { + nullNodes = nullNodes[:0] + } + remainingCount := int(ts.Count) - len(r.UidMatrix[i].Uids) + canAppend := x.Min(uint64(remainingCount), uint64(len(nullNodes))) + r.UidMatrix[i].Uids = append(r.UidMatrix[i].Uids, nullNodes[:canAppend]...) + } + select { case <-ctx.Done(): return resultWithError(ctx.Err()) @@ -532,6 +565,7 @@ func fetchValues(ctx context.Context, in *pb.Query, idx int, or chan orderResult type intersectedList struct { offset int ulist *pb.List + skippedUids *pb.List values []types.Val uset map[uint64]struct{} multiSortOffset int32 @@ -586,8 +620,10 @@ func intersectBucket(ctx context.Context, ts *pb.SortMessage, token string, n := len(result.Uids) if il.offset >= n { // We are going to skip the whole intersection. No need to do actual - // sorting. Just update offsets[i]. We now offset less. + // sorting. Just update offsets[i]. We now offset less. Also, keep track of the UIDs + // that have been skipped for the offset. il.offset -= n + il.skippedUids.Uids = append(il.skippedUids.Uids, result.Uids...) continue } @@ -605,6 +641,9 @@ func intersectBucket(ctx context.Context, ts *pb.SortMessage, token string, if il.offset > 0 { // Apply the offset. if len(ts.Order) == 1 { + // Keep track of UIDs which had sort predicate but have been skipped because of + // the offset. + il.skippedUids.Uids = append(il.skippedUids.Uids, result.Uids[:il.offset]...) result.Uids = result.Uids[il.offset:n] } else { // In case of multi sort we can't apply the offset yet, as the order might change @@ -717,25 +756,31 @@ func sortByValue(ctx context.Context, ts *pb.SortMessage, ul *pb.List, return nil, errors.Errorf("Sorting on multiple language is not supported.") } + // nullsList is the list of UIDs for which value doesn't exist. + var nullsList []uint64 + var nullVals [][]types.Val for i := 0; i < lenList; i++ { select { case <-ctx.Done(): return multiSortVals, ctx.Err() default: uid := ul.Uids[i] - uids = append(uids, uid) val, err := fetchValue(uid, order.Attr, order.Langs, typ, ts.ReadTs) if err != nil { - // Value couldn't be found or couldn't be converted to the sort - // type. By using a nil Value, it will appear at the - // end (start) for orderasc (orderdesc). + // Value couldn't be found or couldn't be converted to the sort type. + // It will be appended to the end of the result based on the pagination. val.Value = nil + nullsList = append(nullsList, uid) + nullVals = append(nullVals, []types.Val{val}) + continue } + uids = append(uids, uid) values = append(values, []types.Val{val}) } } err := types.Sort(values, &uids, []bool{order.Desc}, lang) - ul.Uids = uids + ul.Uids = append(uids, nullsList...) + values = append(values, nullVals...) if len(ts.Order) > 1 { for _, v := range values { multiSortVals = append(multiSortVals, v[0])