|
52 | 52 | emptyValueList = pb.ValueList{Values: []*pb.TaskValue{}} |
53 | 53 | ) |
54 | 54 |
|
55 | | -func invokeNetworkRequest( |
56 | | - ctx context.Context, addr string, f func(context.Context, pb.WorkerClient) (interface{}, error)) (interface{}, error) { |
| 55 | +func invokeNetworkRequest(ctx context.Context, addr string, |
| 56 | + f func(context.Context, pb.WorkerClient) (interface{}, error)) (interface{}, error) { |
57 | 57 | pl, err := conn.Get().Get(addr) |
58 | 58 | if err != nil { |
59 | 59 | return &emptyResult, x.Wrapf(err, "dispatchTaskOverNetwork: while retrieving connection.") |
@@ -270,7 +270,7 @@ func parseFuncTypeHelper(name string) (FuncType, string) { |
270 | 270 |
|
271 | 271 | func needsIndex(fnType FuncType) bool { |
272 | 272 | switch fnType { |
273 | | - case CompareAttrFn, GeoFn, RegexFn, FullTextSearchFn, StandardFn: |
| 273 | + case CompareAttrFn, GeoFn, FullTextSearchFn, StandardFn: |
274 | 274 | return true |
275 | 275 | default: |
276 | 276 | return false |
@@ -876,88 +876,102 @@ func (qs *queryState) handleCompareScalarFunction(arg funcArgs) error { |
876 | 876 | } |
877 | 877 |
|
878 | 878 | func (qs *queryState) handleRegexFunction(ctx context.Context, arg funcArgs) error { |
| 879 | + span := otrace.FromContext(ctx) |
| 880 | + stop := x.SpanTimer(span, "handleRegexFunction") |
| 881 | + defer stop() |
| 882 | + if span != nil { |
| 883 | + span.Annotatef(nil, "Number of uids: %d. args.srcFn: %+v", arg.srcFn.n, arg.srcFn) |
| 884 | + } |
| 885 | + |
879 | 886 | attr := arg.q.Attr |
880 | 887 | typ, err := schema.State().TypeOf(attr) |
| 888 | + span.Annotatef(nil, "Attr: %s. Type: %s", attr, typ.Name()) |
881 | 889 | if err != nil || !typ.IsScalar() { |
882 | 890 | return x.Errorf("Attribute not scalar: %s %v", attr, typ) |
883 | 891 | } |
884 | 892 | if typ != types.StringID { |
885 | 893 | return x.Errorf("Got non-string type. Regex match is allowed only on string type.") |
886 | 894 | } |
887 | | - tokenizers := schema.State().TokenizerNames(attr) |
888 | | - var found bool |
889 | | - for _, t := range tokenizers { |
890 | | - if t == "trigram" { // TODO(tzdybal) - maybe just rename to 'regex' tokenizer? |
891 | | - found = true |
892 | | - } |
893 | | - } |
894 | | - if !found { |
895 | | - return x.Errorf("Attribute %v does not have trigram index for regex matching.", attr) |
896 | | - } |
| 895 | + useIndex := schema.State().HasTokenizer(tok.IdentTrigram, attr) |
| 896 | + span.Annotatef(nil, "Trigram index found: %t, func at root: %t", |
| 897 | + useIndex, arg.srcFn.isFuncAtRoot) |
897 | 898 |
|
898 | 899 | query := cindex.RegexpQuery(arg.srcFn.regex.Syntax) |
899 | 900 | empty := pb.List{} |
900 | | - uids, err := uidsForRegex(attr, arg, query, &empty) |
| 901 | + uids := &pb.List{} |
| 902 | + |
| 903 | + // Here we determine the list of uids to match. |
| 904 | + switch { |
| 905 | + // If this is a filter eval, use the given uid list (good) |
| 906 | + case arg.q.UidList != nil && len(arg.q.UidList.Uids) != 0: |
| 907 | + uids = arg.q.UidList |
| 908 | + |
| 909 | + // Prefer to use an index (fast) |
| 910 | + case useIndex: |
| 911 | + uids, err = uidsForRegex(attr, arg, query, &empty) |
| 912 | + if err != nil { |
| 913 | + return err |
| 914 | + } |
| 915 | + |
| 916 | + // No index and at root, return error instructing user to use `has` or index. |
| 917 | + default: |
| 918 | + return x.Errorf( |
| 919 | + "Attribute %v does not have trigram index for regex matching. "+ |
| 920 | + "Please add a trigram index or use has/uid function with regexp() as filter.", |
| 921 | + attr) |
| 922 | + } |
| 923 | + |
| 924 | + arg.out.UidMatrix = append(arg.out.UidMatrix, uids) |
901 | 925 | isList := schema.State().IsList(attr) |
902 | 926 | lang := langForFunc(arg.q.Langs) |
903 | | - if uids != nil { |
904 | | - arg.out.UidMatrix = append(arg.out.UidMatrix, uids) |
905 | 927 |
|
906 | | - filtered := &pb.List{} |
907 | | - for _, uid := range uids.Uids { |
908 | | - select { |
909 | | - case <-ctx.Done(): |
910 | | - return ctx.Err() |
911 | | - default: |
912 | | - } |
913 | | - pl, err := qs.cache.Get(x.DataKey(attr, uid)) |
914 | | - if err != nil { |
915 | | - return err |
916 | | - } |
| 928 | + span.Annotatef(nil, "Total uids: %d, list: %t lang: %v", len(uids.Uids), isList, lang) |
917 | 929 |
|
918 | | - var val types.Val |
919 | | - if lang != "" { |
920 | | - val, err = pl.ValueForTag(arg.q.ReadTs, lang) |
921 | | - } else if isList { |
922 | | - vals, err := pl.AllUntaggedValues(arg.q.ReadTs) |
923 | | - if err == posting.ErrNoValue { |
924 | | - continue |
925 | | - } else if err != nil { |
926 | | - return err |
927 | | - } |
928 | | - for _, val := range vals { |
929 | | - // convert data from binary to appropriate format |
930 | | - strVal, err := types.Convert(val, types.StringID) |
931 | | - if err == nil && matchRegex(strVal, arg.srcFn.regex) { |
932 | | - filtered.Uids = append(filtered.Uids, uid) |
933 | | - break |
934 | | - } |
935 | | - } |
| 930 | + filtered := &pb.List{} |
| 931 | + for _, uid := range uids.Uids { |
| 932 | + select { |
| 933 | + case <-ctx.Done(): |
| 934 | + return ctx.Err() |
| 935 | + default: |
| 936 | + } |
| 937 | + pl, err := qs.cache.Get(x.DataKey(attr, uid)) |
| 938 | + if err != nil { |
| 939 | + return err |
| 940 | + } |
936 | 941 |
|
937 | | - continue |
938 | | - } else { |
939 | | - val, err = pl.Value(arg.q.ReadTs) |
940 | | - } |
| 942 | + vals := make([]types.Val, 1) |
| 943 | + switch { |
| 944 | + case lang != "": |
| 945 | + vals[0], err = pl.ValueForTag(arg.q.ReadTs, lang) |
| 946 | + |
| 947 | + case isList: |
| 948 | + vals, err = pl.AllUntaggedValues(arg.q.ReadTs) |
941 | 949 |
|
| 950 | + default: |
| 951 | + vals[0], err = pl.Value(arg.q.ReadTs) |
| 952 | + } |
| 953 | + if err != nil { |
942 | 954 | if err == posting.ErrNoValue { |
943 | 955 | continue |
944 | | - } else if err != nil { |
945 | | - return err |
946 | 956 | } |
| 957 | + return err |
| 958 | + } |
947 | 959 |
|
| 960 | + for _, val := range vals { |
948 | 961 | // convert data from binary to appropriate format |
949 | 962 | strVal, err := types.Convert(val, types.StringID) |
950 | 963 | if err == nil && matchRegex(strVal, arg.srcFn.regex) { |
951 | 964 | filtered.Uids = append(filtered.Uids, uid) |
| 965 | + // NOTE: We only add the uid once. |
| 966 | + break |
952 | 967 | } |
953 | 968 | } |
| 969 | + } |
954 | 970 |
|
955 | | - for i := 0; i < len(arg.out.UidMatrix); i++ { |
956 | | - algo.IntersectWith(arg.out.UidMatrix[i], filtered, arg.out.UidMatrix[i]) |
957 | | - } |
958 | | - } else { |
959 | | - return err |
| 971 | + for i := 0; i < len(arg.out.UidMatrix); i++ { |
| 972 | + algo.IntersectWith(arg.out.UidMatrix[i], filtered, arg.out.UidMatrix[i]) |
960 | 973 | } |
| 974 | + |
961 | 975 | return nil |
962 | 976 | } |
963 | 977 |
|
|
0 commit comments