Fix bug and optimize based on CPU profiler results

Run 1
Task               QPS baseline  StdDev   QPS my_modified_version  StdDev   Pct diff                  p-value
CFQHighHighHigh            4.64  (6.5%)                      3.30  (4.7%)    -29.0% ( -37% -  -19%)   0.000
CFQHighHigh               11.09  (6.0%)                      9.61  (6.0%)    -13.3% ( -23% -   -1%)   0.000
PKLookup                 103.38  (4.4%)                    108.04  (4.3%)      4.5% (  -4% -   13%)   0.001
CFQHighMedLow             10.58  (6.1%)                     12.30  (8.7%)     16.2% (   1% -   33%)   0.000
CFQHighMed                10.70  (7.4%)                     15.51 (11.2%)     44.9% (  24% -   68%)   0.000
CFQHighLowLow              8.18  (8.2%)                     12.87 (11.6%)     57.3% (  34% -   84%)   0.000
CFQHighLow                14.57  (7.5%)                     30.81 (15.1%)    111.4% (  82% -  144%)   0.000

Run 2
Task               QPS baseline  StdDev   QPS my_modified_version  StdDev   Pct diff                  p-value
CFQHighHighHigh            5.33  (5.7%)                      4.02  (7.7%)    -24.4% ( -35% -  -11%)   0.000
CFQHighLowLow             17.14  (6.2%)                     13.06  (5.4%)    -23.8% ( -33% -  -13%)   0.000
CFQHighMed                17.37  (5.8%)                     14.38  (7.7%)    -17.2% ( -29% -   -3%)   0.000
PKLookup                 103.57  (5.5%)                    108.84  (5.9%)      5.1% (  -6% -   17%)   0.005
CFQHighMedLow             11.25  (7.2%)                     12.70  (9.0%)     12.9% (  -3% -   31%)   0.000
CFQHighHigh                5.00  (6.2%)                      7.54 (12.1%)     51.0% (  30% -   73%)   0.000
CFQHighLow                21.60  (5.2%)                     34.57 (14.1%)     60.0% (  38% -   83%)   0.000

Run 3
Task               QPS baseline  StdDev   QPS my_modified_version  StdDev   Pct diff                  p-value
CFQHighHighHigh            5.40  (6.9%)                      4.06  (5.1%)    -24.8% ( -34% -  -13%)   0.000
CFQHighMedLow              7.64  (7.4%)                      5.79  (6.3%)    -24.2% ( -35% -  -11%)   0.000
CFQHighHigh               11.11  (7.0%)                      9.60  (5.9%)    -13.6% ( -24% -    0%)   0.000
CFQHighLowLow             21.21  (7.6%)                     21.22  (6.6%)      0.0% ( -13% -   15%)   0.993
PKLookup                 103.15  (5.9%)                    107.60  (6.9%)      4.3% (  -8% -   18%)   0.034
CFQHighLow                21.85  (8.1%)                     34.18 (13.5%)     56.4% (  32% -   84%)   0.000
CFQHighMed                12.07  (8.4%)                     19.98 (16.7%)     65.5% (  37% -   98%)   0.000

Run 4
Task               QPS baseline  StdDev   QPS my_modified_version  StdDev   Pct diff                  p-value
CFQHighHigh                8.50  (5.8%)                      6.85  (5.2%)    -19.5% ( -28% -   -8%)   0.000
CFQHighMedLow             10.89  (5.7%)                      8.96  (5.4%)    -17.8% ( -27% -   -7%)   0.000
CFQHighMed                 8.41  (5.8%)                      7.74  (5.6%)     -7.9% ( -18% -    3%)   0.000
CFQHighHighHigh            3.45  (6.7%)                      3.38  (5.3%)     -2.0% ( -13% -   10%)   0.287
CFQHighLowLow              7.82  (6.4%)                      8.20  (7.5%)      4.8% (  -8% -   20%)   0.030
PKLookup                 103.50  (5.0%)                    110.69  (5.4%)      6.9% (  -3% -   18%)   0.000
CFQHighLow                11.46  (6.0%)                     13.16  (6.7%)     14.8% (   1% -   29%)   0.000
apache · Nov 5, 2021 · 1a71469 · 1a71469
1 parent 2ba435e
commit 1a71469
Showing 1 changed file with 43 additions and 25 deletions.
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
@@ -29,7 +29,6 @@
 import java.util.Objects;
 import java.util.Set;
 import java.util.TreeMap;
-import java.util.stream.Collectors;
 
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
@@ -425,21 +424,23 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
     public Scorer scorer(LeafReaderContext context) throws IOException {
       List<PostingsEnum> iterators = new ArrayList<>();
       List<FieldAndWeight> fields = new ArrayList<>();
-      Map<String, List<ImpactsEnum>> fieldImpacts = new HashMap<>();
+      Map<String, List<ImpactsEnum>> fieldImpactsEnum = new HashMap<>();
+      Map<String, List<Impacts>> fieldImpacts = new HashMap<>();
 
       for (int i = 0; i < fieldTerms.length; i++) {
         TermState state = termStates[i].get(context);
         if (state != null) {
           String fieldName = fieldTerms[i].field();
           fields.add(fieldAndWeights.get(fieldName));
-          fieldImpacts.putIfAbsent(fieldName, new ArrayList<>());
+          fieldImpactsEnum.putIfAbsent(fieldName, new ArrayList<>());
 
           TermsEnum termsEnum = context.reader().terms(fieldName).iterator();
           termsEnum.seekExact(fieldTerms[i].bytes(), state);
+
           if (scoreMode == ScoreMode.TOP_SCORES) {
             ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
             iterators.add(impactsEnum);
-            fieldImpacts.get(fieldName).add(impactsEnum);
+            fieldImpactsEnum.get(fieldName).add(impactsEnum);
           } else {
             PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.FREQS);
             iterators.add(postingsEnum);
@@ -477,7 +478,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
       ImpactsDISI impactsDisi = null;
 
       if (scoreMode == ScoreMode.TOP_SCORES) {
-        ImpactsSource impactsSource = mergeImpacts(fieldImpacts, fieldWeights);
+        ImpactsSource impactsSource = mergeImpacts(fieldImpactsEnum, fieldImpacts, fieldWeights);
         iterator = impactsDisi = new ImpactsDISI(iterator, impactsSource, simWeight);
       }
 
@@ -491,7 +492,7 @@ public boolean isCacheable(LeafReaderContext ctx) {
   }
 
   /** Merge impacts for combined field. */
-  static ImpactsSource mergeImpacts(Map<String, List<ImpactsEnum>> fieldsWithImpactsEnums, Map<String, Float> fieldWeights) {
+  static ImpactsSource mergeImpacts(Map<String, List<ImpactsEnum>> fieldsWithImpactsEnums, Map<String, List<Impacts>> fieldsWithImpacts, Map<String, Float> fieldWeights) {
     return new ImpactsSource() {
 
       class SubIterator {
@@ -514,15 +515,12 @@ void next() {
         }
       }
 
-      Map<String, List<Impacts>> fieldsWithImpacts;
 
       @Override
       public Impacts getImpacts() throws IOException {
-        fieldsWithImpacts = new HashMap<>();
-
         // Use the impacts that have the lower next boundary (doc id in skip entry) as a lead for each field
         // They collectively will decide on the number of levels and the block boundaries.
-        Map<String, Impacts> leadingImpactsPerField = new HashMap<>(fieldsWithImpactsEnums.keySet().size());
+        Map<String, Impacts> leadingImpactsPerField = new HashMap<>(fieldsWithImpactsEnums.size());
 
         for (Map.Entry<String, List<ImpactsEnum>> fieldImpacts : fieldsWithImpactsEnums.entrySet()) {
           String field = fieldImpacts.getKey();
@@ -549,13 +547,27 @@ public Impacts getImpacts() throws IOException {
           @Override
           public int numLevels() {
             // max of levels across fields' impactEnums
-            return leadingImpactsPerField.values().stream().map(Impacts::numLevels).max(Integer::compareTo).get();
+            int result = 0;
+
+            for (Impacts impacts : leadingImpactsPerField.values()) {
+                result = Math.max(result, impacts.numLevels());
+            }
+
+            return result;
           }
 
           @Override
           public int getDocIdUpTo(int level) {
             // min of docIdUpTo across fields' impactEnums
-            return leadingImpactsPerField.values().stream().filter(i -> i.numLevels() > level).map(i -> i.getDocIdUpTo(level)).min(Integer::compareTo).get();
+            int result = Integer.MAX_VALUE;
+
+            for (Impacts impacts : leadingImpactsPerField.values()) {
+              if (impacts.numLevels() > level) {
+                result = Math.min(result, impacts.getDocIdUpTo(level));
+              }
+            }
+
+            return result;
           }
 
           @Override
@@ -567,7 +579,7 @@ public List<Impact> getImpacts(int level) {
           }
 
           private Map<String, List<Impact>> mergeImpactsPerField(int docIdUpTo) {
-            final Map<String, List<Impact>> result = new HashMap<>();
+            final Map<String, List<Impact>> result = new HashMap<>(fieldsWithImpactsEnums.size());
 
             for (Map.Entry<String, List<ImpactsEnum>> impactsPerField : fieldsWithImpactsEnums.entrySet()) {
               String field = impactsPerField.getKey();
@@ -593,7 +605,7 @@ private Map<String, List<Impact>> mergeImpactsPerField(int docIdUpTo) {
 
           // Merge impacts from same field by summing freqs with the same norms - the same logic used for SynonymQuery
           private List<Impact> doMergeImpactsPerField(String field, List<ImpactsEnum> impactsEnums, List<Impacts> impacts, int docIdUpTo) {
-            List<List<Impact>> toMerge = new ArrayList<>();
+            List<List<Impact>> toMerge = new ArrayList<>(impactsEnums.size());
 
             for (int i = 0; i < impactsEnums.size(); ++i) {
               if (impactsEnums.get(i).docID() <= docIdUpTo) {
@@ -603,20 +615,19 @@ private List<Impact> doMergeImpactsPerField(String field, List<ImpactsEnum> impa
                   // return impacts that trigger the maximum score
                   return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
                 }
-                final List<Impact> impactList;
                 float weight = fieldWeights.get(field);
                 if (weight != 1f) {
-                  impactList =
-                          impacts.get(i).getImpacts(impactsLevel).stream()
-                                  .map(
-                                          impact ->
-                                                  new Impact((int) Math.ceil(impact.freq * weight),
-                                                          SmallFloat.intToByte4((int) Math.floor(normToLength(impact.norm) * weight))))
-                                  .collect(Collectors.toList());
+                  final List<Impact> originalImpactList = impacts.get(i).getImpacts(impactsLevel);
+                  final List<Impact> impactList = new ArrayList<>(originalImpactList.size());
+                  for (Impact impact : originalImpactList) {
+                    impactList.add(new Impact((int) Math.ceil(impact.freq * weight),
+                            SmallFloat.intToByte4((int) Math.floor(normToLength(impact.norm) * weight))));
+
+                  }
+                  toMerge.add(impactList);
                 } else {
-                  impactList = impacts.get(i).getImpacts(impactsLevel);
+                  toMerge.add(impacts.get(i).getImpacts(impactsLevel));
                 }
-                toMerge.add(impactList);
               }
             }
 
@@ -709,7 +720,14 @@ private List<Impact> mergeImpactsAcrossFields(Map<String, List<Impact>> mergedIm
               minNorm = Math.min(minNorm, impacts.get(0).norm);
             }
 
-            return Collections.singletonList(new Impact(maxFreq * mergedImpactsPerField.size(), minNorm));
+            int amplifiedMaxFreq = maxFreq * mergedImpactsPerField.size();
+
+            // overflow
+            if (amplifiedMaxFreq < 0) {
+              amplifiedMaxFreq = Integer.MAX_VALUE;
+            }
+
+            return Collections.singletonList(new Impact(amplifiedMaxFreq, minNorm));
           }