Skip to content

Commit

Permalink
Bug fix and optimize per CPU profiler
Browse files Browse the repository at this point in the history
Run 1
                            Task  QPS baseline  StdDev  QPS my_modified_version  StdDev  Pct diff  p-value
                 CFQHighHighHigh        4.64      (6.5%)        3.30      (4.7%)  -29.0% ( -37% -  -19%) 0.000
                     CFQHighHigh       11.09      (6.0%)        9.61      (6.0%)  -13.3% ( -23% -   -1%) 0.000
                        PKLookup      103.38      (4.4%)      108.04      (4.3%)    4.5% (  -4% -   13%) 0.001
                   CFQHighMedLow       10.58      (6.1%)       12.30      (8.7%)   16.2% (   1% -   33%) 0.000
                      CFQHighMed       10.70      (7.4%)       15.51     (11.2%)   44.9% (  24% -   68%) 0.000
                   CFQHighLowLow        8.18      (8.2%)       12.87     (11.6%)   57.3% (  34% -   84%) 0.000
                      CFQHighLow       14.57      (7.5%)       30.81     (15.1%)  111.4% (  82% -  144%) 0.000

Run 2
                            Task  QPS baseline  StdDev  QPS my_modified_version  StdDev  Pct diff  p-value
                 CFQHighHighHigh        5.33      (5.7%)        4.02      (7.7%)  -24.4% ( -35% -  -11%) 0.000
                   CFQHighLowLow       17.14      (6.2%)       13.06      (5.4%)  -23.8% ( -33% -  -13%) 0.000
                      CFQHighMed       17.37      (5.8%)       14.38      (7.7%)  -17.2% ( -29% -   -3%) 0.000
                        PKLookup      103.57      (5.5%)      108.84      (5.9%)    5.1% (  -6% -   17%) 0.005
                   CFQHighMedLow       11.25      (7.2%)       12.70      (9.0%)   12.9% (  -3% -   31%) 0.000
                     CFQHighHigh        5.00      (6.2%)        7.54     (12.1%)   51.0% (  30% -   73%) 0.000
                      CFQHighLow       21.60      (5.2%)       34.57     (14.1%)   60.0% (  38% -   83%) 0.000

Run 3
                            Task  QPS baseline  StdDev  QPS my_modified_version  StdDev  Pct diff  p-value
                 CFQHighHighHigh        5.40      (6.9%)        4.06      (5.1%)  -24.8% ( -34% -  -13%) 0.000
                   CFQHighMedLow        7.64      (7.4%)        5.79      (6.3%)  -24.2% ( -35% -  -11%) 0.000
                     CFQHighHigh       11.11      (7.0%)        9.60      (5.9%)  -13.6% ( -24% -    0%) 0.000
                   CFQHighLowLow       21.21      (7.6%)       21.22      (6.6%)    0.0% ( -13% -   15%) 0.993
                        PKLookup      103.15      (5.9%)      107.60      (6.9%)    4.3% (  -8% -   18%) 0.034
                      CFQHighLow       21.85      (8.1%)       34.18     (13.5%)   56.4% (  32% -   84%) 0.000
                      CFQHighMed       12.07      (8.4%)       19.98     (16.7%)   65.5% (  37% -   98%) 0.000

Run 4
                            Task  QPS baseline  StdDev  QPS my_modified_version  StdDev  Pct diff  p-value
                     CFQHighHigh        8.50      (5.8%)        6.85      (5.2%)  -19.5% ( -28% -   -8%) 0.000
                   CFQHighMedLow       10.89      (5.7%)        8.96      (5.4%)  -17.8% ( -27% -   -7%) 0.000
                      CFQHighMed        8.41      (5.8%)        7.74      (5.6%)   -7.9% ( -18% -    3%) 0.000
                 CFQHighHighHigh        3.45      (6.7%)        3.38      (5.3%)   -2.0% ( -13% -   10%) 0.287
                   CFQHighLowLow        7.82      (6.4%)        8.20      (7.5%)    4.8% (  -8% -   20%) 0.030
                        PKLookup      103.50      (5.0%)      110.69      (5.4%)    6.9% (  -3% -   18%) 0.000
                      CFQHighLow       11.46      (6.0%)       13.16      (6.7%)   14.8% (   1% -   29%) 0.000
  • Loading branch information
zacharymorn committed Nov 5, 2021
1 parent 2ba435e commit 1a71469
Showing 1 changed file with 43 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
Expand Down Expand Up @@ -425,21 +424,23 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
public Scorer scorer(LeafReaderContext context) throws IOException {
List<PostingsEnum> iterators = new ArrayList<>();
List<FieldAndWeight> fields = new ArrayList<>();
Map<String, List<ImpactsEnum>> fieldImpacts = new HashMap<>();
Map<String, List<ImpactsEnum>> fieldImpactsEnum = new HashMap<>();
Map<String, List<Impacts>> fieldImpacts = new HashMap<>();

for (int i = 0; i < fieldTerms.length; i++) {
TermState state = termStates[i].get(context);
if (state != null) {
String fieldName = fieldTerms[i].field();
fields.add(fieldAndWeights.get(fieldName));
fieldImpacts.putIfAbsent(fieldName, new ArrayList<>());
fieldImpactsEnum.putIfAbsent(fieldName, new ArrayList<>());

TermsEnum termsEnum = context.reader().terms(fieldName).iterator();
termsEnum.seekExact(fieldTerms[i].bytes(), state);

if (scoreMode == ScoreMode.TOP_SCORES) {
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
iterators.add(impactsEnum);
fieldImpacts.get(fieldName).add(impactsEnum);
fieldImpactsEnum.get(fieldName).add(impactsEnum);
} else {
PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.FREQS);
iterators.add(postingsEnum);
Expand Down Expand Up @@ -477,7 +478,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
ImpactsDISI impactsDisi = null;

if (scoreMode == ScoreMode.TOP_SCORES) {
ImpactsSource impactsSource = mergeImpacts(fieldImpacts, fieldWeights);
ImpactsSource impactsSource = mergeImpacts(fieldImpactsEnum, fieldImpacts, fieldWeights);
iterator = impactsDisi = new ImpactsDISI(iterator, impactsSource, simWeight);
}

Expand All @@ -491,7 +492,7 @@ public boolean isCacheable(LeafReaderContext ctx) {
}

/** Merge impacts for combined field. */
static ImpactsSource mergeImpacts(Map<String, List<ImpactsEnum>> fieldsWithImpactsEnums, Map<String, Float> fieldWeights) {
static ImpactsSource mergeImpacts(Map<String, List<ImpactsEnum>> fieldsWithImpactsEnums, Map<String, List<Impacts>> fieldsWithImpacts, Map<String, Float> fieldWeights) {
return new ImpactsSource() {

class SubIterator {
Expand All @@ -514,15 +515,12 @@ void next() {
}
}

Map<String, List<Impacts>> fieldsWithImpacts;

@Override
public Impacts getImpacts() throws IOException {
fieldsWithImpacts = new HashMap<>();

// Use the impacts that have the lower next boundary (doc id in skip entry) as a lead for each field
// They collectively will decide on the number of levels and the block boundaries.
Map<String, Impacts> leadingImpactsPerField = new HashMap<>(fieldsWithImpactsEnums.keySet().size());
Map<String, Impacts> leadingImpactsPerField = new HashMap<>(fieldsWithImpactsEnums.size());

for (Map.Entry<String, List<ImpactsEnum>> fieldImpacts : fieldsWithImpactsEnums.entrySet()) {
String field = fieldImpacts.getKey();
Expand All @@ -549,13 +547,27 @@ public Impacts getImpacts() throws IOException {
@Override
public int numLevels() {
// max of levels across fields' impactEnums
return leadingImpactsPerField.values().stream().map(Impacts::numLevels).max(Integer::compareTo).get();
int result = 0;

for (Impacts impacts : leadingImpactsPerField.values()) {
result = Math.max(result, impacts.numLevels());
}

return result;
}

@Override
public int getDocIdUpTo(int level) {
// min of docIdUpTo across fields' impactEnums
return leadingImpactsPerField.values().stream().filter(i -> i.numLevels() > level).map(i -> i.getDocIdUpTo(level)).min(Integer::compareTo).get();
int result = Integer.MAX_VALUE;

for (Impacts impacts : leadingImpactsPerField.values()) {
if (impacts.numLevels() > level) {
result = Math.min(result, impacts.getDocIdUpTo(level));
}
}

return result;
}

@Override
Expand All @@ -567,7 +579,7 @@ public List<Impact> getImpacts(int level) {
}

private Map<String, List<Impact>> mergeImpactsPerField(int docIdUpTo) {
final Map<String, List<Impact>> result = new HashMap<>();
final Map<String, List<Impact>> result = new HashMap<>(fieldsWithImpactsEnums.size());

for (Map.Entry<String, List<ImpactsEnum>> impactsPerField : fieldsWithImpactsEnums.entrySet()) {
String field = impactsPerField.getKey();
Expand All @@ -593,7 +605,7 @@ private Map<String, List<Impact>> mergeImpactsPerField(int docIdUpTo) {

// Merge impacts from same field by summing freqs with the same norms - the same logic used for SynonymQuery
private List<Impact> doMergeImpactsPerField(String field, List<ImpactsEnum> impactsEnums, List<Impacts> impacts, int docIdUpTo) {
List<List<Impact>> toMerge = new ArrayList<>();
List<List<Impact>> toMerge = new ArrayList<>(impactsEnums.size());

for (int i = 0; i < impactsEnums.size(); ++i) {
if (impactsEnums.get(i).docID() <= docIdUpTo) {
Expand All @@ -603,20 +615,19 @@ private List<Impact> doMergeImpactsPerField(String field, List<ImpactsEnum> impa
// return impacts that trigger the maximum score
return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
}
final List<Impact> impactList;
float weight = fieldWeights.get(field);
if (weight != 1f) {
impactList =
impacts.get(i).getImpacts(impactsLevel).stream()
.map(
impact ->
new Impact((int) Math.ceil(impact.freq * weight),
SmallFloat.intToByte4((int) Math.floor(normToLength(impact.norm) * weight))))
.collect(Collectors.toList());
final List<Impact> originalImpactList = impacts.get(i).getImpacts(impactsLevel);
final List<Impact> impactList = new ArrayList<>(originalImpactList.size());
for (Impact impact : originalImpactList) {
impactList.add(new Impact((int) Math.ceil(impact.freq * weight),
SmallFloat.intToByte4((int) Math.floor(normToLength(impact.norm) * weight))));

}
toMerge.add(impactList);
} else {
impactList = impacts.get(i).getImpacts(impactsLevel);
toMerge.add(impacts.get(i).getImpacts(impactsLevel));
}
toMerge.add(impactList);
}
}

Expand Down Expand Up @@ -709,7 +720,14 @@ private List<Impact> mergeImpactsAcrossFields(Map<String, List<Impact>> mergedIm
minNorm = Math.min(minNorm, impacts.get(0).norm);
}

return Collections.singletonList(new Impact(maxFreq * mergedImpactsPerField.size(), minNorm));
int amplifiedMaxFreq = maxFreq * mergedImpactsPerField.size();

// overflow
if (amplifiedMaxFreq < 0) {
amplifiedMaxFreq = Integer.MAX_VALUE;
}

return Collections.singletonList(new Impact(amplifiedMaxFreq, minNorm));
}


Expand Down

0 comments on commit 1a71469

Please sign in to comment.