apache · mikemccand · Jun 24, 2021 · Apr 29, 2021 · Apr 29, 2021 · May 11, 2021
diff --git a/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg b/lucene/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
@@ -53,7 +53,7 @@ log.queries=true
 
     { "Populate"
         CreateIndex
-        [{ "MAddDocs" AddDoc } : 5000] : 4
+        [{ {{"MAddDocs" AddDoc } : 5000} FlushIndex } ] : 8
         ForceMerge(1)
         CloseIndex
     }

diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java
@@ -50,8 +50,8 @@ private static final class DateFormatInfo {
   private ThreadLocal<DateFormatInfo> dateFormat = new ThreadLocal<>();
   private Path dataDir = null;
   private ArrayList<Path> inputFiles = new ArrayList<>();
-  private int nextFile = 0;
-  private int iteration = 0;
+  private int[] docCountArr;
+  private volatile boolean docCountArrCreated;
 
   @Override
   public void setConfig(Config config) {
@@ -100,21 +100,24 @@ public void close() throws IOException {
 
   @Override
   public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
-    Path f = null;
-    String name = null;
-    synchronized (this) {
-      if (nextFile >= inputFiles.size()) {
-        // exhausted files, start a new round, unless forever set to false.
-        if (!forever) {
-          throw new NoMoreDataException();
-        }
-        nextFile = 0;
-        iteration++;
-      }
-      f = inputFiles.get(nextFile++);
-      name = f.toRealPath() + "_" + iteration;
+    if (docCountArrCreated == false) {
+      docCountArrInit();
     }
 
+    // Extract the thread index from the unique thread name: the digits start at position 12, right after the "IndexThread-" prefix that TaskSequence.java's doParallelTasks() sets
+    int threadIndex = Integer.parseInt(Thread.currentThread().getName().substring(12));
+    assert (threadIndex >= 0 && threadIndex < docCountArr.length):"Please check threadIndex or docCountArr length";
+    int stride = threadIndex + docCountArr[threadIndex] * docCountArr.length;
+    int inFileSize = inputFiles.size();
+
+    // The modulo operator covers all three possible scenarios: 1. inputFiles.size() < number of threads, 2. inputFiles.size() == number of threads, 3. inputFiles.size() > number of threads
+    int fileIndex = stride % inFileSize;
+    int iteration = stride / inFileSize;
+    docCountArr[threadIndex]++;
+
+    Path f = inputFiles.get(fileIndex);
+    String name = f.toRealPath() + "_" + iteration;
+
     try (BufferedReader reader = Files.newBufferedReader(f, StandardCharsets.UTF_8)) {
       // First line is the date, 3rd is the title, rest is body
       String dateStr = reader.readLine();
@@ -143,7 +146,12 @@ public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOExc
   @Override
   public synchronized void resetInputs() throws IOException {
     super.resetInputs();
-    nextFile = 0;
-    iteration = 0;
+    // The removed nextFile/iteration fields used to be rewound here. Their replacement is the
+    // per-thread docCountArr, so discard it: clearing the flag makes the next getNextDocData()
+    // call re-run docCountArrInit(), which allocates a fresh zeroed array sized to the current
+    // thread count, so reading starts again from the first file.
+    docCountArrCreated = false;
+  }
+
+  /**
+   * Lazily allocates {@code docCountArr} with one counter slot per thread, sized from
+   * {@code getConfig().getNumThreads()}. The method is synchronized and re-checks the flag
+   * under the lock, so only the first caller performs the allocation.
+   */
+  private synchronized void docCountArrInit() {
+    if (docCountArrCreated) {
+      // An earlier caller already allocated the array; nothing to do.
+      return;
+    }
+    docCountArr = new int[getConfig().getNumThreads()];
+    // Raise the flag only after the array has been assigned.
+    docCountArrCreated = true;
   }
 }
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/FlushIndexTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/FlushIndexTask.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.byTask.tasks;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.index.IndexWriter;
+
+/**
+ * Benchmark task that uses {@link IndexWriter#flushNextBuffer()} to flush documents at the
+ * thread level. It is a no-op when the run data has no index writer.
+ */
+public class FlushIndexTask extends PerfTask {
+
+  /** Creates the task for the given benchmark run data. */
+  public FlushIndexTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  /**
+   * Calls {@code flushNextBuffer()} on the run's index writer, if one is available.
+   *
+   * @return always 1, whether or not a writer was available
+   */
+  @Override
+  public int doLogic() throws Exception {
+    final IndexWriter writer = getRunData().getIndexWriter();
+    if (writer == null) {
+      // No writer to flush; the task still reports 1.
+      return 1;
+    }
+    writer.flushNextBuffer();
+    return 1;
+  }
+}
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
@@ -340,12 +340,17 @@ private int doParallelTasks() throws Exception {
 
     initTasksArray();
     ParallelTask t[] = runningParallelTasks = new ParallelTask[repetitions * tasks.size()];
+    // Record the number of parallel threads (repetitions * tasks.size()) in the config so that ReutersContentSource.java's docCountArrInit() can size its per-thread array
+    this.getRunData().getConfig().setNumThreads(t.length);
     // prepare threads
     int index = 0;
     for (int k = 0; k < repetitions; k++) {
       for (int i = 0; i < tasksArray.length; i++) {
         final PerfTask task = tasksArray[i].clone();
-        t[index++] = new ParallelTask(task);
+        t[index] = new ParallelTask(task);
+        // Give each thread a unique name ending in its index; ReutersContentSource.java's getNextDocData() parses the index back out of the name
+        t[index].setName("IndexThread-" + index);
+        index++;
       }
     }
     // run threads

diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Config.java
@@ -54,6 +54,7 @@ public class Config {
   private HashMap<String, Object> valByRound = new HashMap<>();
   private HashMap<String, String> colForValByRound = new HashMap<>();
   private String algorithmText;
+  private int numThreads = 1;
 
   /**
    * Read both algorithm and config properties.
@@ -113,6 +114,14 @@ public Config(Properties props) {
     }
   }
 
+  /**
+   * Stores the number of threads in this config.
+   *
+   * @param numThreads the thread count to record; the value is stored as given, without validation
+   */
+  public void setNumThreads(int numThreads) {
+    this.numThreads = numThreads;
+  }
+
+  /**
+   * Returns the thread count stored in this config.
+   *
+   * @return the value last passed to {@code setNumThreads}, or 1 if it was never called
+   */
+  public int getNumThreads() {
+    return numThreads;
+  }
+
   @SuppressWarnings({"unchecked", "rawtypes"})
   private void printProps() {
     System.out.println("------------> config properties:");