diff --git a/pom.xml b/pom.xml
index e66f27f94f..a002094962 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,4 +1,5 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <groupId>anserini</groupId>
   <artifactId>ingester</artifactId>
@@ -71,11 +72,13 @@
   </build>
 
   <dependencies>
+
     <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-benchmark</artifactId>
-	<version>${LUCENE_VERSION}</version>
+      <version>${LUCENE_VERSION}</version>
     </dependency>
+
     <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-core</artifactId>
@@ -87,16 +90,13 @@
       <artifactId>commons-logging</artifactId>
       <version>1.2</version>
     </dependency>
+
     <dependency>
       <groupId>org.apache.httpcomponents</groupId>
       <artifactId>httpclient</artifactId>
       <version>4.5.1</version>
     </dependency>
-    <dependency>
-      <groupId>commons-cli</groupId>
-      <artifactId>commons-cli</artifactId>
-      <version>1.2</version>
-    </dependency>
+
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-lang3</artifactId>
@@ -126,6 +126,7 @@
       <artifactId>log4j-api</artifactId>
       <version>2.4</version>
     </dependency>
+
     <dependency>
       <groupId>org.apache.logging.log4j</groupId>
       <artifactId>log4j-core</artifactId>
@@ -137,20 +138,24 @@
       <artifactId>gson</artifactId>
       <version>2.4</version>
     </dependency>
+
     <dependency>
       <groupId>org.twitter4j</groupId>
       <artifactId>twitter4j-stream</artifactId>
       <version>4.0.4</version>
     </dependency>
+
     <dependency>
       <groupId>com.twitter</groupId>
       <artifactId>twitter-text</artifactId>
       <version>1.13.0</version>
     </dependency>
+
     <dependency>
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
       <version>19.0-rc2</version>
     </dependency>
+
   </dependencies>
 </project>
diff --git a/src/main/java/io/anserini/document/ClueWeb09WarcRecord.java b/src/main/java/io/anserini/document/ClueWeb09WarcRecord.java
index 63f7ada340..461fd5d1e9 100644
--- a/src/main/java/io/anserini/document/ClueWeb09WarcRecord.java
+++ b/src/main/java/io/anserini/document/ClueWeb09WarcRecord.java
@@ -46,579 +46,579 @@
 
 public final class ClueWeb09WarcRecord {
 
-    public static String WARC_VERSION = "WARC/0.18";
-    public static String WARC_VERSION_LINE = "WARC/0.18\n";
-    private static String NEWLINE = "\n";
-
-    private static byte MASK_THREE_BYTE_CHAR = (byte) (0xE0);
-    private static byte MASK_TWO_BYTE_CHAR = (byte) (0xC0);
-    private static byte MASK_TOPMOST_BIT = (byte) (0x80);
-    private static byte MASK_BOTTOM_SIX_BITS = (byte) (0x1F);
-    private static byte MASK_BOTTOM_FIVE_BITS = (byte) (0x3F);
-    private static byte MASK_BOTTOM_FOUR_BITS = (byte) (0x0F);
-    private WarcHeader warcHeader = new WarcHeader();
-    private byte[] warcContent = null;
-    private String warcFilePath = "";
-
-    /**
-     * Default Constructor
-     */
-    public ClueWeb09WarcRecord() {
-    }
-
-    /**
-     * Copy Constructor
-     *
-     * @param o
-     */
-    public ClueWeb09WarcRecord(ClueWeb09WarcRecord o) {
-        this.warcHeader = new WarcHeader(o.warcHeader);
-        this.warcContent = o.warcContent;
-    }
-
-    /**
-     * Our read line implementation. We cannot allow buffering here (for gzip
-     * streams) so, we need to use DataInputStream. Also - we need to account
-     * for java's UTF8 implementation
-     *
-     * @param in the input data stream
-     * @return the read line (or null if eof)
-     * @throws java.io.IOException
-     */
-    public static String readLineFromInputStream(DataInputStream in) throws IOException {
-        StringBuilder retString = new StringBuilder();
-
-        boolean keepReading = true;
-        try {
-            do {
-                char thisChar = 0;
-                byte readByte = in.readByte();
-
-                // check to see if it's a multibyte character
-                if ((readByte & MASK_THREE_BYTE_CHAR) == MASK_THREE_BYTE_CHAR) {
-                    // need to read the next 2 bytes
-                    if (in.available() < 2) {
-                        // treat these all as individual characters
-                        retString.append((char) readByte);
-                        int numAvailable = in.available();
-                        for (int i = 0; i < numAvailable; i++) {
-                            retString.append((char) (in.readByte()));
-                        }
-                        continue;
-                    }
-                    byte secondByte = in.readByte();
-                    byte thirdByte = in.readByte();
-                    // ensure the topmost bit is set
-                    if (((secondByte & MASK_TOPMOST_BIT) != MASK_TOPMOST_BIT)
-                            || ((thirdByte & MASK_TOPMOST_BIT) != MASK_TOPMOST_BIT)) {
-                        // treat these as individual characters
-                        retString.append((char) readByte);
-                        retString.append((char) secondByte);
-                        retString.append((char) thirdByte);
-                        continue;
-                    }
-                    int finalVal = (thirdByte & MASK_BOTTOM_FIVE_BITS) + 64
-                            * (secondByte & MASK_BOTTOM_FIVE_BITS) + 4096
-                            * (readByte & MASK_BOTTOM_FOUR_BITS);
-                    thisChar = (char) finalVal;
-                } else if ((readByte & MASK_TWO_BYTE_CHAR) == MASK_TWO_BYTE_CHAR) {
-                    // need to read next byte
-                    if (in.available() < 1) {
-                        // treat this as individual characters
-                        retString.append((char) readByte);
-                        continue;
-                    }
-                    byte secondByte = in.readByte();
-                    if ((secondByte & MASK_TOPMOST_BIT) != MASK_TOPMOST_BIT) {
-                        retString.append((char) readByte);
-                        retString.append((char) secondByte);
-                        continue;
-                    }
-                    int finalVal = (secondByte & MASK_BOTTOM_FIVE_BITS) + 64
-                            * (readByte & MASK_BOTTOM_SIX_BITS);
-                    thisChar = (char) finalVal;
-                } else {
-                    // interpret it as a single byte
-                    thisChar = (char) readByte;
-                }
-
-                if (thisChar == '\n') {
-                    keepReading = false;
-                } else {
-                    retString.append(thisChar);
-                }
-            } while (keepReading);
-        } catch (EOFException eofEx) {
-            return null;
-        }
-
-        if (retString.length() == 0) {
-            return "";
-        }
-
-        return retString.toString();
-    }
-
-    /**
-     * The actual heavy lifting of reading in the next WARC record
-     *
-     * @param in           the data input stream
-     * @param headerBuffer a blank string buffer to contain the WARC header
-     * @return the content byts (w/ the headerBuffer populated)
-     * @throws java.io.IOException
-     */
-    private static byte[] readNextRecord(DataInputStream in, StringBuilder headerBuffer)
-            throws IOException {
-        if (in == null) {
-            return null;
-        }
-        if (headerBuffer == null) {
-            return null;
-        }
-
-        String line = null;
-        boolean foundMark = false;
-        boolean inHeader = true;
-        byte[] retContent = null;
-
-        // cannot be using a buffered reader here!!!!
-        // just read the header
-        // first - find our WARC header
-        while ((!foundMark) && ((line = readLineFromInputStream(in)) != null)) {
-            if (line.startsWith(WARC_VERSION)) {
-                foundMark = true;
+  public static String WARC_VERSION = "WARC/0.18";
+  public static String WARC_VERSION_LINE = "WARC/0.18\n";
+  private static String NEWLINE = "\n";
+
+  private static byte MASK_THREE_BYTE_CHAR = (byte) (0xE0);
+  private static byte MASK_TWO_BYTE_CHAR = (byte) (0xC0);
+  private static byte MASK_TOPMOST_BIT = (byte) (0x80);
+  private static byte MASK_BOTTOM_SIX_BITS = (byte) (0x1F);
+  private static byte MASK_BOTTOM_FIVE_BITS = (byte) (0x3F);
+  private static byte MASK_BOTTOM_FOUR_BITS = (byte) (0x0F);
+  private WarcHeader warcHeader = new WarcHeader();
+  private byte[] warcContent = null;
+  private String warcFilePath = "";
+
+  /**
+   * Default Constructor
+   */
+  public ClueWeb09WarcRecord() {
+  }
+
+  /**
+   * Copy Constructor
+   *
+   * @param o
+   */
+  public ClueWeb09WarcRecord(ClueWeb09WarcRecord o) {
+    this.warcHeader = new WarcHeader(o.warcHeader);
+    this.warcContent = o.warcContent;
+  }
+
+  /**
+   * Our read line implementation. We cannot allow buffering here (for gzip
+   * streams), so we need to use DataInputStream. Also, we need to account
+   * for Java's UTF-8 implementation.
+   *
+   * @param in the input data stream
+   * @return the read line (or null if eof)
+   * @throws java.io.IOException
+   */
+  public static String readLineFromInputStream(DataInputStream in) throws IOException {
+    StringBuilder retString = new StringBuilder();
+
+    boolean keepReading = true;
+    try {
+      do {
+        char thisChar = 0;
+        byte readByte = in.readByte();
+
+        // check to see if it's a multibyte character
+        if ((readByte & MASK_THREE_BYTE_CHAR) == MASK_THREE_BYTE_CHAR) {
+          // need to read the next 2 bytes
+          if (in.available() < 2) {
+            // treat these all as individual characters
+            retString.append((char) readByte);
+            int numAvailable = in.available();
+            for (int i = 0; i < numAvailable; i++) {
+              retString.append((char) (in.readByte()));
             }
+            continue;
+          }
+          byte secondByte = in.readByte();
+          byte thirdByte = in.readByte();
+          // ensure the topmost bit is set
+          if (((secondByte & MASK_TOPMOST_BIT) != MASK_TOPMOST_BIT)
+                  || ((thirdByte & MASK_TOPMOST_BIT) != MASK_TOPMOST_BIT)) {
+            // treat these as individual characters
+            retString.append((char) readByte);
+            retString.append((char) secondByte);
+            retString.append((char) thirdByte);
+            continue;
+          }
+          int finalVal = (thirdByte & MASK_BOTTOM_FIVE_BITS) + 64
+                  * (secondByte & MASK_BOTTOM_FIVE_BITS) + 4096
+                  * (readByte & MASK_BOTTOM_FOUR_BITS);
+          thisChar = (char) finalVal;
+        } else if ((readByte & MASK_TWO_BYTE_CHAR) == MASK_TWO_BYTE_CHAR) {
+          // need to read next byte
+          if (in.available() < 1) {
+            // treat this as individual characters
+            retString.append((char) readByte);
+            continue;
+          }
+          byte secondByte = in.readByte();
+          if ((secondByte & MASK_TOPMOST_BIT) != MASK_TOPMOST_BIT) {
+            retString.append((char) readByte);
+            retString.append((char) secondByte);
+            continue;
+          }
+          int finalVal = (secondByte & MASK_BOTTOM_FIVE_BITS) + 64
+                  * (readByte & MASK_BOTTOM_SIX_BITS);
+          thisChar = (char) finalVal;
+        } else {
+          // interpret it as a single byte
+          thisChar = (char) readByte;
         }
 
-        // no WARC mark?
-        if (!foundMark) {
-            return null;
-        }
-
-        // then read to the first newline
-        // make sure we get the content length here
-        int contentLength = -1;
-        boolean foundContentLength = false;
-        while (!foundContentLength && inHeader && ((line = readLineFromInputStream(in)) != null)) {
-            if ((line.trim().length() == 0) && foundContentLength) {
-                inHeader = false;
-            } else {
-                headerBuffer.append(line);
-                headerBuffer.append(NEWLINE);
-                String[] thisHeaderPieceParts = line.split(":", 2);
-                if (thisHeaderPieceParts.length == 2) {
-                    if (thisHeaderPieceParts[0].toLowerCase(Locale.US).startsWith("content-length")) {
-                        foundContentLength = true;
-                        try {
-                            contentLength = Integer.parseInt(thisHeaderPieceParts[1].trim());
-                        } catch (NumberFormatException nfEx) {
-                            contentLength = -1;
-                        }
-                    }
-                }
-            }
-        }
-
-        if (contentLength < 0) {
-            return null;
-        }
-
-        // now read the bytes of the content
-        retContent = new byte[contentLength];
-        int totalWant = contentLength;
-        int totalRead = 0;
-        while (totalRead < contentLength) {
-            try {
-                int numRead = in.read(retContent, totalRead, totalWant);
-                if (numRead < 0) {
-                    return null;
-                } else {
-                    totalRead += numRead;
-                    totalWant = contentLength - totalRead;
-                } // end if (numRead < 0) / else
-            } catch (EOFException eofEx) {
-                // resize to what we have
-                if (totalRead > 0) {
-                    byte[] newReturn = new byte[totalRead];
-                    System.arraycopy(retContent, 0, newReturn, 0, totalRead);
-                    return newReturn;
-                } else {
-                    return null;
-                }
-            } // end try/catch (EOFException)
-        } // end while (totalRead < contentLength)
-
-        return retContent;
-    }
-
-    /**
-     * Reads in a WARC record from a data input stream
-     *
-     * @param in the input stream
-     * @return a WARC record (or null if eof)
-     * @throws java.io.IOException
-     */
-    public static ClueWeb09WarcRecord readNextWarcRecord(DataInputStream in) throws IOException {
-        StringBuilder recordHeader = new StringBuilder();
-        byte[] recordContent = readNextRecord(in, recordHeader);
-        if (recordContent == null) {
-            return null;
-        }
-
-        // extract out our header information
-        String thisHeaderString = recordHeader.toString();
-        String[] headerLines = thisHeaderString.split(NEWLINE);
-
-        ClueWeb09WarcRecord retRecord = new ClueWeb09WarcRecord();
-        for (int i = 0; i < headerLines.length; i++) {
-            String[] pieces = headerLines[i].split(":", 2);
-            if (pieces.length != 2) {
-                retRecord.addHeaderMetadata(pieces[0], "");
-                continue;
-            }
-            String thisKey = pieces[0].trim();
-            String thisValue = pieces[1].trim();
-
-            // check for known keys
-            if (thisKey.equals("WARC-Type")) {
-                retRecord.setWarcRecordType(thisValue);
-            } else if (thisKey.equals("WARC-Date")) {
-                retRecord.setWarcDate(thisValue);
-            } else if (thisKey.equals("WARC-Record-ID")) {
-                retRecord.setWarcUUID(thisValue);
-            } else if (thisKey.equals("Content-Type")) {
-                retRecord.setWarcContentType(thisValue);
-            } else {
-                retRecord.addHeaderMetadata(thisKey, thisValue);
-            }
+        if (thisChar == '\n') {
+          keepReading = false;
+        } else {
+          retString.append(thisChar);
         }
-
-        // set the content
-        retRecord.setContent(recordContent);
-
-        return retRecord;
-    }
-
-    /**
-     * Retrieves the total record length (header and content)
-     *
-     * @return total record length
-     */
-    public int getTotalRecordLength() {
-        int headerLength = warcHeader.toString().length();
-        return (headerLength + warcContent.length);
+      } while (keepReading);
+    } catch (EOFException eofEx) {
+      return null;
     }
 
-    /**
-     * Sets the record content (copy)
-     *
-     * @param o record to copy from
-     */
-    public void set(ClueWeb09WarcRecord o) {
-        this.warcHeader = new WarcHeader(o.warcHeader);
-        this.warcContent = o.warcContent;
+    if (retString.length() == 0) {
+      return "";
     }
 
-    /**
-     * Gets the file path from this WARC file (if set)
-     */
-    public String getWarcFilePath() {
-        return warcFilePath;
+    return retString.toString();
+  }
+
+  /**
+   * The actual heavy lifting of reading in the next WARC record
+   *
+   * @param in           the data input stream
+   * @param headerBuffer a blank string buffer to contain the WARC header
+   * @return the content bytes (w/ the headerBuffer populated)
+   * @throws java.io.IOException
+   */
+  private static byte[] readNextRecord(DataInputStream in, StringBuilder headerBuffer)
+          throws IOException {
+    if (in == null) {
+      return null;
     }
-
-    /**
-     * Sets the warc file path (optional - for use with getWarcFilePath)
-     *
-     * @param path
-     */
-    public void setWarcFilePath(String path) {
-        warcFilePath = path;
+    if (headerBuffer == null) {
+      return null;
     }
 
-    /**
-     * Sets the record type string
-     *
-     * @param recordType
-     */
-    public void setWarcRecordType(String recordType) {
-        warcHeader.recordType = recordType;
+    String line = null;
+    boolean foundMark = false;
+    boolean inHeader = true;
+    byte[] retContent = null;
+
+    // we must not use a buffered reader here
+    // just read the header
+    // first - find our WARC header
+    while ((!foundMark) && ((line = readLineFromInputStream(in)) != null)) {
+      if (line.startsWith(WARC_VERSION)) {
+        foundMark = true;
+      }
     }
 
-    /**
-     * Sets the content type string
-     *
-     * @param contentType
-     */
-    public void setWarcContentType(String contentType) {
-        warcHeader.contentType = contentType;
+    // no WARC mark?
+    if (!foundMark) {
+      return null;
     }
 
-    /**
-     * Sets the WARC header date string
-     *
-     * @param dateString
-     */
-    public void setWarcDate(String dateString) {
-        warcHeader.dateString = dateString;
+    // then read to the first newline
+    // make sure we get the content length here
+    int contentLength = -1;
+    boolean foundContentLength = false;
+    while (!foundContentLength && inHeader && ((line = readLineFromInputStream(in)) != null)) {
+      if ((line.trim().length() == 0) && foundContentLength) {
+        inHeader = false;
+      } else {
+        headerBuffer.append(line);
+        headerBuffer.append(NEWLINE);
+        String[] thisHeaderPieceParts = line.split(":", 2);
+        if (thisHeaderPieceParts.length == 2) {
+          if (thisHeaderPieceParts[0].toLowerCase(Locale.US).startsWith("content-length")) {
+            foundContentLength = true;
+            try {
+              contentLength = Integer.parseInt(thisHeaderPieceParts[1].trim());
+            } catch (NumberFormatException nfEx) {
+              contentLength = -1;
+            }
+          }
+        }
+      }
     }
 
-    /**
-     * Sets the WARC uuid string
-     *
-     * @param UUID
-     */
-    public void setWarcUUID(String UUID) {
-        warcHeader.UUID = UUID;
+    if (contentLength < 0) {
+      return null;
     }
 
-    /**
-     * Adds a key/value pair to a WARC header. This is needed to filter out
-     * known keys
-     *
-     * @param key
-     * @param value
-     */
-    public void addHeaderMetadata(String key, String value) {
-        // don't allow addition of known keys
-        if (key.equals("WARC-Type")) {
-            return;
+    // now read the bytes of the content
+    retContent = new byte[contentLength];
+    int totalWant = contentLength;
+    int totalRead = 0;
+    while (totalRead < contentLength) {
+      try {
+        int numRead = in.read(retContent, totalRead, totalWant);
+        if (numRead < 0) {
+          return null;
+        } else {
+          totalRead += numRead;
+          totalWant = contentLength - totalRead;
+        } // end if (numRead < 0) / else
+      } catch (EOFException eofEx) {
+        // resize to what we have
+        if (totalRead > 0) {
+          byte[] newReturn = new byte[totalRead];
+          System.arraycopy(retContent, 0, newReturn, 0, totalRead);
+          return newReturn;
+        } else {
+          return null;
         }
-        if (key.equals("WARC-Date")) {
-            return;
-        }
-        if (key.equals("WARC-Record-ID")) {
-            return;
-        }
-        if (key.equals("Content-Type")) {
-            return;
-        }
-        if (key.equals("Content-Length")) {
-            return;
-        }
-
-        warcHeader.metadata.put(key, value);
+      } // end try/catch (EOFException)
+    } // end while (totalRead < contentLength)
+
+    return retContent;
+  }
+
+  /**
+   * Reads in a WARC record from a data input stream
+   *
+   * @param in the input stream
+   * @return a WARC record (or null if eof)
+   * @throws java.io.IOException
+   */
+  public static ClueWeb09WarcRecord readNextWarcRecord(DataInputStream in) throws IOException {
+    StringBuilder recordHeader = new StringBuilder();
+    byte[] recordContent = readNextRecord(in, recordHeader);
+    if (recordContent == null) {
+      return null;
     }
 
-    /**
-     * Clears all metadata items from a header
-     */
-    public void clearHeaderMetadata() {
-        warcHeader.metadata.clear();
+    // extract out our header information
+    String thisHeaderString = recordHeader.toString();
+    String[] headerLines = thisHeaderString.split(NEWLINE);
+
+    ClueWeb09WarcRecord retRecord = new ClueWeb09WarcRecord();
+    for (int i = 0; i < headerLines.length; i++) {
+      String[] pieces = headerLines[i].split(":", 2);
+      if (pieces.length != 2) {
+        retRecord.addHeaderMetadata(pieces[0], "");
+        continue;
+      }
+      String thisKey = pieces[0].trim();
+      String thisValue = pieces[1].trim();
+
+      // check for known keys
+      if (thisKey.equals("WARC-Type")) {
+        retRecord.setWarcRecordType(thisValue);
+      } else if (thisKey.equals("WARC-Date")) {
+        retRecord.setWarcDate(thisValue);
+      } else if (thisKey.equals("WARC-Record-ID")) {
+        retRecord.setWarcUUID(thisValue);
+      } else if (thisKey.equals("Content-Type")) {
+        retRecord.setWarcContentType(thisValue);
+      } else {
+        retRecord.addHeaderMetadata(thisKey, thisValue);
+      }
     }
 
-    /**
-     * Gets the set of metadata items from the header
-     */
-    public Set<Entry<String, String>> getHeaderMetadata() {
-        return warcHeader.metadata.entrySet();
+    // set the content
+    retRecord.setContent(recordContent);
+
+    return retRecord;
+  }
+
+  /**
+   * Retrieves the total record length (header and content)
+   *
+   * @return total record length
+   */
+  public int getTotalRecordLength() {
+    int headerLength = warcHeader.toString().length();
+    return (headerLength + warcContent.length);
+  }
+
+  /**
+   * Sets the record content (copy)
+   *
+   * @param o record to copy from
+   */
+  public void set(ClueWeb09WarcRecord o) {
+    this.warcHeader = new WarcHeader(o.warcHeader);
+    this.warcContent = o.warcContent;
+  }
+
+  /**
+   * Gets the file path from this WARC file (if set)
+   */
+  public String getWarcFilePath() {
+    return warcFilePath;
+  }
+
+  /**
+   * Sets the warc file path (optional - for use with getWarcFilePath)
+   *
+   * @param path
+   */
+  public void setWarcFilePath(String path) {
+    warcFilePath = path;
+  }
+
+  /**
+   * Sets the record type string
+   *
+   * @param recordType
+   */
+  public void setWarcRecordType(String recordType) {
+    warcHeader.recordType = recordType;
+  }
+
+  /**
+   * Sets the content type string
+   *
+   * @param contentType
+   */
+  public void setWarcContentType(String contentType) {
+    warcHeader.contentType = contentType;
+  }
+
+  /**
+   * Sets the WARC header date string
+   *
+   * @param dateString
+   */
+  public void setWarcDate(String dateString) {
+    warcHeader.dateString = dateString;
+  }
+
+  /**
+   * Sets the WARC uuid string
+   *
+   * @param UUID
+   */
+  public void setWarcUUID(String UUID) {
+    warcHeader.UUID = UUID;
+  }
+
+  /**
+   * Adds a key/value pair to a WARC header. This is needed to filter out
+   * known keys
+   *
+   * @param key
+   * @param value
+   */
+  public void addHeaderMetadata(String key, String value) {
+    // don't allow addition of known keys
+    if (key.equals("WARC-Type")) {
+      return;
     }
-
-    /**
-     * Gets a value for a specific header metadata key
-     *
-     * @param key
-     */
-    public String getHeaderMetadataItem(String key) {
-        return warcHeader.metadata.get(key);
+    if (key.equals("WARC-Date")) {
+      return;
     }
-
-    /**
-     * Sets the byte content for this record
-     *
-     * @param content
-     */
-    public void setContent(byte[] content) {
-        warcContent = content;
-        warcHeader.contentLength = content.length;
+    if (key.equals("WARC-Record-ID")) {
+      return;
     }
-
-    /**
-     * Retrieves the byte content for this record
-     */
-    public byte[] getByteContent() {
-        return warcContent;
+    if (key.equals("Content-Type")) {
+      return;
     }
-
-    /**
-     * Retrieves the bytes content as a UTF-8 string
-     */
-    public String getContentUTF8() {
-        return new String(warcContent, StandardCharsets.UTF_8);
+    if (key.equals("Content-Length")) {
+      return;
     }
 
+    warcHeader.metadata.put(key, value);
+  }
+
+  /**
+   * Clears all metadata items from a header
+   */
+  public void clearHeaderMetadata() {
+    warcHeader.metadata.clear();
+  }
+
+  /**
+   * Gets the set of metadata items from the header
+   */
+  public Set<Entry<String, String>> getHeaderMetadata() {
+    return warcHeader.metadata.entrySet();
+  }
+
+  /**
+   * Gets a value for a specific header metadata key
+   *
+   * @param key
+   */
+  public String getHeaderMetadataItem(String key) {
+    return warcHeader.metadata.get(key);
+  }
+
+  /**
+   * Sets the byte content for this record
+   *
+   * @param content
+   */
+  public void setContent(byte[] content) {
+    warcContent = content;
+    warcHeader.contentLength = content.length;
+  }
+
+  /**
+   * Retrieves the byte content for this record
+   */
+  public byte[] getByteContent() {
+    return warcContent;
+  }
+
+  /**
+   * Retrieves the bytes content as a UTF-8 string
+   */
+  public String getContentUTF8() {
+    return new String(warcContent, StandardCharsets.UTF_8);
+  }
+
+  /**
+   * Gets the header record type string
+   */
+  public String getHeaderRecordType() {
+    return warcHeader.recordType;
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder retBuffer = new StringBuilder();
+    retBuffer.append(warcHeader.toString());
+    retBuffer.append(NEWLINE);
+    retBuffer.append(warcContent);
+    return retBuffer.toString();
+  }
+
+  /**
+   * Gets the WARC header as a string
+   */
+  public String getHeaderString() {
+    return warcHeader.toString();
+  }
+
+  /**
+   * Serialization output
+   *
+   * @param out
+   * @throws java.io.IOException
+   */
+  public void write(DataOutput out) throws IOException {
+    warcHeader.write(out);
+    out.write(warcContent);
+  }
+
+  /**
+   * Serialization input
+   *
+   * @param in
+   * @throws java.io.IOException
+   */
+  public void readFields(DataInput in) throws IOException {
+    warcHeader.readFields(in);
+    int contentLengthBytes = warcHeader.contentLength;
+    warcContent = new byte[contentLengthBytes];
+    in.readFully(warcContent);
+  }
+
+  public String getDocid() {
+    return getHeaderMetadataItem("WARC-TREC-ID");
+  }
+
+  public String getURL() {
+    return getHeaderMetadataItem("WARC-Target-URI");
+  }
+
+  public String getContent() {
+    String str = getContentUTF8();
+    int i = str.indexOf("Content-Length:");
+    int j = str.indexOf("\n", i);
+
+    return str.substring(j + 1);
+  }
+
+  /**
+   * Sets the content for this record from a string (converted via String.getBytes())
+   *
+   * @param content
+   */
+  public void setContent(String content) {
+    setContent(content.getBytes());
+  }
+
+  public String getDisplayContentType() {
+    return "text/html";
+  }
+
+  /**
+   * Warc header class
+   */
+  public class WarcHeader {
+    public String contentType = "";
+    public String UUID = "";
+    public String dateString = "";
+    public String recordType = "";
+    public HashMap<String, String> metadata = new HashMap<String, String>();
+    public int contentLength = 0;
+
     /**
-     * Gets the header record type string
+     * Default constructor
      */
-    public String getHeaderRecordType() {
-        return warcHeader.recordType;
-    }
-
-    @Override
-    public String toString() {
-        StringBuilder retBuffer = new StringBuilder();
-        retBuffer.append(warcHeader.toString());
-        retBuffer.append(NEWLINE);
-        retBuffer.append(warcContent);
-        return retBuffer.toString();
+    public WarcHeader() {
     }
 
     /**
-     * Gets the WARC header as a string
+     * Copy Constructor
+     *
+     * @param o other WARC header
      */
-    public String getHeaderString() {
-        return warcHeader.toString();
+    public WarcHeader(WarcHeader o) {
+      this.contentType = o.contentType;
+      this.UUID = o.UUID;
+      this.dateString = o.dateString;
+      this.recordType = o.recordType;
+      this.metadata.putAll(o.metadata);
+      this.contentLength = o.contentLength;
     }
 
     /**
      * Serialization output
      *
-     * @param out
+     * @param out the data output stream
      * @throws java.io.IOException
      */
     public void write(DataOutput out) throws IOException {
-        warcHeader.write(out);
-        out.write(warcContent);
+      out.writeUTF(contentType);
+      out.writeUTF(UUID);
+      out.writeUTF(dateString);
+      out.writeUTF(recordType);
+      out.writeInt(metadata.size());
+      Iterator<Entry<String, String>> metadataIterator = metadata.entrySet().iterator();
+      while (metadataIterator.hasNext()) {
+        Entry<String, String> thisEntry = metadataIterator.next();
+        out.writeUTF(thisEntry.getKey());
+        out.writeUTF(thisEntry.getValue());
+      }
+      out.writeInt(contentLength);
     }
 
     /**
      * Serialization input
      *
-     * @param in
+     * @param in the data input stream
      * @throws java.io.IOException
      */
     public void readFields(DataInput in) throws IOException {
-        warcHeader.readFields(in);
-        int contentLengthBytes = warcHeader.contentLength;
-        warcContent = new byte[contentLengthBytes];
-        in.readFully(warcContent);
-    }
-
-    public String getDocid() {
-        return getHeaderMetadataItem("WARC-TREC-ID");
-    }
-
-    public String getURL() {
-        return getHeaderMetadataItem("WARC-Target-URI");
-    }
-
-    public String getContent() {
-        String str = getContentUTF8();
-        int i = str.indexOf("Content-Length:");
-        int j = str.indexOf("\n", i);
-
-        return str.substring(j + 1);
-    }
-
-    /**
-     * Sets the byte content for this record
-     *
-     * @param content
-     */
-    public void setContent(String content) {
-        setContent(content.getBytes());
-    }
-
-    public String getDisplayContentType() {
-        return "text/html";
+      contentType = in.readUTF();
+      UUID = in.readUTF();
+      dateString = in.readUTF();
+      recordType = in.readUTF();
+      metadata.clear();
+      int numMetaItems = in.readInt();
+      for (int i = 0; i < numMetaItems; i++) {
+        String thisKey = in.readUTF();
+        String thisValue = in.readUTF();
+        metadata.put(thisKey, thisValue);
+      }
+      contentLength = in.readInt();
     }
 
-    /**
-     * Warc header class
-     */
-    public class WarcHeader {
-        public String contentType = "";
-        public String UUID = "";
-        public String dateString = "";
-        public String recordType = "";
-        public HashMap<String, String> metadata = new HashMap<String, String>();
-        public int contentLength = 0;
-
-        /**
-         * Default constructor
-         */
-        public WarcHeader() {
-        }
-
-        /**
-         * Copy Constructor
-         *
-         * @param o other WARC header
-         */
-        public WarcHeader(WarcHeader o) {
-            this.contentType = o.contentType;
-            this.UUID = o.UUID;
-            this.dateString = o.dateString;
-            this.recordType = o.recordType;
-            this.metadata.putAll(o.metadata);
-            this.contentLength = o.contentLength;
-        }
-
-        /**
-         * Serialization output
-         *
-         * @param out the data output stream
-         * @throws java.io.IOException
-         */
-        public void write(DataOutput out) throws IOException {
-            out.writeUTF(contentType);
-            out.writeUTF(UUID);
-            out.writeUTF(dateString);
-            out.writeUTF(recordType);
-            out.writeInt(metadata.size());
-            Iterator<Entry<String, String>> metadataIterator = metadata.entrySet().iterator();
-            while (metadataIterator.hasNext()) {
-                Entry<String, String> thisEntry = metadataIterator.next();
-                out.writeUTF(thisEntry.getKey());
-                out.writeUTF(thisEntry.getValue());
-            }
-            out.writeInt(contentLength);
-        }
-
-        /**
-         * Serialization input
-         *
-         * @param in the data input stream
-         * @throws java.io.IOException
-         */
-        public void readFields(DataInput in) throws IOException {
-            contentType = in.readUTF();
-            UUID = in.readUTF();
-            dateString = in.readUTF();
-            recordType = in.readUTF();
-            metadata.clear();
-            int numMetaItems = in.readInt();
-            for (int i = 0; i < numMetaItems; i++) {
-                String thisKey = in.readUTF();
-                String thisValue = in.readUTF();
-                metadata.put(thisKey, thisValue);
-            }
-            contentLength = in.readInt();
-        }
-
-        @Override
-        public String toString() {
-            StringBuilder retBuffer = new StringBuilder();
+    @Override
+    public String toString() {
+      StringBuilder retBuffer = new StringBuilder();
 
-            retBuffer.append(WARC_VERSION);
-            retBuffer.append(NEWLINE);
+      retBuffer.append(WARC_VERSION);
+      retBuffer.append(NEWLINE);
 
-            retBuffer.append("WARC-Type: " + recordType + NEWLINE);
-            retBuffer.append("WARC-Date: " + dateString + NEWLINE);
+      retBuffer.append("WARC-Type: " + recordType + NEWLINE);
+      retBuffer.append("WARC-Date: " + dateString + NEWLINE);
 
-            retBuffer.append("WARC-Record-ID: " + UUID + NEWLINE);
-            Iterator<Entry<String, String>> metadataIterator = metadata.entrySet().iterator();
-            while (metadataIterator.hasNext()) {
-                Entry<String, String> thisEntry = metadataIterator.next();
-                retBuffer.append(thisEntry.getKey());
-                retBuffer.append(": ");
-                retBuffer.append(thisEntry.getValue());
-                retBuffer.append(NEWLINE);
-            }
+      retBuffer.append("WARC-Record-ID: " + UUID + NEWLINE);
+      Iterator<Entry<String, String>> metadataIterator = metadata.entrySet().iterator();
+      while (metadataIterator.hasNext()) {
+        Entry<String, String> thisEntry = metadataIterator.next();
+        retBuffer.append(thisEntry.getKey());
+        retBuffer.append(": ");
+        retBuffer.append(thisEntry.getValue());
+        retBuffer.append(NEWLINE);
+      }
 
-            retBuffer.append("Content-Type: " + contentType + NEWLINE);
-            retBuffer.append("Content-Length: " + contentLength + NEWLINE);
+      retBuffer.append("Content-Type: " + contentType + NEWLINE);
+      retBuffer.append("Content-Length: " + contentLength + NEWLINE);
 
-            return retBuffer.toString();
-        }
+      return retBuffer.toString();
     }
+  }
 }
diff --git a/src/main/java/io/anserini/index/IndexArgs.java b/src/main/java/io/anserini/index/IndexArgs.java
index 32fe3cd794..34b78ec790 100644
--- a/src/main/java/io/anserini/index/IndexArgs.java
+++ b/src/main/java/io/anserini/index/IndexArgs.java
@@ -24,25 +24,25 @@
  */
 public class IndexArgs {
 
-    // required arguments
+  // required arguments
 
-    @Option(name = "-input", metaVar = "[Path]", required = true, usage = "Collection Directory")
-    String input;
+  @Option(name = "-input", metaVar = "[Path]", required = true, usage = "Collection Directory")
+  String input;
 
-    @Option(name = "-index", metaVar = "[Path]", required = true, usage = "Lucene index")
-    String index;
+  @Option(name = "-index", metaVar = "[Path]", required = true, usage = "Lucene index")
+  String index;
 
-    @Option(name = "-threads", metaVar = "[Number]", required = true, usage = "Number of Threads")
-    int threads;
+  @Option(name = "-threads", metaVar = "[Number]", required = true, usage = "Number of Threads")
+  int threads;
 
-    // optional arguments
+  // optional arguments
 
-    @Option(name = "-positions", usage = "Boolean switch to index positions")
-    boolean positions = false;
+  @Option(name = "-positions", usage = "Boolean switch to index positions")
+  boolean positions = false;
 
-    @Option(name = "-optimize", usage = "Boolean switch to optimize index (force merge)")
-    boolean optimize = false;
+  @Option(name = "-optimize", usage = "Boolean switch to optimize index (force merge)")
+  boolean optimize = false;
 
-    @Option(name = "-doclimit", metaVar = "[Number]", required = false, usage = "Maximum number of *.warc documents to index (-1 to index everything)")
-    int doclimit = -1;
+  @Option(name = "-doclimit", metaVar = "[Number]", required = false, usage = "Maximum number of *.warc documents to index (-1 to index everything)")
+  int doclimit = -1;
 }
\ No newline at end of file
diff --git a/src/main/java/io/anserini/index/IndexClueWeb09b.java b/src/main/java/io/anserini/index/IndexClueWeb09b.java
index a565cc40c1..80fbd1091a 100644
--- a/src/main/java/io/anserini/index/IndexClueWeb09b.java
+++ b/src/main/java/io/anserini/index/IndexClueWeb09b.java
@@ -57,244 +57,244 @@
  */
 public final class IndexClueWeb09b {
 
-    private static final Logger LOG = LogManager.getLogger(IndexClueWeb09b.class);
+  private static final Logger LOG = LogManager.getLogger(IndexClueWeb09b.class);
 
-    public static final String FIELD_BODY = "contents";
-    public static final String FIELD_ID = "id";
-    private static final String RESPONSE = "response";
+  public static final String FIELD_BODY = "contents";
+  public static final String FIELD_ID = "id";
+  private static final String RESPONSE = "response";
 
-    private final class IndexerThread extends Thread {
+  private final class IndexerThread extends Thread {
 
-        final private Path inputWarcFile;
+    final private Path inputWarcFile;
 
-        final private IndexWriter writer;
+    final private IndexWriter writer;
 
-        volatile int addCount;
+    volatile int addCount;
 
-        public IndexerThread(IndexWriter writer, Path inputWarcFile) throws IOException {
-            this.writer = writer;
-            this.inputWarcFile = inputWarcFile;
-            setName(inputWarcFile.getFileName().toString());
-        }
+    public IndexerThread(IndexWriter writer, Path inputWarcFile) throws IOException {
+      this.writer = writer;
+      this.inputWarcFile = inputWarcFile;
+      setName(inputWarcFile.getFileName().toString());
+    }
 
-        private int indexWarcFile() throws IOException {
+    private int indexWarcFile() throws IOException {
 
-            int i = 0;
+      int i = 0;
 
-            try (DataInputStream inStream = new DataInputStream(new GZIPInputStream(Files.newInputStream(inputWarcFile, StandardOpenOption.READ)))) {
+      try (DataInputStream inStream = new DataInputStream(new GZIPInputStream(Files.newInputStream(inputWarcFile, StandardOpenOption.READ)))) {
 
-                // iterate through our stream
-                ClueWeb09WarcRecord wDoc;
-                while ((wDoc = ClueWeb09WarcRecord.readNextWarcRecord(inStream)) != null) {
-                    // see if it's a response record
-                    if (RESPONSE.equals(wDoc.getHeaderRecordType())) {
+        // iterate through our stream
+        ClueWeb09WarcRecord wDoc;
+        while ((wDoc = ClueWeb09WarcRecord.readNextWarcRecord(inStream)) != null) {
+          // see if it's a response record
+          if (RESPONSE.equals(wDoc.getHeaderRecordType())) {
 
-                        String id = wDoc.getDocid();
+            String id = wDoc.getDocid();
 
-                        org.jsoup.nodes.Document jDoc = Jsoup.parse(wDoc.getContent());
+            org.jsoup.nodes.Document jDoc = Jsoup.parse(wDoc.getContent());
 
-                        String contents = jDoc.text();
-                        // don't index empty documents
-                        if (contents.trim().length() == 0) {
-                            System.err.println(id);
-                            continue;
-                        }
+            String contents = jDoc.text();
+            // don't index empty documents
+            if (contents.trim().length() == 0) {
+              System.err.println(id);
+              continue;
+            }
 
-                        // make a new, empty document
-                        Document document = new Document();
+            // make a new, empty document
+            Document document = new Document();
 
-                        // document ID
-                        document.add(new StringField(FIELD_ID, id, Field.Store.YES));
+            // document ID
+            document.add(new StringField(FIELD_ID, id, Field.Store.YES));
 
-                        // entire document
-                        if (positions)
-                            document.add(new TextField(FIELD_BODY, contents, Field.Store.NO));
-                        else
-                            document.add(new NoPositionsTextField(FIELD_BODY, contents));
+            // entire document
+            if (positions)
+              document.add(new TextField(FIELD_BODY, contents, Field.Store.NO));
+            else
+              document.add(new NoPositionsTextField(FIELD_BODY, contents));
 
-                        writer.addDocument(document);
-                        i++;
-                    }
-                }
-            }
-            return i;
+            writer.addDocument(document);
+            i++;
+          }
         }
+      }
+      return i;
+    }
 
-        @Override
-        public void run() {
-            try {
-                addCount = indexWarcFile();
-                System.out.println("*./" + inputWarcFile.getParent().getFileName().toString() + File.separator + inputWarcFile.getFileName().toString() + "  " + addCount);
-            } catch (IOException ioe) {
-                System.out.println(Thread.currentThread().getName() + ": ERROR: unexpected IOException:");
-                ioe.printStackTrace(System.out);
-            }
-        }
+    @Override
+    public void run() {
+      try {
+        addCount = indexWarcFile();
+        System.out.println("*./" + inputWarcFile.getParent().getFileName().toString() + File.separator + inputWarcFile.getFileName().toString() + "  " + addCount);
+      } catch (IOException ioe) {
+        System.out.println(Thread.currentThread().getName() + ": ERROR: unexpected IOException:");
+        ioe.printStackTrace(System.out);
+      }
     }
+  }
 
-    private final Path indexPath;
-    private final Path docDir;
+  private final Path indexPath;
+  private final Path docDir;
 
-    private boolean positions = false;
+  private boolean positions = false;
 
-    public void setPositions(boolean positions) {
-        this.positions = positions;
-    }
+  public void setPositions(boolean positions) {
+    this.positions = positions;
+  }
 
-    private boolean optimize = false;
+  private boolean optimize = false;
 
-    public void setOptimize(boolean optimize) {
-        this.optimize = optimize;
-    }
+  public void setOptimize(boolean optimize) {
+    this.optimize = optimize;
+  }
 
-    private int doclimit = -1;
+  private int doclimit = -1;
 
-    public void setDocLimit(int doclimit) {
-        this.doclimit = doclimit;
-    }
+  public void setDocLimit(int doclimit) {
+    this.doclimit = doclimit;
+  }
 
-    public IndexClueWeb09b(String docsPath, String indexPath) throws IOException {
+  public IndexClueWeb09b(String docsPath, String indexPath) throws IOException {
 
-        this.indexPath = Paths.get(indexPath);
-        if (!Files.exists(this.indexPath))
-            Files.createDirectories(this.indexPath);
+    this.indexPath = Paths.get(indexPath);
+    if (!Files.exists(this.indexPath))
+      Files.createDirectories(this.indexPath);
 
-        docDir = Paths.get(docsPath);
-        if (!Files.exists(docDir) || !Files.isReadable(docDir) || !Files.isDirectory(docDir)) {
-            System.out.println("Document directory '" + docDir.toString() + "' does not exist or is not readable, please check the path");
-            System.exit(1);
-        }
+    docDir = Paths.get(docsPath);
+    if (!Files.exists(docDir) || !Files.isReadable(docDir) || !Files.isDirectory(docDir)) {
+      System.out.println("Document directory '" + docDir.toString() + "' does not exist or is not readable, please check the path");
+      System.exit(1);
     }
+  }
 
 
-    private final static PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:*.warc.gz");
+  private final static PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:*.warc.gz");
 
 
-    static List<Path> discoverWarcFiles(Path p) {
+  static List<Path> discoverWarcFiles(Path p) {
 
-        final List<Path> warcFiles = new ArrayList<>();
+    final List<Path> warcFiles = new ArrayList<>();
 
-        FileVisitor<Path> fv = new SimpleFileVisitor<Path>() {
-            @Override
-            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+    FileVisitor<Path> fv = new SimpleFileVisitor<Path>() {
+      @Override
+      public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
 
-                Path name = file.getFileName();
-                if (name != null && matcher.matches(name))
-                    warcFiles.add(file);
-                return FileVisitResult.CONTINUE;
-            }
-        };
+        Path name = file.getFileName();
+        if (name != null && matcher.matches(name))
+          warcFiles.add(file);
+        return FileVisitResult.CONTINUE;
+      }
+    };
 
-        try {
-            Files.walkFileTree(p, fv);
-        } catch (IOException e) {
-            e.printStackTrace();
-        }
-        return warcFiles;
+    try {
+      Files.walkFileTree(p, fv);
+    } catch (IOException e) {
+      e.printStackTrace();
     }
-
-    /**
-     * KStemAnalyzer: Filters {@link ClassicTokenizer} with {@link org.apache.lucene.analysis.standard.ClassicFilter},
-     * {@link org.apache.lucene.analysis.core.LowerCaseFilter} and {@link org.apache.lucene.analysis.en.KStemFilter}.
-     *
-     * @return KStemAnalyzer
-     * @throws IOException
-     */
-    public static Analyzer analyzer() throws IOException {
-        return CustomAnalyzer.builder()
-                .withTokenizer("classic")
-                .addTokenFilter("classic")
-                .addTokenFilter("lowercase")
-                .addTokenFilter("kstem")
-                .build();
+    return warcFiles;
+  }
+
+  /**
+   * KStemAnalyzer: Filters {@link ClassicTokenizer} with {@link org.apache.lucene.analysis.standard.ClassicFilter},
+   * {@link org.apache.lucene.analysis.core.LowerCaseFilter} and {@link org.apache.lucene.analysis.en.KStemFilter}.
+   *
+   * @return KStemAnalyzer
+   * @throws IOException
+   */
+  public static Analyzer analyzer() throws IOException {
+    return CustomAnalyzer.builder()
+            .withTokenizer("classic")
+            .addTokenFilter("classic")
+            .addTokenFilter("lowercase")
+            .addTokenFilter("kstem")
+            .build();
+  }
+
+  public int indexWithThreads(int numThreads) throws IOException, InterruptedException {
+
+    System.out.println("Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'...");
+
+    final Directory dir = FSDirectory.open(indexPath);
+
+    final IndexWriterConfig iwc = new IndexWriterConfig(analyzer());
+
+    iwc.setSimilarity(new BM25Similarity());
+    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+    iwc.setRAMBufferSizeMB(256.0);
+    iwc.setUseCompoundFile(false);
+    iwc.setMergeScheduler(new ConcurrentMergeScheduler());
+
+    final IndexWriter writer = new IndexWriter(dir, iwc);
+
+    final ExecutorService executor = Executors.newFixedThreadPool(numThreads);
+
+    List<Path> warcFiles = discoverWarcFiles(docDir);
+    if (doclimit > 0 && warcFiles.size() > doclimit) // truncate only when more files were found than the limit allows
+      warcFiles = warcFiles.subList(0, doclimit);
+
+    for (Path f : warcFiles)
+      executor.execute(new IndexerThread(writer, f));
+
+    // add some delay to let the scheduler spawn some threads
+    Thread.sleep(30000);
+    executor.shutdown(); // Disable new tasks from being submitted
+
+    try {
+      // Wait for existing tasks to terminate
+      while (!executor.awaitTermination(5, TimeUnit.MINUTES)) {
+        Thread.sleep(1000);
+      }
+    } catch (InterruptedException ie) {
+      // (Re-)Cancel if current thread also interrupted
+      executor.shutdownNow();
+      // Preserve interrupt status
+      Thread.currentThread().interrupt();
     }
 
-    public int indexWithThreads(int numThreads) throws IOException, InterruptedException {
-
-        System.out.println("Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'...");
-
-        final Directory dir = FSDirectory.open(indexPath);
+    int numIndexed = writer.maxDoc();
 
-        final IndexWriterConfig iwc = new IndexWriterConfig(analyzer());
-
-        iwc.setSimilarity(new BM25Similarity());
-        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
-        iwc.setRAMBufferSizeMB(256.0);
-        iwc.setUseCompoundFile(false);
-        iwc.setMergeScheduler(new ConcurrentMergeScheduler());
-
-        final IndexWriter writer = new IndexWriter(dir, iwc);
-
-        final ExecutorService executor = Executors.newFixedThreadPool(numThreads);
-
-        List<Path> warcFiles = discoverWarcFiles(docDir);
-        if (doclimit > 0 && warcFiles.size() < doclimit)
-            warcFiles = warcFiles.subList(0, doclimit);
+    try {
+      writer.commit();
+      if (optimize)
+        writer.forceMerge(1);
+    } finally {
+      writer.close();
+    }
 
-        for (Path f : warcFiles)
-            executor.execute(new IndexerThread(writer, f));
+    return numIndexed;
+  }
 
-        //add some delay to let some threads spawn by scheduler
-        Thread.sleep(30000);
-        executor.shutdown(); // Disable new tasks from being submitted
+  public static void main(String[] args) throws IOException, InterruptedException {
 
-        try {
-            // Wait for existing tasks to terminate
-            while (!executor.awaitTermination(5, TimeUnit.MINUTES)) {
-                Thread.sleep(1000);
-            }
-        } catch (InterruptedException ie) {
-            // (Re-)Cancel if current thread also interrupted
-            executor.shutdownNow();
-            // Preserve interrupt status
-            Thread.currentThread().interrupt();
-        }
+    IndexArgs indexArgs = new IndexArgs();
 
-        int numIndexed = writer.maxDoc();
+    CmdLineParser parser = new CmdLineParser(indexArgs, ParserProperties.defaults().withUsageWidth(90));
 
-        try {
-            writer.commit();
-            if (optimize)
-                writer.forceMerge(1);
-        } finally {
-            writer.close();
-        }
-
-        return numIndexed;
+    try {
+      parser.parseArgument(args);
+    } catch (CmdLineException e) {
+      System.err.println(e.getMessage());
+      parser.printUsage(System.err);
+      System.err.println("Example: IndexClueWeb09b" + parser.printExample(OptionHandlerFilter.REQUIRED));
+      return;
     }
 
-    public static void main(String[] args) throws IOException, InterruptedException {
-
-        IndexArgs indexArgs = new IndexArgs();
+    final long start = System.nanoTime();
+    IndexClueWeb09b indexer = new IndexClueWeb09b(indexArgs.input, indexArgs.index);
 
-        CmdLineParser parser = new CmdLineParser(indexArgs, ParserProperties.defaults().withUsageWidth(90));
-
-        try {
-            parser.parseArgument(args);
-        } catch (CmdLineException e) {
-            System.err.println(e.getMessage());
-            parser.printUsage(System.err);
-            System.err.println("Example: IndexClueWeb09b" + parser.printExample(OptionHandlerFilter.REQUIRED));
-            return;
-        }
+    indexer.setPositions(indexArgs.positions);
+    indexer.setOptimize(indexArgs.optimize);
+    indexer.setDocLimit(indexArgs.doclimit);
 
-        final long start = System.nanoTime();
-        IndexClueWeb09b indexer = new IndexClueWeb09b(indexArgs.input, indexArgs.index);
+    LOG.info("Index path: " + indexArgs.index);
+    LOG.info("Threads: " + indexArgs.threads);
+    LOG.info("Positions: " + indexArgs.positions);
+    LOG.info("Optimize (merge segments): " + indexArgs.optimize);
+    LOG.info("Doc limit: " + (indexArgs.doclimit == -1 ? "all docs" : "" + indexArgs.doclimit));
 
-        indexer.setPositions(indexArgs.positions);
-        indexer.setOptimize(indexArgs.optimize);
-        indexer.setDocLimit(indexArgs.doclimit);
+    LOG.info("Indexer: start");
 
-        LOG.info("Index path: " + indexArgs.index);
-        LOG.info("Threads: " + indexArgs.threads);
-        LOG.info("Positions: " + indexArgs.positions);
-        LOG.info("Optimize (merge segments): " + indexArgs.optimize);
-        LOG.info("Doc limit: " + (indexArgs.doclimit == -1 ? "all docs" : "" + indexArgs.doclimit));
-
-        LOG.info("Indexer: start");
-
-        int numIndexed = indexer.indexWithThreads(indexArgs.threads);
-        final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
-        LOG.info("Total " + numIndexed + " documents indexed in " + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"));
-    }
+    int numIndexed = indexer.indexWithThreads(indexArgs.threads);
+    final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
+    LOG.info("Total " + numIndexed + " documents indexed in " + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"));
+  }
 }
diff --git a/src/main/java/io/anserini/search/SearchClueWeb09b.java b/src/main/java/io/anserini/search/SearchClueWeb09b.java
index a7ec003ac0..03f14fa1f8 100644
--- a/src/main/java/io/anserini/search/SearchClueWeb09b.java
+++ b/src/main/java/io/anserini/search/SearchClueWeb09b.java
@@ -48,155 +48,155 @@
  */
 public final class SearchClueWeb09b implements Closeable {
 
-    private final IndexReader reader;
+  private final IndexReader reader;
 
-    public SearchClueWeb09b(String indexDir) throws IOException {
+  public SearchClueWeb09b(String indexDir) throws IOException {
 
-        Path indexPath = Paths.get(indexDir);
+    Path indexPath = Paths.get(indexDir);
 
-        if (!Files.exists(indexPath) || !Files.isDirectory(indexPath) || !Files.isReadable(indexPath)) {
-            throw new IllegalArgumentException(indexDir + " does not exist or is not a directory.");
-        }
-
-        this.reader = DirectoryReader.open(FSDirectory.open(indexPath));
+    if (!Files.exists(indexPath) || !Files.isDirectory(indexPath) || !Files.isReadable(indexPath)) {
+      throw new IllegalArgumentException(indexDir + " does not exist or is not a directory.");
     }
 
-    @Override
-    public void close() throws IOException {
-        reader.close();
-    }
+    this.reader = DirectoryReader.open(FSDirectory.open(indexPath));
+  }
 
-    private static String extract(String line, String tag) {
+  @Override
+  public void close() throws IOException {
+    reader.close();
+  }
 
-        int i = line.indexOf(tag);
+  private static String extract(String line, String tag) {
 
-        if (i == -1) throw new IllegalArgumentException("line does not contain the tag : " + tag);
+    int i = line.indexOf(tag);
 
-        int j = line.indexOf("\"", i + tag.length() + 2);
+    if (i == -1) throw new IllegalArgumentException("line does not contain the tag : " + tag);
 
-        if (j == -1) throw new IllegalArgumentException("line does not contain quotation");
+    int j = line.indexOf("\"", i + tag.length() + 2);
 
-        return line.substring(i + tag.length() + 2, j);
-    }
+    if (j == -1) throw new IllegalArgumentException("line does not contain quotation");
 
-    /**
-     * @param topicsFile One of: topics.web.1-50.txt topics.web.51-100.txt topics.web.101-150.txt topics.web.151-200.txt
-     * @return SortedMap where keys are query/topic IDs and values are title portions of the topics
-     * @throws IOException
-     */
-    static SortedMap<Integer, String> readQueries(Path topicsFile) throws IOException {
+    return line.substring(i + tag.length() + 2, j);
+  }
 
-        SortedMap<Integer, String> map = new TreeMap<>();
-        List<String> lines = Files.readAllLines(topicsFile, StandardCharsets.UTF_8);
+  /**
+   * @param topicsFile One of: topics.web.1-50.txt topics.web.51-100.txt topics.web.101-150.txt topics.web.151-200.txt
+   * @return SortedMap where keys are query/topic IDs and values are title portions of the topics
+   * @throws IOException
+   */
+  static SortedMap<Integer, String> readQueries(Path topicsFile) throws IOException {
 
-        String number = "";
-        String query = "";
+    SortedMap<Integer, String> map = new TreeMap<>();
+    List<String> lines = Files.readAllLines(topicsFile, StandardCharsets.UTF_8);
 
-        for (String line : lines) {
+    String number = "";
+    String query = "";
 
-            line = line.trim();
+    for (String line : lines) {
 
-            if (line.startsWith("<topic"))
-                number = extract(line, "number");
+      line = line.trim();
 
-            if (line.startsWith("<query>") && line.endsWith("</query>"))
-                query = line.substring(7, line.length() - 8).trim();
+      if (line.startsWith("<topic"))
+        number = extract(line, "number");
 
-            if (line.startsWith("</topic>"))
-                map.put(Integer.parseInt(number), query);
+      if (line.startsWith("<query>") && line.endsWith("</query>"))
+        query = line.substring(7, line.length() - 8).trim();
 
-        }
+      if (line.startsWith("</topic>"))
+        map.put(Integer.parseInt(number), query);
 
-        lines.clear();
-        return map;
     }
 
-    /**
-     * Prints TREC submission file to the standard output stream.
-     *
-     * @param topicsFile One of: topics.web.1-50.txt topics.web.51-100.txt topics.web.101-150.txt topics.web.151-200.txt
-     * @param operator   Default search operator: AND or OR
-     * @throws IOException
-     * @throws ParseException
-     */
-
-    public void search(String topicsFile, String submissionFile, QueryParser.Operator operator) throws IOException, ParseException {
-
-        Path topicsPath = Paths.get(topicsFile);
-
-        if (!Files.exists(topicsPath) || !Files.isRegularFile(topicsPath) || !Files.isReadable(topicsPath)) {
-            throw new IllegalArgumentException("Topics file : " + topicsFile + " does not exist or is not a (readable) file.");
-        }
-
-        IndexSearcher searcher = new IndexSearcher(reader);
-        searcher.setSimilarity(new BM25Similarity());
-
-
-        final String runTag = "BM25_Krovetz_" + FIELD_BODY + "_" + operator.toString();
-
-        PrintWriter out = new PrintWriter(Files.newBufferedWriter(Paths.get(submissionFile), StandardCharsets.US_ASCII));
-
-
-        QueryParser queryParser = new QueryParser(FIELD_BODY, analyzer());
-        queryParser.setDefaultOperator(operator);
-
-
-        SortedMap<Integer, String> topics = readQueries(topicsPath);
-
-        for (Map.Entry<Integer, String> entry : topics.entrySet()) {
-
-            int qID = entry.getKey();
-            String queryString = entry.getValue();
-            Query query = queryParser.parse(queryString);
-
-            /**
-             * For Web Tracks 2010,2011,and 2012; an experimental run consists of the top 10,000 documents for each topic query.
-             */
-            ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
-
-            /**
-             * the first column is the topic number.
-             * the second column is currently unused and should always be "Q0".
-             * the third column is the official document identifier of the retrieved document.
-             * the fourth column is the rank the document is retrieved.
-             * the fifth column shows the score (integer or floating point) that generated the ranking.
-             * the sixth column is called the "run tag" and should be a unique identifier for your
-             */
-            for (int i = 0; i < hits.length; i++) {
-                int docId = hits[i].doc;
-                Document doc = searcher.doc(docId);
-                out.print(qID);
-                out.print("\tQ0\t");
-                out.print(doc.get(FIELD_ID));
-                out.print("\t");
-                out.print(i);
-                out.print("\t");
-                out.print(hits[i].score);
-                out.print("\t");
-                out.print(runTag);
-                out.println();
-            }
-        }
-        out.flush();
-        out.close();
+    lines.clear();
+    return map;
+  }
+
+  /**
+   * Writes the TREC submission file to {@code submissionFile}.
+   *
+   * @param topicsFile One of: topics.web.1-50.txt topics.web.51-100.txt topics.web.101-150.txt topics.web.151-200.txt
+   * @param operator   Default search operator: AND or OR
+   * @throws IOException
+   * @throws ParseException
+   */
+
+  public void search(String topicsFile, String submissionFile, QueryParser.Operator operator) throws IOException, ParseException {
+
+    Path topicsPath = Paths.get(topicsFile);
+
+    if (!Files.exists(topicsPath) || !Files.isRegularFile(topicsPath) || !Files.isReadable(topicsPath)) {
+      throw new IllegalArgumentException("Topics file : " + topicsFile + " does not exist or is not a (readable) file.");
     }
 
-    public static void main(String[] args) throws IOException, ParseException {
+    IndexSearcher searcher = new IndexSearcher(reader);
+    searcher.setSimilarity(new BM25Similarity());
+
+
+    final String runTag = "BM25_Krovetz_" + FIELD_BODY + "_" + operator.toString();
 
-        if (args.length != 3) {
-            System.err.println("Usage: SearcherCW09B <topicsFile> <submissionFile> <indexDir>");
-            System.err.println("topicsFile: input file containing queries. One of: topics.web.1-50.txt topics.web.51-100.txt topics.web.101-150.txt topics.web.151-200.txt");
-            System.err.println("submissionFile: redirect stdout to capture the submission file for trec_eval or gdeval.pl");
-            System.err.println("indexDir: index directory");
-            System.exit(1);
-        }
+    PrintWriter out = new PrintWriter(Files.newBufferedWriter(Paths.get(submissionFile), StandardCharsets.US_ASCII));
 
-        String topicsFile = args[0];
-        String submissionFile = args[1];
-        String indexDir = args[2];
 
-        SearchClueWeb09b searcher = new SearchClueWeb09b(indexDir);
-        searcher.search(topicsFile, submissionFile, QueryParser.Operator.OR);
-        searcher.close();
+    QueryParser queryParser = new QueryParser(FIELD_BODY, analyzer());
+    queryParser.setDefaultOperator(operator);
+
+
+    SortedMap<Integer, String> topics = readQueries(topicsPath);
+
+    for (Map.Entry<Integer, String> entry : topics.entrySet()) {
+
+      int qID = entry.getKey();
+      String queryString = entry.getValue();
+      Query query = queryParser.parse(queryString);
+
+      /**
+       * For Web Tracks 2010, 2011, and 2012, an experimental run consists of the top 10,000 documents for each topic query; only the top 1,000 are retrieved here.
+       */
+      ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
+
+      /**
+       * the first column is the topic number.
+       * the second column is currently unused and should always be "Q0".
+       * the third column is the official document identifier of the retrieved document.
+       * the fourth column is the rank at which the document is retrieved.
+       * the fifth column shows the score (integer or floating point) that generated the ranking.
+       * the sixth column is called the "run tag" and should be a unique identifier for your run.
+       */
+      for (int i = 0; i < hits.length; i++) {
+        int docId = hits[i].doc;
+        Document doc = searcher.doc(docId);
+        out.print(qID);
+        out.print("\tQ0\t");
+        out.print(doc.get(FIELD_ID));
+        out.print("\t");
+        out.print(i);
+        out.print("\t");
+        out.print(hits[i].score);
+        out.print("\t");
+        out.print(runTag);
+        out.println();
+      }
+    }
+    out.flush();
+    out.close();
+  }
+
+  public static void main(String[] args) throws IOException, ParseException {
+
+    if (args.length != 3) {
+      System.err.println("Usage: SearcherCW09B <topicsFile> <submissionFile> <indexDir>");
+      System.err.println("topicsFile: input file containing queries. One of: topics.web.1-50.txt topics.web.51-100.txt topics.web.101-150.txt topics.web.151-200.txt");
+      System.err.println("submissionFile: redirect stdout to capture the submission file for trec_eval or gdeval.pl");
+      System.err.println("indexDir: index directory");
+      System.exit(1);
     }
+
+    String topicsFile = args[0];
+    String submissionFile = args[1];
+    String indexDir = args[2];
+
+    SearchClueWeb09b searcher = new SearchClueWeb09b(indexDir);
+    searcher.search(topicsFile, submissionFile, QueryParser.Operator.OR);
+    searcher.close();
+  }
 }