Skip to content

Commit

Permalink
Upgrade to Lucene 9.3.0 and remove ES/Solr code paths (#1951)
Browse files: browse the repository at this point in the history.
  • Loading branch information
lintool committed Aug 2, 2022
1 parent 5af657d commit 2725655
Show file tree
Hide file tree
Showing 45 changed files with 55 additions and 6,014 deletions.
103 changes: 27 additions & 76 deletions pom.xml
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>io.anserini</groupId>
<artifactId>anserini</artifactId>
<version>0.14.5-SNAPSHOT</version>
<version>0.15.0-SNAPSHOT</version>
<name>Anserini</name>
<description>An information retrieval toolkit built on Lucene</description>
<url>http://anserini.io/</url>
Expand All @@ -26,8 +26,7 @@
</developers>

<properties>
<lucene.version>9.0.0</lucene.version>
<solr.version>9.0.0</solr.version>
<lucene.version>9.3.0</lucene.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

Expand Down Expand Up @@ -110,14 +109,6 @@
<mainClass>io.anserini.search.SearchCollection</mainClass>
<id>SearchCollection</id>
</program>
<program>
<mainClass>io.anserini.search.SearchSolr</mainClass>
<id>SearchSolr</id>
</program>
<program>
<mainClass>io.anserini.search.SearchElastic</mainClass>
<id>SearchElastic</id>
</program>
<program>
<mainClass>io.anserini.search.SearchMsmarco</mainClass>
<id>SearchMsmarco</id>
Expand Down Expand Up @@ -296,11 +287,31 @@
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-codecs</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-backward-codecs</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analysis-common</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analysis-kuromoji</artifactId>
Expand All @@ -323,71 +334,6 @@
<version>4.13.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>${solr.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analysis-common</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency> <!-- only needed for testing -->
<groupId>org.apache.solr</groupId>
<artifactId>solr-test-framework</artifactId>
<version>${solr.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analysis-common</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-codecs</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.0.0</version>
</dependency>
<dependency>
<groupId>org.tukaani</groupId>
<artifactId>xz</artifactId>
Expand Down Expand Up @@ -491,6 +437,11 @@
<artifactId>commons-csv</artifactId>
<version>1.8</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.9</version>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,7 +17,7 @@
package io.anserini.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.TokenFilterFactory;

import java.util.Map;

Expand Down
79 changes: 1 addition & 78 deletions src/main/java/io/anserini/index/IndexArgs.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -69,8 +69,7 @@ public class IndexArgs {

// optional arguments

@Option(name = "-index", metaVar = "[path]", forbids = {"-solr", "-es"},
usage = "Index path.")
@Option(name = "-index", metaVar = "[path]", usage = "Index path.")
public String index;

@Option(name = "-fields", handler = StringArrayOptionHandler.class,
Expand Down Expand Up @@ -160,82 +159,6 @@ public class IndexArgs {
usage = "File that contains deleted tweet ids (longs), one per line; these tweets will be skipped during indexing.")
public String tweetDeletedIdsFile = "";

// Solr options

@Option(name = "-solr", forbids = {"-index", "-es"},
usage = "Indexes into Solr.")
public boolean solr = false;

@Option(name = "-solr.batch", metaVar = "[n]",
usage = "Solr indexing batch size.")
public int solrBatch = 1000;

@Option(name = "-solr.commitWithin", metaVar = "[s]",
usage = "Solr commitWithin setting (in seconds).")
public int solrCommitWithin = 60;

@Option(name = "-solr.index", metaVar = "[name]",
usage = "Solr index name.")
public String solrIndex = null;

@Option(name = "-solr.zkUrl", metaVar = "[urls]",
usage = "Solr ZooKeeper URLs (comma separated list).")
public String zkUrl = null;

@Option(name = "-solr.zkChroot", metaVar = "[path]",
usage = "Solr ZooKeeper chroot")
public String zkChroot = "/";

@Option(name = "-solr.poolSize", metaVar = "[n]",
usage = "Solr client pool size.")
public int solrPoolSize = 16;

// Elasticsearch options

@Option(name = "-es", forbids = {"-index", "-solr"},
usage = "Indexes into Elasticsearch.")
public boolean es = false;

@Option(name = "-es.index", metaVar = "[name]",
usage = "Elasticsearch index name.")
public String esIndex = null;

@Option(name = "-es.batch", metaVar = "[n]",
usage = "Elasticsearch batch index requests size.")
public int esBatch = 1000;

@Option(name = "-es.bulk", metaVar = "[n]",
usage = "Elasticsearch max bulk requests size in bytes.")
public int esBulk = 80000000;

@Option(name = "-es.hostname", metaVar = "[host]",
usage = "Elasticsearch host.")
public String esHostname = "localhost";

@Option(name = "-es.port", metaVar = "[port]",
usage = "Elasticsearch port number.")
public int esPort = 9200;

@Option(name = "-es.user", metaVar = "[username]",
usage = "Elasticsearch user name.")
public String esUser = "elastic";

@Option(name = "-es.password", metaVar = "[password]",
usage = "Elasticsearch password.")
public String esPassword = "changeme";

@Option(name = "-es.poolSize", metaVar = "[num]",
usage = "Elasticsearch client pool size.")
public int esPoolSize = 10;

@Option(name = "-es.connectTimeout", metaVar = "[ms]",
usage = "Elasticsearch (low level) REST client connect timeout (in ms).")
public int esConnectTimeout = TIMEOUT;

@Option(name = "-es.socketTimeout", metaVar = "[ms]",
usage = "Elasticsearch (low level) REST client socket timeout (in ms).")
public int esSocketTimeout = TIMEOUT;

// Sharding options

@Option(name = "-shard.count", metaVar = "[n]",
Expand Down
Loading

0 comments on commit 2725655

Please sign in to comment.