Skip to content
master
Switch branches/tags
Code

Files

Permalink
Failed to load latest commit information.
Type
Name
Latest commit message
Commit time
src
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Download Build Status codecov

indexer4j

Simple full text indexing and searching library for Java

Install

Gradle

repositories {
    jcenter()
}

dependencies {
   compile 'com.haeungun.indexer4j:indexer4j:<:the-latest-version>'
}

Maven

<dependency>
	<groupId>com.haeungun.indexer4j</groupId>
	<artifactId>indexer4j</artifactId>
	<version>the-latest-version</version>
	<type>pom</type>
</dependency>

Features

  • Support TF-IDF, BM25 score
  • Support Regex, N-gram tokenizer
  • Easy indexing with field annotation

TODO

  • Parrallel build and search
  • Support JDK 11 CI on travis CI (Jacoco does not support JDK11 yet)
  • Improve saving and loading features

Examples

@Document
public class ExampleDocument {

    @DocumentId
    private String id;

    @DocumentField
    private String title;

    @DocumentField
    private String contents;

    public ExampleDocument(String id, String title, String contents) {
        this.id = id;
        this.title = title;
        this.contents = contents;
    }

    public String getId() {
        return id;
    }

    public String getTitle() {
        return title;
    }

    public String getContents() {
        return contents;
    }
}

List<ExampleDocument> documents = Arrays.asList(
            new ExampleDocument("doc1", "First Document", "Lorem Lorem Lorem Lorem Lorem"),
            new ExampleDocument("doc2", "Third Document", "Lorem is hello java python"),
            new ExampleDocument("doc3", "Second Document", "Lorem ipsum dolor"),
            new ExampleDocument("doc4", "Forth Document", "Lorem"));

Indexer<ExampleDocument> index = new Indexer<>();
for (ExampleDocument document : documents) {
     index.add(document);
}

index.build();

List<SearchResult> result = index.search("First");
// output => [{docId="doc1", score="..."}]

List<SearchResult> result2 = index.search("Lorem ipsum");
// output = [{docId="doc3", score="..."}, {docId="doc1", score="..."}, ...]