Skip to content

Commit

Permalink
More efficient search
Browse files Browse the repository at this point in the history
  • Loading branch information
klakegg committed Jun 19, 2012
1 parent bad87b8 commit 7fc61b7
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 58 deletions.
59 changes: 41 additions & 18 deletions server/src/main/java/no/difi/datahotel/slave/logic/SearchEJB.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;

import javax.ejb.Stateless;
import javax.ejb.Singleton;

import no.difi.datahotel.model.Result;
import no.difi.datahotel.model.Metadata;
import no.difi.datahotel.model.Result;
import no.difi.datahotel.util.Filesystem;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
Expand All @@ -24,35 +25,57 @@
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

@Stateless
@Singleton
public class SearchEJB {

private static int num = 100;
private static QueryParser parser = new QueryParser(Version.LUCENE_33, "searchable", new StandardAnalyzer(Version.LUCENE_33));

private Map<String, Directory> directories = new HashMap<String, Directory>();
private Map<String, IndexSearcher> searchers = new HashMap<String, IndexSearcher>();

public void update(Metadata metadata) {
try {
Directory oldDirectory = directories.get(metadata.getLocation());
IndexSearcher oldSearcher = searchers.get(metadata.getLocation());

public Result find(Metadata metadata, String q, Map<String, String> lookup, int page) throws Exception {
int num = 100;

Directory newDirectory = FSDirectory.open(Filesystem.getFolderPath(FOLDER_CACHE_INDEX, metadata.getLocation()));
IndexSearcher newSearcher = new IndexSearcher(newDirectory);

directories.put(metadata.getLocation(), newDirectory);
searchers.put(metadata.getLocation(), newSearcher);

oldSearcher.close();
oldDirectory.close();
} catch (Exception e) {
metadata.getLogger().log(Level.WARNING, "Unable to load searcher.", e);
}
}

/**
 * Searches the cached index of a dataset and returns one page of hits.
 *
 * <p>The Lucene query is built by joining every {@code lookup} entry as
 * {@code +field:value} and the free-text {@code q} (when non-empty) with
 * {@code " AND "}. Values are passed to the query parser as-is, so query syntax such
 * as wildcards inside them is interpreted by the parser.
 *
 * <p>The top {@code num * page} hits are fetched and the entries from offset
 * {@code num * (page - 1)} onwards are returned. When no searcher is cached for the
 * dataset, or when parsing or searching fails, the failure is logged (if any) and the
 * result is returned with only its page set; entries and post count keep whatever
 * defaults {@code Result} provides.
 *
 * @param metadata dataset to search; its location selects the cached searcher
 * @param q        free-text query, may be {@code null} or empty
 * @param lookup   field/value pairs that must all match, may be {@code null}
 * @param page     1-based page number
 * @return the result for the requested page
 */
public Result find(Metadata metadata, String q, Map<String, String> lookup, int page) {
    StringBuilder query = new StringBuilder();
    if (lookup != null) {
        for (Map.Entry<String, String> entry : lookup.entrySet()) {
            query.append(query.length() == 0 ? "" : " AND ")
                    .append("+").append(entry.getKey()).append(":").append(entry.getValue());
        }
    }
    if (q != null && !q.equals("")) {
        query.append(query.length() == 0 ? "" : " AND ").append(q);
    }

    Result result = new Result();
    result.setPage(page);

    IndexSearcher searcher = searchers.get(metadata.getLocation());
    if (searcher != null) {
        try {
            TopDocs docs = searcher.search(parser.parse(query.toString()), num * page);
            List<Map<String, String>> rdocs = convert(searcher, docs);

            // First hit belonging to the requested page.
            int offset = num * (page - 1);
            result.setEntries((rdocs.size() < offset)
                    ? new ArrayList<Map<String, String>>()
                    : rdocs.subList(offset, rdocs.size()));
            result.setPosts(docs.totalHits);
        } catch (Exception e) {
            // Keep the cause in the log; the message alone does not say why the search failed.
            metadata.getLogger().log(Level.WARNING, "Error in search: " + query.toString(), e);
        }
    }

    return result;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ public class UpdateEJB {
@EJB
private IndexEJB indexEJB;
@EJB
private SearchEJB searchEJB;
@EJB
private DataEJB dataEJB;

// TODO How to make this @Asynchronous?
Expand All @@ -41,6 +43,7 @@ public void validate(Metadata metadata) {
fieldEJB.update(metadata);
chunkEJB.update(metadata);
indexEJB.update(metadata);
searchEJB.update(metadata);

logger.info("Ready");
dataEJB.setTimestamp(metadata.getLocation(), metadata.getUpdated());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import java.io.File;

import no.difi.datahotel.master.logic.DefinitionEJB;
import no.difi.datahotel.model.Definition;
import no.difi.datahotel.model.Metadata;
import no.difi.datahotel.slave.logic.FieldEJB;

Expand All @@ -15,6 +17,7 @@
public class FieldEJBTest {

private FieldEJB fieldEJB;
private DefinitionEJB definitionEJB;

private static String realHome;

Expand All @@ -33,14 +36,20 @@ public static void afterClass() {
// Per-test setup: creates a fresh FieldEJB through the getFieldEJB() factory and a
// fresh DefinitionEJB, so each test starts from newly constructed beans.
public void before() throws Exception
{
fieldEJB = getFieldEJB();
definitionEJB = new DefinitionEJB();
}

// Factory for the bean under test; returns a newly constructed FieldEJB.
// It is the only place in this test where the FieldEJB instance is created.
public FieldEJB getFieldEJB() throws Exception {
return new FieldEJB();
}

@Test
public void testSimple() {
public void testSimple() throws Exception {
Definition d = new Definition();
d.setShortName("fylkenr");
d.setName("Fylkesnummer");
definitionEJB.setDefinition(d);

Metadata metadata = new Metadata();
metadata.setLocation("difi/geo/fylke");
metadata.setUpdated(System.currentTimeMillis());
Expand All @@ -49,20 +58,14 @@ public void testSimple() {
fieldEJB.update(metadata);
assertEquals(2, fieldEJB.getFields(metadata).size());

// assertEquals(3, fieldEJB.getDefinitions().size());
// assertEquals("Navn", fieldEJB.getDefinition("navn").getName());

assertEquals(1, fieldEJB.getUsage("fylkenr").size());
// assertEquals(1, fieldEJB.getUsage("fylkenr").size());
assertEquals(null, fieldEJB.getUsage("kommuner"));

// Update second
fieldEJB.update(metadata);
assertEquals(2, fieldEJB.getFields(metadata).size());

assertEquals(3, fieldEJB.getDefinitions().size());
assertEquals("Navn", fieldEJB.getDefinition("navn").getName());

assertEquals(1, fieldEJB.getUsage("fylkenr").size());
// assertEquals(1, fieldEJB.getUsage("fylkenr").size());
assertEquals(null, fieldEJB.getUsage("kommuner"));

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ public class IndexEJBTest {

private IndexEJB indexEJB;
private FieldEJB fieldEJB;

private SearchEJB search;
private SearchEJB searchEJB;

@BeforeClass
public static void beforeClass() throws Exception {
Expand All @@ -62,7 +61,7 @@ public void before() throws Exception {
settingsFieldField.setAccessible(true);
settingsFieldField.set(indexEJB, fieldEJB);

search = new SearchEJB();
searchEJB = new SearchEJB();

metadata = new Metadata();
metadata.setLocation("difi/miljo/kalkulator");
Expand All @@ -73,44 +72,38 @@ public void before() throws Exception {
// Runs the field, index and searcher updates, in that order, for the fixture
// metadata. There are no assertions: the test passes when none of the updates
// throws. testSearch() also calls this method as its setup step.
public void testIndex() throws Exception {
fieldEJB.update(metadata);
indexEJB.update(metadata);
searchEJB.update(metadata);
}

// Points the fixture metadata at the location "difi/miljo/no-exists" and runs the
// index and searcher updates against it. There are no assertions: the test passes
// when both update calls return without throwing.
@Test
public void testNoSource() {
metadata.setLocation("difi/miljo/no-exists");

indexEJB.update(metadata);
searchEJB.update(metadata);
}

@Test
public void testNoIndex() {
Exception ex = null;

metadata.setLocation("no/dataset/here");
try {
search.find(metadata, "kings", null, 1);
} catch (Exception e) {
ex = e;
}

assertNotNull(ex);
assertEquals(0, searchEJB.find(metadata, "kings", null, 1).getEntries().size());
}

@Test
public void testSearch() throws Exception {
testIndex();

assertEquals(2, search.find(metadata, "Energi", null, 1).getEntries().size());
assertEquals(0, search.find(metadata, "km", null, 1).getEntries().size());
assertEquals(1, search.find(metadata, "tog", null, 1).getEntries().size());
assertEquals(1, search.find(metadata, "ark", null, 1).getEntries().size());
assertEquals(2, search.find(metadata, "BUSS", null, 1).getEntries().size());

assertEquals(0, search.find(metadata, "Energi", null, 2).getEntries().size());
assertEquals(0, search.find(metadata, "km", null, 2).getEntries().size());
assertEquals(0, search.find(metadata, "tog", null, 2).getEntries().size());
assertEquals(0, search.find(metadata, "ark", null, 2).getEntries().size());
assertEquals(0, search.find(metadata, "BUSS", null, 2).getEntries().size());
assertEquals(2, searchEJB.find(metadata, "Energi", null, 1).getEntries().size());
assertEquals(0, searchEJB.find(metadata, "km", null, 1).getEntries().size());
assertEquals(1, searchEJB.find(metadata, "tog", null, 1).getEntries().size());
assertEquals(1, searchEJB.find(metadata, "ark", null, 1).getEntries().size());
assertEquals(2, searchEJB.find(metadata, "BUSS", null, 1).getEntries().size());

assertEquals(0, searchEJB.find(metadata, "Energi", null, 2).getEntries().size());
assertEquals(0, searchEJB.find(metadata, "km", null, 2).getEntries().size());
assertEquals(0, searchEJB.find(metadata, "tog", null, 2).getEntries().size());
assertEquals(0, searchEJB.find(metadata, "ark", null, 2).getEntries().size());
assertEquals(0, searchEJB.find(metadata, "BUSS", null, 2).getEntries().size());
}

@Test
Expand All @@ -124,27 +117,28 @@ public void testLookupAdv() throws Exception {

fieldEJB.update(metadata);
indexEJB.update(metadata);
searchEJB.update(metadata);

Map<String, String> query = new HashMap<String, String>();
query.put("kommune", "1401");
query.put("fylke", "14");
assertEquals(1, search.find(metadata, null, query, 1).getEntries().size());
assertEquals(0, search.find(metadata, null, query, 2).getEntries().size());
assertEquals(1, searchEJB.find(metadata, null, query, 1).getEntries().size());
assertEquals(0, searchEJB.find(metadata, null, query, 2).getEntries().size());

query.clear();
query.put("kommune", "1401");
assertEquals(1, search.find(metadata, null, query, 1).getEntries().size());
assertEquals(0, search.find(metadata, null, query, 2).getEntries().size());
assertEquals(1, searchEJB.find(metadata, null, query, 1).getEntries().size());
assertEquals(0, searchEJB.find(metadata, null, query, 2).getEntries().size());

query.clear();
query.put("fylke", "14");
assertEquals(26, search.find(metadata, "", query, 1).getEntries().size());
assertEquals(0, search.find(metadata, "", query, 2).getEntries().size());
assertEquals(26, searchEJB.find(metadata, "", query, 1).getEntries().size());
assertEquals(0, searchEJB.find(metadata, "", query, 2).getEntries().size());

query.clear();
query.put("navn", "l*anger");
assertEquals(2, search.find(metadata, "", query, 1).getEntries().size());
assertEquals(0, search.find(metadata, "", query, 2).getEntries().size());
assertEquals(2, searchEJB.find(metadata, "", query, 1).getEntries().size());
assertEquals(0, searchEJB.find(metadata, "", query, 2).getEntries().size());

indexEJB.delete("difi/geo/kommune");
}
Expand Down Expand Up @@ -253,10 +247,12 @@ public void testUpToDate() {
metadata.setLogger(logger);

indexEJB.update(metadata);
searchEJB.update(metadata);

verify(logger).info("Building index.");

indexEJB.update(metadata);
searchEJB.update(metadata);

verify(logger).info("Index up to date.");
}
Expand Down

0 comments on commit 7fc61b7

Please sign in to comment.