// Searcher.java
package ru.brandanalyst.core.searcher;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.joda.time.LocalDateTime;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Required;
import ru.brandanalyst.core.model.Article;
import ru.brandanalyst.core.model.Brand;
import ru.brandanalyst.core.model.Params;
import ru.brandanalyst.core.util.Cf;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Lucene-backed full-text search over the brand index.
 *
 * <p>The index directory is injected through {@link #setIndexDirBrand(String)} and the
 * underlying {@link IndexSearcher} is opened in {@link #afterPropertiesSet()}. Call
 * {@link #close()} once the bean is no longer needed (for example by registering it as the
 * bean's destroy method) so that the opened index is released.
 *
 * <p>Article search is currently disabled; see {@link #searchArticleByContent(String)}.
 */
public class Searcher implements InitializingBean {
    /**
     * Maximum number of documents returned for a single search.
     */
    private static final int MAX_DOC = 1000;

    /**
     * Lucene compatibility version shared by the analyzer and the query parser.
     */
    private static final Version LUCENE_VERSION = Version.LUCENE_34;

    /** File-system path of the brand index directory. */
    private String indexDirBrand;

    /** Searcher over the brand index; {@code null} until opened and again after {@link #close()}. */
    private IndexSearcher indexSearcherBrand;

    /**
     * Sets the file-system path of the directory that holds the brand index.
     *
     * @param indexDirBrand path of the brand index directory
     */
    @Required
    public void setIndexDirBrand(String indexDirBrand) {
        this.indexDirBrand = indexDirBrand;
    }

    /**
     * Opens the brand index located at the configured directory.
     *
     * @throws Exception if the index cannot be opened
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        indexSearcherBrand = new IndexSearcher(new SimpleFSDirectory(new File(indexDirBrand)));
    }

    /**
     * Closes the brand index searcher if it is open. Calling this method more than once,
     * or before the index was opened, does nothing.
     *
     * @throws IOException if closing the underlying searcher fails
     */
    public void close() throws IOException {
        IndexSearcher searcher = indexSearcherBrand;
        // Drop the reference first so the field never points at a closed searcher,
        // even when close() itself throws.
        indexSearcherBrand = null;
        if (searcher != null) {
            searcher.close();
        }
    }

    /**
     * Searches brands whose {@code Name} or {@code Description} field matches the query.
     *
     * @param query query string in Lucene query-parser syntax; may be {@code null} or blank
     * @return at most {@value #MAX_DOC} matching brands in the order returned by the index
     *         search; an empty list when {@code query} is {@code null} or blank
     * @throws IllegalStateException if the brand index has not been opened or was closed
     * @throws RuntimeException      if the query cannot be parsed ("invalid query") or the
     *                               index cannot be read ("index access failed")
     */
    public List<Brand> searchBrandByDescription(String query) {
        // A null or blank string carries no search terms; answer with "no results"
        // instead of failing inside the query parser.
        if (query == null || query.trim().length() == 0) {
            return new ArrayList<Brand>();
        }
        if (indexSearcherBrand == null) {
            throw new IllegalStateException("brand index is not open");
        }
        try {
            Analyzer analyzer = new RussianAnalyzer(LUCENE_VERSION);
            QueryParser descriptionQueryParser =
                    new MultiFieldQueryParser(LUCENE_VERSION, new String[]{"Name", "Description"}, analyzer);
            Query descriptionQuery = descriptionQueryParser.parse(query);
            // The second argument is the search filter; null means no filter is applied.
            ScoreDoc[] hits = indexSearcherBrand.search(descriptionQuery, null, MAX_DOC).scoreDocs;
            List<Brand> brands = new ArrayList<Brand>(hits.length);
            for (ScoreDoc hit : hits) {
                Document doc = indexSearcherBrand.doc(hit.doc);
                brands.add(brandMap(doc));
            }
            return brands;
        } catch (IOException e) {
            throw new RuntimeException("index access failed", e);
        } catch (ParseException e) {
            throw new RuntimeException("invalid query", e);
        }
    }

    /**
     * Searches news articles by their content.
     *
     * <p>Article search is currently disabled: no article index is opened by this class,
     * so the query is ignored and an empty list is always returned.
     *
     * @param query query string (currently unused)
     * @return an empty list
     */
    public List<Article> searchArticleByContent(String query) {
        // TODO: to re-enable, open a second IndexSearcher over an article index directory,
        // parse the query against the "Content" field with a RussianAnalyzer, and map each
        // hit with articleMap(Document), mirroring searchBrandByDescription.
        return Cf.newArrayList();
    }

    /**
     * Builds a {@link Brand} from the stored fields of an index document.
     *
     * <p>Reads the fields {@code Id}, {@code Name}, {@code Description}, {@code Website}
     * and {@code BranchId}; the brand's params argument is always {@code Params.empty("")}.
     *
     * @param doc index document of a brand
     * @return the brand described by the document
     * @throws NumberFormatException if {@code Id} or {@code BranchId} is missing or not a number
     */
    private static Brand brandMap(Document doc) {
        return new Brand(
                Long.parseLong(doc.get("Id")),
                doc.get("Name"),
                doc.get("Description"),
                doc.get("Website"),
                Long.parseLong(doc.get("BranchId")), Params.empty("")
        );
    }

    /**
     * Builds an {@link Article} from the stored fields of an index document.
     *
     * <p>Reads the fields {@code Id}, {@code BrandId}, {@code InfoSourceId}, {@code Title},
     * {@code Content}, {@code Link}, {@code Tstamp} and {@code NumLikes}. Not called while
     * article search is disabled; kept for when it is restored.
     *
     * @param doc index document of an article
     * @return the article described by the document
     * @throws NumberFormatException if any numeric field is missing or not a number
     */
    private static Article articleMap(Document doc) {
        return new Article(
                Long.parseLong(doc.get("Id")),
                Long.parseLong(doc.get("BrandId")),
                Long.parseLong(doc.get("InfoSourceId")),
                doc.get("Title"),
                doc.get("Content"),
                doc.get("Link"),
                // NOTE(review): Tstamp is presumably epoch milliseconds - confirm against the indexer.
                new LocalDateTime(Long.parseLong(doc.get("Tstamp"))),
                Integer.parseInt(doc.get("NumLikes"))
        );
    }
}