Permalink
Browse files

Merge remote-tracking branch 'origin/master'

  • Loading branch information...
2 parents 3a08155 + d3f3951 commit 318d6d497602d4cfc1c2eb03dd32b6283f402576 @Obus Obus committed May 15, 2012
@@ -1,6 +1,7 @@
package ru.brandanalyst.core.db.provider.mysql;
import org.hamcrest.StringDescription;
+import org.joda.time.LocalDateTime;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.jdbc.core.BatchPreparedStatementSetter;
import org.springframework.jdbc.core.RowCallbackHandler;
@@ -56,7 +57,7 @@ public void setValues(PreparedStatement ps, int i) throws SQLException {
ps.setString(4, (content.length() > MAX_ARTICLE_LENGHT ? content.substring(0, MAX_ARTICLE_LENGHT) : content));
ps.setString(5, a.getLink());
ps.setInt(6, a.getNumLikes());
- ps.setDate(7, new java.sql.Date(a.getTstamp().toDate().getTime()));
+ // Do not use the deprecated java.sql.Date(int, int, int): it expects (year - 1900) and a 0-based
+ // month, so passing 2012 and Joda's 1-based month stored year 3912 and the following month.
+ // Convert the article's own calendar date instead; LocalDate.toDate() yields the start of that day.
+ ps.setDate(7, new java.sql.Date(a.getTstamp().toLocalDate().toDate().getTime()));
}
@Override
@@ -17,6 +17,7 @@
import ru.brandanalyst.core.model.Article;
import ru.brandanalyst.core.model.Brand;
import ru.brandanalyst.core.model.Params;
+import ru.brandanalyst.core.util.Cf;
import java.io.File;
import java.io.IOException;
@@ -36,24 +37,24 @@
private static final int MAX_DOC = 1000;
private String indexDirBrand;
- private String indexDirArticle;
+ //private String indexDirArticle;
private IndexSearcher indexSearcherBrand;
- private IndexSearcher indexSearcherArticle;
+ //private IndexSearcher indexSearcherArticle;
/**
 * Injects the filesystem path of the brand index directory.
 * The property is mandatory ({@code @Required}); {@code afterPropertiesSet()} opens this path
 * as a {@code SimpleFSDirectory} for the brand {@code IndexSearcher}.
 *
 * @param indexDirBrand path to the directory holding the brand index
 */
@Required
public void setIndexDirBrand(String indexDirBrand) {
this.indexDirBrand = indexDirBrand;
}
- @Required
- public void setIndexDirArticle(String indexDirArticle) {
- this.indexDirArticle = indexDirArticle;
- }
+ //@Required
+ //public void setIndexDirArticle(String indexDirArticle) {
+ // this.indexDirArticle = indexDirArticle;
+ //}
@Override
public void afterPropertiesSet() throws Exception {
indexSearcherBrand = new IndexSearcher(new SimpleFSDirectory(new File(indexDirBrand)));
- indexSearcherArticle = new IndexSearcher(new SimpleFSDirectory(new File(indexDirArticle)));
+ // indexSearcherArticle = new IndexSearcher(new SimpleFSDirectory(new File(indexDirArticle)));
}
/**
@@ -85,7 +86,7 @@ public void afterPropertiesSet() throws Exception {
* Поиск по новостям на основе их содержания
*/
public List<Article> searchArticleByContent(String query) {
- try {
+ /* try {
Analyzer analyzer;
analyzer = new RussianAnalyzer(Version.LUCENE_34); // your can change version
QueryParser contentParser = new QueryParser(Version.LUCENE_34, "Content", analyzer);
@@ -102,10 +103,10 @@ public void afterPropertiesSet() throws Exception {
throw new RuntimeException("index access failed", e);
} catch (ParseException e) {
throw new RuntimeException("invalid query", e);
- }
+ } */return Cf.newArrayList();
}
- private Brand brandMap(Document doc) {
+ private static Brand brandMap(Document doc) {
return new Brand(
Long.parseLong(doc.get("Id")),
doc.get("Name"),
@@ -115,7 +116,7 @@ private Brand brandMap(Document doc) {
);
}
- private Article articleMap(Document doc) {
+ private static Article articleMap(Document doc) {
return new Article(
Long.parseLong(doc.get("Id")),
Long.parseLong(doc.get("BrandId")),
@@ -126,5 +127,6 @@ private Article articleMap(Document doc) {
new LocalDateTime(Long.parseLong(doc.get("Tstamp"))),
Integer.parseInt(doc.get("NumLikes"))
);
+
}
}
@@ -0,0 +1,140 @@
+package ru.brandanalyst.core.tmp.index;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.analysis.ru.RussianAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.lucene.util.Version;
+import org.springframework.beans.factory.InitializingBean;
+import org.springframework.beans.factory.annotation.Required;
+import ru.brandanalyst.core.db.provider.ProvidersHandler;
+import ru.brandanalyst.core.db.provider.interfaces.ArticleProvider;
+import ru.brandanalyst.core.db.provider.interfaces.BrandProvider;
+import ru.brandanalyst.core.model.Article;
+import ru.brandanalyst.core.model.Brand;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * @author daddy-bear
+ * Date: 15.05.12 - 9:29
+ */
+
+public class IndexBuilder implements InitializingBean {
+ private static final Logger log = Logger.getLogger(IndexBuilder.class);
+
+ private String directoryBrand;
+ private String directoryArticle;
+ private ProvidersHandler providersHandler;
+
+ @Required
+ public void setProvidersHandler(ProvidersHandler providersHandler) {
+ this.providersHandler = providersHandler;
+ }
+
+ @Required
+ public void setDirectoryBrand(String directoryBrand) {
+ this.directoryBrand = directoryBrand;
+ }
+
+ @Required
+ public void setDirectoryArticle(String directoryArticle) {
+ this.directoryArticle = directoryArticle;
+ }
+
+
+ @Override
+ public void afterPropertiesSet() throws Exception {
+ try {
+ SimpleFSDirectory indexDirectoryBrand = new SimpleFSDirectory(new File(directoryBrand));
+ IndexWriter brandwriter = new IndexWriter(indexDirectoryBrand, new RussianAnalyzer(Version.LUCENE_34), IndexWriter.MaxFieldLength.UNLIMITED); //create pre'index
+ //SimpleFSDirectory indexDirectoryArticle = new SimpleFSDirectory(new File(directoryArticle));
+ //IndexWriter articlewriter = new IndexWriter(indexDirectoryArticle, new RussianAnalyzer(Version.LUCENE_34), IndexWriter.MaxFieldLength.UNLIMITED); //create pre'index
+
+ brandIndex(brandwriter);
+ //articleIndex(articlewriter);
+
+ //articlewriter.optimize();
+ brandwriter.optimize();
+ //articlewriter.close();
+ brandwriter.close();
+ log.info("Index created.");
+ } catch (IOException e) {
+ log.error("Cannot create index");
+ }
+ }
+
+ /**
+ * индексация новостей
+ */
+ private void articleIndex(IndexWriter writer) {
+ log.info("indexing articles");
+ ArticleProvider provider = providersHandler.getArticleProvider();
+ List<Article> list = provider.getAllArticles();
+ try {
+ for (Article item : list) { //add to pre'index all brand's
+ Document doc = createDocument(item);
+ writer.addDocument(doc);
+ }
+ } catch (IOException e) {
+ log.error("no articles to index");
+ }
+ }
+
+
+ /**
+ * индексация брендов
+ */
+ private void brandIndex(IndexWriter writer) {
+
+ log.info("indexing brands");
+ BrandProvider provider = providersHandler.getBrandProvider();
+
+ try {
+ List<Brand> list = provider.getAllBrands();
+ for (Brand item : list) { //add to pre'index all brand's
+ Document doc = createDocument(item);
+ writer.addDocument(doc);
+ }
+ } catch (IOException e) {
+ log.error("no brands to index");
+ }
+ }
+
+ /**
+ * метод, создающий из новости единицу индекса
+ */
+ private Document createDocument(Article a) {
+ Document doc = new Document();
+
+ doc.add(new Field("Id", Long.toString(a.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new Field("InfoSourceId", Long.toString(a.getSourceId()), Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new Field("BrandId", Long.toString(a.getSourceId()), Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new Field("NumLikes", Long.toString(a.getNumLikes()), Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new Field("Link", a.getLink(), Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("Tstamp", Long.toString(a.getTstamp().toDate().getTime()), Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("Content", a.getContent(), Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("Title", a.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
+
+ return doc;
+ }
+
+ /**
+ * метод, создающий из бренда единицу индекса
+ */
+ private Document createDocument(Brand b) { //create document
+
+ Document doc = new Document();
+
+ doc.add(new Field("Id", Long.toString(b.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new Field("Name", b.getName(), Field.Store.YES, Field.Index.ANALYZED)); // create name
+ doc.add(new Field("Description", b.getDescription(), Field.Store.YES, Field.Index.ANALYZED)); //create description
+ doc.add(new Field("Website", b.getWebsite(), Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("BranchId", Long.toString(b.getBranchId()), Field.Store.YES, Field.Index.NOT_ANALYZED));
+ return doc;
+ }
+}
@@ -13,6 +13,14 @@
</bean>
<!--Spring helpers-->
<bean class="org.springframework.beans.factory.annotation.RequiredAnnotationBeanPostProcessor"/>
+
+
+
+ <bean id="indexBuilder" class="ru.brandanalyst.core.tmp.index.IndexBuilder">
+ <property name="directoryBrand" value="index_brand/"/>
+ <property name="directoryArticle" value="index_article/"/>
+ <property name="providersHandler" ref="providersHandler"/>
+ </bean>
<!--DB CONFIG-->
<import resource="classpath:dbhandler_pure.xml"/>
<alias alias="providersHandler" name="pureProvidersHandler"/>
@@ -79,7 +87,7 @@
<!--Searcher conf-->
<bean id="searcher" class="ru.brandanalyst.core.searcher.Searcher">
<property name="indexDirBrand" value="index_brand/"/>
- <property name="indexDirArticle" value="index_article/"/>
+ <!--<property name="indexDirArticle" value="index_article/"/>-->
</bean>
</beans>
@@ -6,15 +6,16 @@ PID=brand-analytics-server.pid
start() {
LANG=ru_RU.UTF8
- CP1=lib/*/*.jar
- CP2=lib/*.jar
+ #CP1=lib/*/*.jar
+ #CP2=lib/*.jar
+ CLASSPATH=`find lib -name '*.jar' -printf '%p:'`$CLASSPATH
if [ -s $PID ]; then
echo already started.
return
fi
- java -cp $( echo $CP1 + $CP2 . | sed 's/ /:/g') \
+ java -classpath $CLASSPATH \
-Dfile.encoding=UTF8 \
-Dorg.apache.commons.logging.LogFactory=org.apache.commons.logging.impl.Log4jFactory \
-Djavax.xml.transform.TransformerFactory=net.sf.saxon.TransformerFactoryImpl \
@@ -22,10 +22,10 @@
<bean class="org.springframework.scheduling.timer.TimerFactoryBean">
<property name="scheduledTimerTasks">
<list>
- <!-- <ref bean="finamGrabberTask"/>
- <ref bean="rssGrabberTask"/>
- <ref bean="analyzerProcessor"/> -->
- <ref bean="mentionProcessor"/>
+ <ref bean="finamGrabberTask"/>
+ <ref bean="rssGrabberTask"/>
+ <ref bean="analyzerProcessor"/>
+ <!--<ref bean="mentionProcessor"/>-->
</list>
</property>
</bean>

0 comments on commit 318d6d4

Please sign in to comment.