Skip to content
livedoor gourmet(ロケタッチグルメ) search demo with Elasticsearch
Find file
Pull request Compare This branch is even with penguinco:master.
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Failed to load latest commit information.
app
config
db
doc
lib
log
public
script
vendor
.gitignore
Gemfile
Gemfile.lock
README.md
Rakefile
config.ru

README.md

elasticsearch demo app

elasticsearchのデモ用のプロジェクトです。 題材としてlivedoorグルメ(ロケタッチグルメ)の公開データセットを利用します。 これらを検索可能にしていく中で基本的なelasticsearchの使い方を習得することが目的です。

setup

elasticsearch

Either run brew install elasticsearch, or download the latest elasticsearch from http://www.elasticsearch.org/download/ and unzip it. Then start it with bin/elasticsearch -f

install kuromoji

cd elasticsearch
bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.0.0
git clone git://github.com/elasticsearch/elasticsearch-analysis-kuromoji.git
cd elasticsearch-analysis-kuromoji/
mvn clean package
cp target/elasticsearch-analysis-kuromoji-1.2.0-SNAPSHOT.jar ../plugins/analysis-kuromoji/elasticsearch-analysis-kuromoji-1.0.0.jar
#restart elasticsearch

rails app

bundle install
bundle exec rails s

livedoor gourmet

cd ld_gourmet_search
mkdir data
cd data
open http://blog.livedoor.jp/techblog/archives/65836960.html
#!!!READ terms of use!!!
wget ldgourmet.tar.gz # placeholder: replace with the full download URL given in the article
tar xzvf ldgourmet.tar.gz

ls -alht data/ldgourmet 
total 586896
drwx------@ 9 penguinco  staff   306B 11 24 19:58 .
drwxr-xr-x  3 penguinco  staff   102B 11 23 20:09 ..
-rw-r--r--@ 1 penguinco  staff   5.7M  4 22  2011 rating_votes.csv
-rw-r--r--@ 1 penguinco  staff   224M  4 22  2011 ratings.csv
-rw-r--r--@ 1 penguinco  staff    15K  4 22  2011 categories.csv
-rw-r--r--@ 1 penguinco  staff   553K  4 22  2011 stations.csv
-rw-r--r--@ 1 penguinco  staff   9.3K  4 22  2011 areas.csv
-rw-r--r--@ 1 penguinco  staff   713B  4 22  2011 prefs.csv
-rw-r--r--@ 1 penguinco  staff    57M  4 22  2011 restaurants.csv

indexing

elasticsearch config

cd elasticsearch
touch config/synonym.txt

# cat script/livedoor_gourmet_setting.json
curl -XPUT 'localhost:9200/livedoor_gourmet/' -d'                           
{ 
  "index":{       
    "number_of_shards":2,
    "number_of_replicas":1,                               
    "analysis":{
      "filter":{
        "synonym" : {
          "type" : "synonym",
          "synonyms_path" : "synonym.txt"
        }
      },
      "tokenizer" : {
        "kuromoji" : {
          "type":"kuromoji_tokenizer",
          "mode":"search"
        }
      },
      "analyzer" : {
        "default" : {
          "type" : "custom",
          "tokenizer" : "kuromoji_tokenizer",
          "filter" : ["synonym"]
        }
      }
    }
  }
}
'

# cat script/livedoor_gourmet_mapping.json

curl -XPUT 'http://localhost:9200/livedoor_gourmet/restaurant/_mapping' -d '
{                                                   
  "restaurant" : {
    "properties" : {
      "location" : {"type" : "geo_point", "store" : "yes"}
    }
  }
}
'

### indexing via Tire gem
# takes about 20 minutes
bundle exec rails runner script/import_restaurant.rb
bundle exec rails runner script/import_rating.rb

enjoy

Something went wrong with that request. Please try again.