Permalink
Browse files

Small changes as I find things that don't work or are not clear

  • Loading branch information...
1 parent a074eab commit 738d177da33004b118dbe40e237d1db88c5f9b5c @thedatachef thedatachef committed Jan 23, 2011
Showing with 31 additions and 5 deletions.
  1. +10 −0 bin/estool
  2. +2 −2 bin/wonderdog
  3. +12 −0 config/ufo_config.json
  4. +7 −3 src/java/com/infochimps/elasticsearch/wonderdog/WonderDog.java
View
@@ -9,6 +9,8 @@ Settings.define :host, :required => true, :description => "Elastic search clust
Settings.define :port, :default => 9200, :description => "Elastic search cluster port"
Settings.define :index_name, :description => "Which index to address?"
Settings.define :replicas, :default => 1, :description => "Number of replicas to set index to"
+Settings.define :object_type, :description => "When 'putting' the mapping, which object type to update"
+Settings.define :object_def_file, :description => "When 'putting' the mapping, full path to json config file describing object and its fields"
Settings.resolve!
#
@@ -83,6 +85,14 @@ task :refresh_index do
sh "curl -s -XPOST \"http://%s:%s/%s/_refresh\"" % [Settings.host, Settings.port, Settings.index_name]
end
+#
+# Puts simple mapping from json file into the given index.
+#
+# Reads Settings.host, Settings.port, Settings.index_name, Settings.object_type
+# and Settings.object_def_file. curl is invoked without a shell and reads the
+# json file itself (--data-binary @path), so '%' or quote characters inside
+# the json can neither break the format call nor the command line.
+#
+task :put_mapping do
+ def_file = Settings.object_def_file
+ raise ArgumentError, "object_def_file must name a readable json file" unless def_file && File.readable?(def_file)
+ url = "http://%s:%s/%s/%s/_mapping" % [Settings.host, Settings.port, Settings.index_name, Settings.object_type]
+ # Multi-argument sh skips the shell entirely, so no quoting of url or path is needed
+ sh "curl", "-s", "-XPUT", url, "--data-binary", "@#{def_file}"
+end
+
Rake::Task[Settings.rest.first].invoke
View
@@ -6,7 +6,7 @@ require 'configliere' ; Configliere.use(:commandline, :env_var, :define)
Settings.define :index_name, :required => true, :description => "Index to write data to"
Settings.define :object_type, :default => "tweet", :description => "Type of object we're indexing"
Settings.define :field_names, :default => "rsrc,tweet_id,created_at,user_id,screen_name,search_id,in_reply_to_user_id,in_reply_to_screen_name,in_reply_to_search_id,in_reply_to_status_id,text,source,lang,lat,lng,retweeted_count,rt_of_user_id,rt_of_screen_name,rt_of_tweet_id,contributors", :description => "Comma separated list of field names"
-Settings.define :id_field, :default => "1", :description => "Index of field to use as object id (counting from 0; default 1)"
+Settings.define :id_field, :default => "1", :description => "Index of field to use as object id (counting from 0; default 1), use -1 if no id field"
Settings.define :bulk_size, :default => "1000", :description => "Number of records per bulk request"
Settings.define :es_home, :default => "/usr/local/share/elasticsearch", :description => "Path to elasticsearch installation",:env_var => "ES_HOME"
Settings.define :es_config, :default => "/etc/elasticsearch/elasticsearch.yml", :description => "Path to elasticsearch config"
@@ -42,7 +42,7 @@ class Wonderdog
"-Dwonderdog.index.name=#{options.index_name}",
"-Dwonderdog.object.type=#{options.object_type}",
"-Dwonderdog.id.field=#{options.id_field}",
- "-Dwonderdog.field.names=#{options.field.names}",
+ "-Dwonderdog.field.names=#{options.field_names}",
"-Dwonderdog.bulk.size=#{options.bulk_size}",
"-Dwonderdog.config=#{options.es_config}",
"-Dwonderdog.plugins.dir=#{options.es_home}/plugins",
@@ -0,0 +1,12 @@
+{
+ "ufo_sighting" : {
+ "properties" : {
+ "sighted_at" : {"type" : "string", "store" : "yes"},
+ "reported_at" : {"type" : "string", "store" : "yes"},
+ "location" : {"type" : "string", "store" : "yes"},
+ "shape" : {"type" : "string", "store" : "yes"},
+ "duration" : {"type" : "string", "store" : "yes"},
+ "description" : {"type" : "string", "store" : "yes"}
+ }
+ }
+}
@@ -92,7 +92,11 @@ public void map(LongWritable key, Text value, Context context) throws IOExceptio
}
}
builder.endObject();
- currentRequest.add(Requests.indexRequest(indexName).type(objType).id(fields[idField]).create(false).source(builder));
+ if (idField == -1) {
+ currentRequest.add(Requests.indexRequest(indexName).type(objType).create(false).source(builder));
+ } else {
+ currentRequest.add(Requests.indexRequest(indexName).type(objType).id(fields[idField]).create(false).source(builder));
+ }
processBulkIfNeeded(context);
}
@@ -134,9 +138,9 @@ protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context) throws
// Set all task level config
Configuration conf = context.getConfiguration();
this.indexName = conf.get("wonderdog.index.name");
- this.bulkSize = conf.getInt("wonderdog.bulk.size");
+ this.bulkSize = Integer.parseInt(conf.get("wonderdog.bulk.size"));
this.fieldNames = conf.get("wonderdog.field.names").split(",");
- this.idField = conf.getInt("wonderdog.id.field");
+ this.idField = Integer.parseInt(conf.get("wonderdog.id.field"));
this.objType = conf.get("wonderdog..object.type");
System.setProperty("es.path.plugins",conf.get("wonderdog.plugins.dir"));
System.setProperty("es.config",conf.get("wonderdog.config"));

0 comments on commit 738d177

Please sign in to comment.