Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Turned on CompressedOOPs; making it run with multiple esnodes per box

  • Loading branch information...
commit 603c9a5eaef575b3bb4119e6fadcecf2422d7490 1 parent 7ea7a2f
Philip (flip) Kromer authored
View
16 README-benchmarking.txt
@@ -3,6 +3,22 @@
# . work data es_node heavily but don't drive it into the ground
# . tune lucene + jvm options for data es_node
+14 files, 3 hadoop nodes, 3 tasktrackers each 27 min
+14 files, 3 hadoop nodes, 5 tasktrackers each 22 min
+
+
+The refresh API allows you to explicitly refresh one or more indices, making all
+operations performed since the last refresh available for search. The (near)
+real-time capabilities depend on the index engine used. For example, the robin
+one requires refresh to be called, but by default a refresh is scheduled
+periodically.
+
+curl -XPOST 'http://localhost:9200/twitter/_refresh'
+
+The refresh API can be applied to more than one index with a single call, or even on _all the indices.
+
+
+
runs:
- es_machine: m1.xlarge
es_nodes: 1
View
10 config/elasticsearch.in.sh
@@ -1,13 +1,12 @@
CLASSPATH=$CLASSPATH:$ES_HOME/lib/elasticsearch-0.11.0.jar:$ES_HOME/lib/*:$ES_HOME/lib/sigar/*
if [ "x$ES_MIN_MEM" = "x" ]; then
- ES_MIN_MEM=256m
+ ES_MIN_MEM=256m
fi
if [ "x$ES_MAX_MEM" = "x" ]; then
- ES_MAX_MEM=1500m
+ ES_MAX_MEM=1500m
fi
-
# Arguments to pass to the JVM
JAVA_OPTS="$JAVA_OPTS -Xms${ES_MIN_MEM}"
JAVA_OPTS="$JAVA_OPTS -Xmx${ES_MAX_MEM}"
@@ -25,14 +24,15 @@ JAVA_OPTS="$JAVA_OPTS -XX:MaxTenuringThreshold=1"
JAVA_OPTS="$JAVA_OPTS -XX:+HeapDumpOnOutOfMemoryError"
JAVA_OPTS="$JAVA_OPTS -XX:HeapDumpPath=$ES_HOME/work/heap"
+JAVA_OPTS="$JAVA_OPTS -XX:+UseCompressedOops"
# More options to consider LATER
# java.net.preferIPv4Stack=true: Better OOTB experience, especially with jgroups
# -XX:CMSInitiatingOccupancyFraction=88
-# -XX:+UseCompressedOops
+#
+export JAVA_OPTS ES_MAX_MEM ES_MIN_MEM
#
# ES_CONF_DIR=$HOME/Programming/wonderdog/config ;sudo -u hadoop ES_INCLUDE=$ES_CONF_DIR/elasticsearch.in.sh /usr/lib/elasticsearch/bin/elasticsearch -Des.config=$ES_CONF_DIR/elasticsearch.yml
#
-
View
60 config/more_settings.yml
@@ -0,0 +1,60 @@
+#
+# This file isn't read for any reason -- it's
+# a dumping ground for annotated config sections
+#
+
+
+gateway:
+ # Settings for gateway.type = s3
+ s3:
+ bucket: infochimps-elasticsearch
+
+gateway:
+ fs:
+ # By default, uses the 'path.work' directory. Note: the work directory is
+ # considered a temporary directory by ElasticSearch (meaning it is safe
+ # to rm -rf it). The default location of the persistent gateway in work is
+ # intentional; it should be changed.
+ #
+ # When explicitly specifying the gateway.fs.location, each node will
+ # append its cluster.name to the provided location. It means that the
+ # location provided can safely support several clusters.
+ #
+ # The file system gateway automatically configures each index that is
+ # created to use an fs index gateway. The location specified using
+ # gateway.fs.location will automatically be used in this case to store
+ # index level data (with the index name appended).
+ location: /mnt2/elasticsearch/fs
+
+discovery:
+
+ zen:
+ # == How should gossip be conducted?
+ ping:
+ multicast:
+ enabled: false
+ # group: 224.2.2.4
+ # port: 54328
+ # ttl: 3
+ # address: null
+ unicast:
+ # # Either a YAML array or a comma delimited string.
+ # # Each value is either in the form of host:port, or in the form of host[port1-port2].
+ # hosts:
+ # == Zen master election:
+ # As part of the initial ping process, a master of the cluster is either
+ # elected or joined. This is done automatically. The
+ # discovery.zen.initial_ping_timeout setting (which defaults to 3s) lets you
+ # tune the election to handle cases of slow or congested networks
+ # (higher values reduce the chance of failure).
+ initial_ping_timeout: 3s
+ # # Allow node to become master? Note, once a node is a client node
+ # # (node.client = true), it will not be allowed to become a master
+ # # (zen.master is automatically set to false).
+ # master: ~
+ # == Zen Fault detection:
+ fd:
+ ping_interval: 1s
+ ping_timeout: 30s
+ ping_retries 3
+
View
BIN  java/build/elastic_bulk_loader.jar
Binary file not shown
View
BIN  java/build/elastic_bulk_loader/ElasticBulkLoader$Map.class
Binary file not shown
View
BIN  java/build/elastic_bulk_loader/ElasticBulkLoader.class
Binary file not shown
View
6 java/src/ElasticBulkLoader.java
@@ -53,7 +53,7 @@
public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
add_tweet_to_bulk(value);
- if (randgen.nextDouble() < 0.01) { output.collect(key, value); }
+ if (randgen.nextDouble() < 0.001) { output.collect(key, value); }
}
public void add_tweet_to_bulk(Text value) {
@@ -79,7 +79,7 @@ public void add_tweet_to_bulk(Text value) {
currentRequest.add(Requests.indexRequest("foo").type("tweet").id(fields[0]).create(true).source(builder));
processBulkIfNeeded();
} catch (Exception e) {
- // System.out.println("There was some sort of problem here in trying to create a new index request");
+ System.out.println("There was some sort of problem here in trying to create a new index request");
}
}
@@ -92,7 +92,7 @@ private void processBulkIfNeeded() {
long startTime = System.currentTimeMillis();
BulkResponse response = currentRequest.execute().actionGet();
totalBulkTime.addAndGet(System.currentTimeMillis() - startTime);
- if (randgen.nextDouble() < 0.01) {
+ if (randgen.nextDouble() < 0.001) {
System.out.println("Indexed [" + totalBulkItems.get() + "] in [" + totalBulkTime.get() + "ms]"+" for ["+ (float)(1000.0*totalBulkItems.get())/totalBulkTime.get() + "rec/s]");
}
if (response.hasFailures()) {
Please sign in to comment.
Something went wrong with that request. Please try again.