
Hadoop too-small stack size killed tasks on Ubuntu 12

Finally figured out that Hadoop was dying because:

```
The stack size specified is too small, Specify at least 160k
Could not create the Java virtual machine.
```

(That's Oracle Java's complaint; the earlier Sun Java didn't object to the smaller stack.)

This wasn't appearing in the jobtracker's log view; I had to look in `/var/log/hadoop/userlogs` to see the JVM's epitaph. Note that that directory is read-protected, so you'll need sudo to see it.

The error in the jobtracker logs looked like:

```
2013-01-12 20:30:47,605 INFO org.apache.hadoop.mapred.JobTracker: Adding task (JOB_CLEANUP) 'attempt_201301120857_0005_m_000001_3' to tip task_201301120857_0005_m_000001, for tracker 'tracker_ip-10-113-42-223.ec2.internal:localhost/127.0.0.1:54742'
2013-01-12 20:30:47,605 INFO org.apache.hadoop.mapred.JobTracker: Removing task 'attempt_201301120857_0005_m_000001_2'
2013-01-12 20:30:47,908 INFO org.apache.hadoop.mapred.TaskInProgress: Error from attempt_201301120857_0005_m_000001_3: java.lang.Throwable: Child Error
        at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:242)
Caused by: java.io.IOException: Task process exit with nonzero status of 1.
        at org.apache.hadoop.mapred.TaskRunner.run(TaskRunner.java:229)
```
Commit 31f5a0c4d4d5dc5c2e3219b3c12cebc8c75bd138 (1 parent: a6e53ee), Philip (flip) Kromer, Jan 12, 2013.
```
@@ -158,7 +158,7 @@ Cookbook dependencies:
- The default block size for new files
* `[:hadoop][:max_map_tasks]` - (default: "3")
* `[:hadoop][:max_reduce_tasks]` - (default: "2")
-* `[:hadoop][:java_child_opts]` - (default: "-Xmx2432m -Xss128k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server")
+* `[:hadoop][:java_child_opts]` - (default: "-Xmx2432m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server")
* `[:hadoop][:java_child_ulimit]` - (default: "7471104")
* `[:hadoop][:io_sort_factor]` - (default: "25")
* `[:hadoop][:io_sort_mb]` - (default: "250")
```
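If you consume this cookbook rather than patch it, you can also pin the larger stack from a role, so the fix survives cookbook upgrades. A minimal sketch using the standard Chef role DSL; the role name is hypothetical, and only the `[:hadoop][:java_child_opts]` attribute and its value come from the README entry above:

```ruby
# roles/hadoop_worker.rb -- hypothetical role name, for illustration only.
# Pins -Xss256k on child task JVMs, since Oracle Java refuses to start
# with a stack under 160k (the error quoted in the commit message above).
name "hadoop_worker"
description "Hadoop worker; Oracle Java needs -Xss of at least 160k"
override_attributes(
  :hadoop => {
    :java_child_opts =>
      "-Xmx2432m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server"
  }
)
```

Role-level `override_attributes` take precedence over cookbook attribute defaults, so this holds even on a node still running the older cookbook.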
```
@@ -140,17 +140,17 @@
#
hadoop_performance_settings =
case node[:ec2] && node[:ec2][:instance_type]
- when 't1.micro' then { :max_map_tasks => 1, :max_reduce_tasks => 1, :java_child_opts => '-Xmx256m -Xss128k', :java_child_ulimit => 2227200, :io_sort_factor => 10, :io_sort_mb => 64, }
- when 'm1.small' then { :max_map_tasks => 2, :max_reduce_tasks => 1, :java_child_opts => '-Xmx870m -Xss128k', :java_child_ulimit => 2227200, :io_sort_factor => 10, :io_sort_mb => 100, }
- when 'c1.medium' then { :max_map_tasks => 3, :max_reduce_tasks => 2, :java_child_opts => '-Xmx870m -Xss128k', :java_child_ulimit => 2227200, :io_sort_factor => 10, :io_sort_mb => 100, }
- when 'm1.large' then { :max_map_tasks => 4, :max_reduce_tasks => 2, :java_child_opts => '-Xmx2432m -Xss128k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 7471104, :io_sort_factor => 25, :io_sort_mb => 200, }
- when 'c1.xlarge' then { :max_map_tasks => 10, :max_reduce_tasks => 4, :java_child_opts => '-Xmx870m -Xss128k', :java_child_ulimit => 2227200, :io_sort_factor => 20, :io_sort_mb => 200, }
- when 'm1.xlarge' then { :max_map_tasks => 6, :max_reduce_tasks => 3, :java_child_opts => '-Xmx1920m -Xss128k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 5898240, :io_sort_factor => 25, :io_sort_mb => 210, }
- when 'm2.xlarge' then { :max_map_tasks => 3, :max_reduce_tasks => 2, :java_child_opts => '-Xmx4531m -Xss128k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 32, :io_sort_mb => 210, }
- when 'm2.2xlarge' then { :max_map_tasks => 6, :max_reduce_tasks => 4, :java_child_opts => '-Xmx4378m -Xss128k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 32, :io_sort_mb => 210, }
- when 'm2.4xlarge' then { :max_map_tasks => 12, :max_reduce_tasks => 4, :java_child_opts => '-Xmx4378m -Xss128k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 40, :io_sort_mb => 210, }
- when 'cc1.4xlarge' then { :max_map_tasks => 6, :max_reduce_tasks => 6, :java_child_opts => '-Xmx1800m -Xss128k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 40, :io_sort_mb => 420, :hdfs_block_size => (256 * megabyte), }
- when 'cc1.8xlarge' then { :max_map_tasks => 6, :max_reduce_tasks => 3, :java_child_opts => '-Xmx6000m -Xss128k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 40, :io_sort_mb => 840, :hdfs_block_size => (512 * megabyte), } # the large block size, and this machine in general, are only appropriate if you're bringing some bigass data.
+ when 't1.micro' then { :max_map_tasks => 1, :max_reduce_tasks => 1, :java_child_opts => '-Xmx256m -Xss160k', :java_child_ulimit => 2227200, :io_sort_factor => 10, :io_sort_mb => 64, }
+ when 'm1.small' then { :max_map_tasks => 2, :max_reduce_tasks => 1, :java_child_opts => '-Xmx870m -Xss160k', :java_child_ulimit => 2227200, :io_sort_factor => 10, :io_sort_mb => 100, }
+ when 'c1.medium' then { :max_map_tasks => 3, :max_reduce_tasks => 2, :java_child_opts => '-Xmx870m -Xss256k', :java_child_ulimit => 2227200, :io_sort_factor => 10, :io_sort_mb => 100, }
+ when 'm1.large' then { :max_map_tasks => 4, :max_reduce_tasks => 2, :java_child_opts => '-Xmx2432m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 7471104, :io_sort_factor => 25, :io_sort_mb => 200, }
+ when 'c1.xlarge' then { :max_map_tasks => 10, :max_reduce_tasks => 4, :java_child_opts => '-Xmx870m -Xss256k', :java_child_ulimit => 2227200, :io_sort_factor => 20, :io_sort_mb => 200, }
+ when 'm1.xlarge' then { :max_map_tasks => 6, :max_reduce_tasks => 3, :java_child_opts => '-Xmx1920m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 5898240, :io_sort_factor => 25, :io_sort_mb => 210, }
+ when 'm2.xlarge' then { :max_map_tasks => 3, :max_reduce_tasks => 2, :java_child_opts => '-Xmx4531m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 32, :io_sort_mb => 210, }
+ when 'm2.2xlarge' then { :max_map_tasks => 6, :max_reduce_tasks => 4, :java_child_opts => '-Xmx4378m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 32, :io_sort_mb => 210, }
+ when 'm2.4xlarge' then { :max_map_tasks => 12, :max_reduce_tasks => 4, :java_child_opts => '-Xmx4378m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 40, :io_sort_mb => 210, }
+ when 'cc1.4xlarge' then { :max_map_tasks => 6, :max_reduce_tasks => 6, :java_child_opts => '-Xmx1800m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 40, :io_sort_mb => 420, :hdfs_block_size => (256 * megabyte), }
+ when 'cc1.8xlarge' then { :max_map_tasks => 6, :max_reduce_tasks => 3, :java_child_opts => '-Xmx6000m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server', :java_child_ulimit => 13447987, :io_sort_factor => 40, :io_sort_mb => 840, :hdfs_block_size => (512 * megabyte), } # the large block size, and this machine in general, are only appropriate if you're bringing some bigass data.
else
if node[:memory] && node[:cores]
cores = node[:cpu ][:total].to_i
```
```
@@ -130,7 +130,7 @@
attribute "hadoop/java_child_opts",
:display_name => "",
:description => "",
- :default => "-Xmx2432m -Xss128k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server"
+ :default => "-Xmx2432m -Xss256k -XX:+UseCompressedOops -XX:MaxNewSize=200m -server"
attribute "hadoop/java_child_ulimit",
:display_name => "",
```