From 9131eb99d447435a889dc123a6822fa34728e8a9 Mon Sep 17 00:00:00 2001 From: Nathaniel Eliot Date: Tue, 13 Nov 2012 12:26:00 -0600 Subject: [PATCH] More cleanup of vendored examples into homebase --- example_clusters/big_hadoop.rb | 99 +++++++++++++ example_clusters/burninator.rb | 85 +++++++++++ example_clusters/control.rb | 57 ++++++++ example_clusters/el_ridiculoso.rb | 226 +++++++++++++++++++++++++++++ example_clusters/elastic_hadoop.rb | 112 ++++++++++++++ example_clusters/es.rb | 59 ++++++++ example_clusters/hb.rb | 160 ++++++++++++++++++++ example_clusters/metropolis.rb | 58 ++++++++ example_clusters/sandbox.rb | 57 ++++++++ example_clusters/web.rb | 90 ++++++++++++ example_clusters/zk.rb | 71 +++++++++ vendor/README.md | 6 - 12 files changed, 1074 insertions(+), 6 deletions(-) create mode 100644 example_clusters/big_hadoop.rb create mode 100644 example_clusters/burninator.rb create mode 100644 example_clusters/control.rb create mode 100644 example_clusters/el_ridiculoso.rb create mode 100644 example_clusters/elastic_hadoop.rb create mode 100644 example_clusters/es.rb create mode 100644 example_clusters/hb.rb create mode 100644 example_clusters/metropolis.rb create mode 100644 example_clusters/sandbox.rb create mode 100644 example_clusters/web.rb create mode 100644 example_clusters/zk.rb delete mode 100644 vendor/README.md diff --git a/example_clusters/big_hadoop.rb b/example_clusters/big_hadoop.rb new file mode 100644 index 00000000..a21d5f43 --- /dev/null +++ b/example_clusters/big_hadoop.rb @@ -0,0 +1,99 @@ +# +# Production cluster -- no persistent HDFS +# +# !!Important setup steps!!: +# +# Launch the cluster with the hadoop daemon run states set to 'stop' -- see the +# section most of the way down the page. +# +# After initial bootstrap, +# * set the run_state to :start in the lines below +# * run `knife cluster sync` to push those values up to chef +# * run `knife cluster kick` to re-converge +# +# As soon as you see 'nodes=1' on jobtracker (host:50030) & namenode (host:50070) +# control panels, you're good to launch the rest of the cluster. 
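+#
+# Put together, the launch sequence is roughly (a sketch only -- the cluster/facet
+# names and the `--bootstrap` flag here are illustrative; adjust to your setup):
+#
+#     knife cluster launch big_hadoop-master --bootstrap
+#     # ...once the master converges, flip the run_states below to :start...
+#     knife cluster sync big_hadoop
+#     knife cluster kick big_hadoop
+#     knife cluster launch big_hadoop-worker --bootstrap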
+# +Ironfan.cluster 'big_hadoop' do + cloud(:ec2) do + permanent false + availability_zones ['us-east-1d'] + flavor 'm1.large' + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals(:tags => { :hadoop_scratch => true, :hadoop_data => true, :persistent => false, :bulk => true }) + end + + environment :dev + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :nfs_client + cloud(:ec2).security_group :nfs_client + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + role :hadoop + role :hadoop_s3_keys + recipe 'hadoop_cluster::config_files', :last + role :zookeeper_client, :last + role :hbase_client, :last + + role :jruby + role :pig + recipe :rstats + + role :tuning, :last + + facet :master do + instances 1 + role :hadoop_namenode + role :hadoop_secondarynn + role :hadoop_jobtracker + role :hadoop_datanode + role :hadoop_tasktracker + end + + facet :worker do + instances 2 + role :hadoop_datanode + role :hadoop_tasktracker + end + + cluster_role.override_attributes({ + :hadoop => { + # more jobtracker heap size for running large-mapper-count jobs + :jobtracker => { :java_heap_size_max => 2000, }, + # lets you rapidly decommission nodes for elasticity + :balancer => { :max_bandwidth => (50 * 1024 * 1024) }, + # make mid-flight data much smaller -- useful esp. with ec2 network constraints + :compress_mapout_codec => 'org.apache.hadoop.io.compress.SnappyCodec', + }, + }) + + # + # Hadoop Daemon run states + # + facet(:master).facet_role.override_attributes({ + :hadoop => { + :namenode => { :run_state => :stop, }, + :secondarynn => { :run_state => :stop, }, + :jobtracker => { :run_state => :stop, }, + :datanode => { :run_state => :stop, }, + :tasktracker => { :run_state => :stop, }, + }, + }) + +end diff --git a/example_clusters/burninator.rb b/example_clusters/burninator.rb new file mode 100644 index 00000000..c8876683 --- /dev/null +++ b/example_clusters/burninator.rb @@ -0,0 +1,85 @@ +# +# Burninator cluster -- populate an AMI with installed software, but no +# services, users or other preconceptions. +# +# The script /tmp/burn_ami_prep.sh will help finalize the machine -- then, just +# stop it and invoke 'Create Image (EBS AMI)'. +# +Ironfan.cluster 'burninator' do + cloud(:ec2) do + availability_zones ['us-east-1d'] + # use a c1.xlarge so the AMI knows about all ephemeral drives + flavor 'c1.xlarge' + backing 'ebs' + # image_name is per-facet here + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals + end + + environment :dev + + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + + # It's handy to have the root volumes not go away with the machine. 
+ # It also means you can find yourself with a whole ton of stray 8GB + # images once you're done burninatin' so make sure to go back and + # clear them out + volume(:root).keep true + + # + # A throwaway facet for AMI generation + # + facet :trogdor do + instances 1 + + cloud(:ec2).image_name 'natty' # Leave set at vanilla natty + + recipe 'cloud_utils::burn_ami_prep' + + role :package_set, :last + + recipe 'ant' + recipe 'boost' + recipe 'build-essential' + recipe 'emacs' + recipe 'git' + recipe 'java::sun' + recipe 'jpackage' + recipe 'jruby' + recipe 'jruby::gems' + recipe 'nodejs' + recipe 'ntp' + recipe 'openssl' + recipe 'pig::install_from_release' + recipe 'hadoop_cluster::add_cloudera_repo' + recipe 'runit' + recipe 'thrift' + recipe 'xfs' + recipe 'xml' + recipe 'zlib' + recipe 'zsh' + + facet_role.override_attributes({ + :java => { :install_flavor => 'sun' }, # use sun java typically + :package_set => { :install => %w[ base dev sysadmin text python emacs ] }, + :apt => { :cloudera => { :force_distro => 'maverick', }, }, + }) + end + + # + # Used to test the generated AMI. + # + facet :village do + instances 1 + # Once the AMI is burned, add a new entry in your knife configuration -- see + # knife/example-credentials/knife-org.rb. Fill in its name here: + cloud(:ec2).image_name 'ironfan-natty' + + # just so there's something in the runlist. + recipe 'motd' + end + +end diff --git a/example_clusters/control.rb b/example_clusters/control.rb new file mode 100644 index 00000000..f85cb0dc --- /dev/null +++ b/example_clusters/control.rb @@ -0,0 +1,57 @@ +# +# Command and control cluster +# +Ironfan.cluster 'control' do + cloud(:ec2) do + permanent true + availability_zones ['us-east-1d'] + flavor 't1.micro' + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals + end + + environment :dev + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + facet :nfs do + role :nfs_server + cloud(:ec2).security_group(:nfs_server).authorize_group :nfs_client + + facet_role do + override_attributes({ + :nfs => { :exports => { + '/home' => { :name => 'home', :nfs_options => '*.internal(rw,no_root_squash,no_subtree_check)' }}}, + }) + end + + volume(:home_vol) do + size 20 + keep true + device '/dev/sdh' # note: will appear as /dev/xvdh on modern ubuntus + mount_point '/home' + attachable :ebs + snapshot_name :blank_xfs + resizable true + create_at_launch true + tags( :persistent => true, :local => false, :bulk => false, :fallback => false ) + end + end + +end diff --git a/example_clusters/el_ridiculoso.rb b/example_clusters/el_ridiculoso.rb new file mode 100644 index 00000000..ed182eb5 --- /dev/null +++ b/example_clusters/el_ridiculoso.rb @@ -0,0 +1,226 @@ +# -*- coding: utf-8 -*- +# +# El Ridiculoso Grande -- esto es un clúster gordo que tiene todo lo que en él +# +# Maybe you're wondering what would happen if you installed everything in sight +# on the same node. Here's your chance to find out. 
+# +Ironfan.cluster 'el_ridiculoso' do + cloud(:ec2) do + availability_zones ['us-east-1d'] + flavor 'c1.xlarge' + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals(:tags => { :hadoop_scratch => true }) + end + + environment :dev + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :nfs_client + cloud(:ec2).security_group :nfs_client + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + role :hadoop + role :hadoop_s3_keys + recipe 'hadoop_cluster::config_files', :last + role :hbase_client, :last + role :zookeeper_client, :last + + # role :log_integration + # role :zabbix_agent, :last + recipe 'cloud_utils::pickle_node', :last + + module ElRidiculoso + module_function + def redis_server() "#{cluster_name}-redis_server"; end + def redis_client() "#{cluster_name}-redis_client"; end + + def master_processes + role :cassandra_server + role :elasticsearch_datanode + role :elasticsearch_httpnode + role :flume_master + role :ganglia_master + role :graphite_server + role :hadoop_jobtracker + role :hadoop_namenode + role :hadoop_secondarynn + role :hbase_master +# role :jenkins_server + role :mongodb_server + role :mysql_server + role :redis_server + cloud(:ec2).security_group(redis_server).authorized_by_group(redis_client) + + role :resque_server + role :statsd_server + role :zabbix_server + #role :zabbix_web + role :zookeeper_server + # The default recipes for these run stuff even though it's impolite + recipe 'apache2' + recipe 'nginx' + end + + def worker_processes + role :flume_agent + role :ganglia_agent + role :hadoop_datanode + role :hadoop_tasktracker + role :hbase_regionserver + role :hbase_stargate + role :hbase_thrift + role :jenkins_worker + end + + def client_processes + role :cassandra_client + role :elasticsearch_client + role :hbase_client + role :mysql_client + role :nfs_client + role :redis_client + cluster_name = self.cluster_name + cloud(:ec2).security_group(redis_client) + role :zookeeper_client + end + + def simple_installs + role :jruby + role :pig + recipe 'ant' + recipe 'bluepill' + recipe 'boost' + recipe 'build-essential' + recipe 'cron' + recipe 'git' + recipe 'hive' + recipe 'java::sun' + recipe 'jpackage' + recipe 'jruby' + recipe 'nodejs' + recipe 'ntp' + recipe 'openssh' + recipe 'openssl' + recipe 'rstats' + recipe 'runit' + recipe 'thrift' + recipe 'xfs' + recipe 'xml' + recipe 'zabbix' + recipe 'zlib' + end + end + + facet :gordo do + extend ElRidiculoso + instances 1 + + master_processes + worker_processes + client_processes + simple_installs + end + + facet :jefe do + extend ElRidiculoso + instances 1 + + master_processes + simple_installs + end + + # Runs worker processes and client packages + facet :bobo do + extend ElRidiculoso + instances 1 + + worker_processes + client_processes + simple_installs + end + + facet :pequeno do + role :jruby + role :pig + role :elasticsearch_server + role :elasticsearch_client + role :hadoop_namenode + role :hadoop_sencondarynn + role :hadoop_jobtracker + role :hadoop_datanode + role :hadoop_tasktracker + role :tuning, :last + end + + + cluster_role.override_attributes({ + :apache => { + :server => { :run_state => [:stop, :disable] }, }, + :cassandra => { :run_state => :stop }, + :chef => { + :client => { :run_state => :stop }, + 
:server => { :run_state => :stop }, }, + :elasticsearch => { :run_state => :stop }, + :flume => { + :master => { :run_state => :stop }, + :agent => { :run_state => :stop }, }, + :ganglia => { + :agent => { :run_state => :stop }, + :server => { :run_state => :stop }, }, + :graphite => { + :carbon => { :run_state => :stop }, + :whisper => { :run_state => :stop }, + :dashboard => { :run_state => :stop }, }, + :hadoop => { + :java_heap_size_max => 128, + :namenode => { :run_state => :stop }, + :secondarynn => { :run_state => :stop }, + :jobtracker => { :run_state => :stop }, + :datanode => { :run_state => :stop }, + :tasktracker => { :run_state => :stop }, + :hdfs_fuse => { :run_state => :stop }, }, + :hbase => { + :master => { :run_state => :stop }, + :regionserver => { :run_state => :stop }, + :thrift => { :run_state => :stop }, + :stargate => { :run_state => :stop }, }, + :jenkins => { + :server => { :run_state => :stop }, + :worker => { :run_state => :stop }, }, + :minidash => { :run_state => :stop }, + :mongodb => { + :server => { :run_state => :stop }, }, + :mysql => { + :server => { :run_state => :stop }, }, + :nginx => { + :server => { :run_state => :stop }, }, + :redis => { + :server => { :run_state => :stop }, }, + :resque => { + :redis => { :run_state => :stop }, + :dashboard => { :run_state => :stop }, }, + :statsd => { :run_state => :stop }, + :zabbix => { + :agent => { :run_state => :stop }, + :master => { :run_state => :stop }, }, + :zookeeper => { + :server => { :run_state => :stop }, }, + }) + +end diff --git a/example_clusters/elastic_hadoop.rb b/example_clusters/elastic_hadoop.rb new file mode 100644 index 00000000..8e5dad50 --- /dev/null +++ b/example_clusters/elastic_hadoop.rb @@ -0,0 +1,112 @@ +# +# Science cluster -- persistent HDFS +# +# !!Important setup steps!!: +# +# 1. Launch the cluster with the hadoop daemon run states set to 'stop' -- see the +# section most of the way down the page. +# +# 2. ssh to the machine and run `sudo bash /etc/hadoop/conf/bootstrap_hadoop_namenode.sh` +# +# 3. After initial bootstrap, +# - set the run_state to :start in the lines below +# - run `knife cluster sync` to push those values up to chef +# - run `knife cluster kick` to re-converge +# +# As soon as you see 'nodes=1' on jobtracker (host:50030) & namenode (host:50070) +# control panels, you're good to launch the rest of the cluster. 
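+#
+# Combining the steps above, a first boot looks something like this (a sketch --
+# `<master-node>` is a placeholder for your master's hostname, and the facet
+# arguments are illustrative):
+#
+#     knife cluster launch elastic_hadoop-master --bootstrap
+#     ssh <master-node> 'sudo bash /etc/hadoop/conf/bootstrap_hadoop_namenode.sh'
+#     # ...flip the run_states below from :stop to :start, then:
+#     knife cluster sync elastic_hadoop
+#     knife cluster kick elastic_hadoop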
+# +Ironfan.cluster 'elastic_hadoop' do + cloud(:ec2) do + permanent false + availability_zones ['us-east-1d'] + flavor 'm1.large' + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals(:tags => { :hadoop_scratch => true, :hadoop_data => false, :persistent => false, :bulk => true }) + end + + environment :dev + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :nfs_client + cloud(:ec2).security_group :nfs_client + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + role :hadoop + role :hadoop_s3_keys + recipe 'hadoop_cluster::config_files', :last + role :zookeeper_client, :last + role :hbase_client, :last + + role :jruby + role :pig + + role :tuning, :last + + facet :master do + instances 1 + role :hadoop_namenode + role :hadoop_secondarynn + role :hadoop_jobtracker + role :hadoop_datanode + role :hadoop_tasktracker + end + + facet :worker do + instances 2 + role :hadoop_datanode + role :hadoop_tasktracker + end + + cluster_role.override_attributes({ + :hadoop => { + # more jobtracker heap size for running large-mapper-count jobs + :jobtracker => { :java_heap_size_max => 2000, }, + # lets you rapidly decommission nodes for elasticity + :balancer => { :max_bandwidth => (50 * 1024 * 1024) }, + # make mid-flight data much smaller -- useful esp. with ec2 network constraints + :compress_mapout_codec => 'org.apache.hadoop.io.compress.SnappyCodec', + }, + }) + + # + # Hadoop Daemon run states + # + facet(:master).facet_role.override_attributes({ + :hadoop => { + :namenode => { :run_state => :start, }, + :secondarynn => { :run_state => :start, }, + :jobtracker => { :run_state => :start, }, + :datanode => { :run_state => :start, }, + :tasktracker => { :run_state => :start, }, + }, + }) + + volume(:ebs1) do + size 100 + keep true + device '/dev/sdj' # note: will appear as /dev/xvdj on modern ubuntus + mount_point '/data/ebs1' + attachable :ebs + snapshot_name :blank_xfs + resizable true + create_at_launch true + tags( :hadoop_data => true, :persistent => true, :local => false, :bulk => true, :fallback => false ) + end + +end diff --git a/example_clusters/es.rb b/example_clusters/es.rb new file mode 100644 index 00000000..6f2a45f6 --- /dev/null +++ b/example_clusters/es.rb @@ -0,0 +1,59 @@ +Ironfan.cluster 'es' do + cloud(:ec2) do + # permanent true + availability_zones ['us-east-1d'] + flavor 't1.micro' + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals(:tags => { :elasticsearch_scratch => true }) if (Chef::Config.cloud == 'ec2') + end + + environment :dev + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :nfs_client + cloud(:ec2).security_group :nfs_client + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + role :tuning, :last + + facet :elasticsearch do + num_nodes = 3 + instances num_nodes + recipe 'volumes::build_raid', :first + # + role :elasticsearch_server + role :elasticsearch_client + + facet_role.override_attributes({ + :elasticsearch => { + :expected_nodes => num_nodes, + :recovery_after_nodes => 
num_nodes, + :s3_gateway_bucket => "elasticsearch.#{Chef::Config[:organization]}.chimpy.us", + :server => { :run_state => :start } + } + }) + + raid_group(:md0) do + device '/dev/md0' + mount_point '/raid0' + level 0 + sub_volumes [:ephemeral0, :ephemeral1, :ephemeral2, :ephemeral3] + end if (Chef::Config.cloud == 'ec2') + end + +end diff --git a/example_clusters/hb.rb b/example_clusters/hb.rb new file mode 100644 index 00000000..f8156756 --- /dev/null +++ b/example_clusters/hb.rb @@ -0,0 +1,160 @@ +# +# HBase with persisten EBS-backed storage. +# +# For serious use, you will want to go to larger nodes (`m1.xlarge` works well +# for us) and should NOT use EBS-backed storage. We assume that for initial +# experimentation you'll want to start/stop this, so it comes out of the box +# with EBS. +# +# !!Important setup steps!!: +# +# Launch the cluster with the hadoop daemon run states set to 'stop' -- see the +# section most of the way down the page. +# +# After initial bootstrap, +# * set the run_state to :start for all but the jobtracker and tasktracker (for an hbase client, these will typically be left at `:stop`) +# * run `knife cluster sync` to push those values up to chef +# * run `knife cluster kick` to re-converge +# +# As soon as you see 'nodes=1' on jobtracker (host:50030) & namenode (host:50070) +# control panels, you're good to launch the rest of the cluster. +# +Ironfan.cluster 'hb' do + cloud(:ec2) do + # permanent true + availability_zones ['us-east-1d'] + flavor 'm1.large' + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals(:tags => { :hbase_scratch => true, :hadoop_scratch => true }) + end + + flume_cluster_name = :dds + hbase_cluster_name = :hb + science_cluster_name = :elastic_hadoop + zookeeper_cluster_name = :zk + + environment :dev + + cluster_overrides = Mash.new({ + # Look for the zookeeper nodes in the dedicated zookeeper cluster + :discovers => { + :zookeeper => { :server => zookeeper_cluster_name }, + }, + :hadoop => { + :namenode => { :run_state => :start, }, + :secondarynn => { :run_state => :start, }, + :datanode => { :run_state => :start, }, + :jobtracker => { :run_state => :stop, }, # leave this at 'stop', usually + :tasktracker => { :run_state => :stop, }, # leave this at 'stop', usually + :compress_mapout_codec => 'org.apache.hadoop.io.compress.SnappyCodec', + }, + :hbase => { + :master => { :run_state => :start }, + :regionserver => { :run_state => :start }, + :stargate => { :run_state => :start }, }, + :zookeeper => { + :server => { :run_state => :start }, }, + }) + + # + ## Uncomment the lines below to stop all services on the cluster + # + # cluster_overrides[:hadoop ][:namenode ][:run_state] = :stop + # cluster_overrides[:hadoop ][:secondarynn ][:run_state] = :stop + # cluster_overrides[:hadoop ][:datanode ][:run_state] = :stop + # cluster_overrides[:hbase ][:master ][:run_state] = :stop + # cluster_overrides[:hbase ][:regionserver ][:run_state] = :stop + # cluster_overrides[:hbase ][:stargate ][:run_state] = :stop + # cluster_overrides[:zookeeper][:server ][:run_state] = :stop + + # # total size of the JVM heap (regionserver) (default 2000m) + # cluster_overrides[:hbase][:regionserver][:java_heap_size_max] = "4000m" + # + # # hbase.hregion.memstore.mslab.enabled (default false) -- Experimental: Enables the + # # MemStore-Local Allocation Buffer, a feature which works to prevent heap fragmentation + # # under heavy write loads. 
This can reduce the frequency of stop-the-world GC pauses on + # # large heaps. + # cluster_overrides[:hbase][:memstore] ||= {} + # cluster_overrides[:hbase][:memstore][:mslab_enabled] = true + # + # # Setting this to 0 entirely removes the limit on concurrent connections. This is necessary + # # to overcome https://issues.apache.org/jira/browse/HBASE-4684 in HBase 0.90.4 + # cluster_overrides[:zookeeper][:max_client_connections] = 0 + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :nfs_client + cloud(:ec2).security_group :nfs_client + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + role :hadoop + role :hadoop_s3_keys + recipe 'hadoop_cluster::config_files', :last + role :zookeeper_client, :last + role :hbase_client, :last + recipe 'hbase::config_files', :last + + role :jruby + role :pig + + role :tuning, :last + + facet :alpha do + instances 1 + role :hadoop_namenode + role :hbase_master + end + facet :beta do + instances 1 + role :hadoop_secondarynn + role :hadoop_jobtracker + role :hbase_master + end + facet :worker do + instances 3 + role :hadoop_datanode + role :hadoop_tasktracker + role :hbase_regionserver + role :hbase_stargate + role :hbase_thrift + end + + # This line, and the 'discovers' setting in the cluster_role, + # enable the hbase to use an external zookeeper cluster + cloud(:ec2).security_group(self.name) do + authorized_by_group(zookeeper_cluster_name) + authorize_group(flume_cluster_name) + authorize_group(science_cluster_name) + end + + # + # Attach persistent storage to each node, and use it for all hadoop data_dirs. 
+ # + volume(:ebs1) do + size 10 + keep true + device '/dev/sdj' # note: will appear as /dev/xvdj on modern ubuntus + mount_point '/data/ebs1' + attachable :ebs + snapshot_name :blank_xfs + resizable true + create_at_launch true + tags( :hbase_data => true, :hadoop_data => true, :zookeeper_data => true, :persistent => true, :local => false, :bulk => true, :fallback => false ) + end + + cluster_role.override_attributes(cluster_overrides) +end diff --git a/example_clusters/metropolis.rb b/example_clusters/metropolis.rb new file mode 100644 index 00000000..cd44c7ba --- /dev/null +++ b/example_clusters/metropolis.rb @@ -0,0 +1,58 @@ +# +# Metropolis demo cluster -- runs graphite, statsd, goliath-statsd +# +Ironfan.cluster 'metropolis' do + cloud(:ec2) do + permanent false + availability_zones ['us-east-1d'] + flavor 't1.micro' + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals + end + + environment :dev + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :nfs_client + cloud(:ec2).security_group :nfs_client + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + role :tuning, :last + + facet :master do + instances 1 + + role :graphite_server + role :statsd_server + end + + cluster_role.override_attributes({ + }) + + volume(:ebs1) do + size 10 + keep true + device '/dev/sdj' # note: will appear as /dev/xvdj on modern ubuntus + mount_point '/data/ebs1' + attachable :ebs + snapshot_name :blank_xfs + resizable true + create_at_launch true + tags( :graphite_data => true, :persistent => true, :bulk => true, :local => false, :fallback => false ) + end +end diff --git a/example_clusters/sandbox.rb b/example_clusters/sandbox.rb new file mode 100644 index 00000000..ce819751 --- /dev/null +++ b/example_clusters/sandbox.rb @@ -0,0 +1,57 @@ +# +# Sandbox cluster -- use this for general development +# +Ironfan.cluster 'sandbox' do + cloud(:ec2) do + permanent false + availability_zones ['us-east-1d'] + flavor 't1.micro' + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals + end + + environment :dev + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :nfs_client + cloud(:ec2).security_group :nfs_client + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + role :tuning, :last + + facet :simple do + instances 1 + end + + facet :raid_demo do + instances 1 + cloud(:ec2).flavor 'm1.large' + recipe 'volumes::build_raid', :first + + cloud(:ec2).mount_ephemerals + raid_group(:md0) do + device '/dev/md0' + mount_point '/raid0' + level 0 + sub_volumes [:ephemeral0, :ephemeral1] + end + end + + cluster_role.override_attributes({ + }) +end diff --git a/example_clusters/web.rb b/example_clusters/web.rb new file mode 100644 index 00000000..6ddc5ad3 --- /dev/null +++ b/example_clusters/web.rb @@ -0,0 +1,90 @@ +Ironfan.cluster 'web' do + cloud :ec2 do + # permanent true + availability_zones ['us-east-1d'] + flavor 't1.micro' # change to something larger for serious use + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 
'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals + end + + environment :dev + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :nfs_client + cloud(:ec2).security_group :nfs_client + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + redis_server = "#{cluster_name}-redis_server" + redis_client = "#{cluster_name}-redis_client" + facet :webnode do + instances 6 + role :nginx + role :redis_client + cloud(:ec2).security_group(redis_server).authorized_by_group(redis_client) + role :mysql_client + role :elasticsearch_client + role :awesome_website + role :web_server # this triggers opening appropriate ports + cloud(:ec2).security_group(full_name) do + authorize_port_range 80..80 + authorize_port_range 443..443 + end + + # Rotate nodes among availability zones + azs = ['us-east-1d', 'us-east-1b', 'us-east-1c'] + (0...instances).each do |idx| + server(idx).cloud(:ec2).availability_zones [azs[ idx % azs.length ]] + end + Chef::Log.warn "Can't pull this trick in v4.x (how do we manipulate individual nodes from Ironfan core?)" + # # Rote nodes among A/B testing groups + # (0..instances).each do |idx| + # server(idx).chef_node.normal[:split_testing] = ( (idx % 2 == 0) ? 'A' : 'B' ) if server(idx).chef_node + # end + end + + facet :dbnode do + instances 2 + # burly master, wussy workers + cloud(:ec2).flavor 'm1.large' + server(0).cloud(:ec2).flavor 'c1.xlarge' + # + role :mysql_server + volume(:data) do + size 50 + keep true + device '/dev/sdi' # note: will appear as /dev/xvdi on modern ubuntus + mount_point '/data/db' + attachable :ebs + snapshot_name :blank_xfs + resizable true + create_at_launch true + tags( :persistent => true, :local => false, :bulk => false, :fallback => false ) + end + end + + facet :esnode do + instances 1 + cloud(:ec2).flavor "m1.large" + # + role :nginx + role :redis_server + cloud(:ec2).security_group(redis_client) + role :elasticsearch_datanode + role :elasticsearch_httpnode + end +end diff --git a/example_clusters/zk.rb b/example_clusters/zk.rb new file mode 100644 index 00000000..15dd6309 --- /dev/null +++ b/example_clusters/zk.rb @@ -0,0 +1,71 @@ +# +# A zookeeper quorum +# +# The only sensible numbers of instances to launch with are 3 or 5. A solo +# zookeeper doesn't guarantee availability; and you should NEVER run an even +# number of ZKs (http://hbase.apache.org/book/zookeeper.html). 
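+#
+# The :zookeeper facet below launches a single node, which is fine for
+# development; for a real quorum, bump it to an odd count, e.g.:
+#
+#     facet :zookeeper do
+#       instances 3
+#       role :zookeeper_server
+#     end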
+# +Ironfan.cluster 'zk' do + cloud(:ec2) do + permanent true + availability_zones ['us-east-1d'] + flavor 't1.micro' # change to something larger for serious use + backing 'ebs' + image_name 'ironfan-natty' + bootstrap_distro 'ubuntu10.04-ironfan' + chef_client_script 'client.rb' + mount_ephemerals(:tags => { :zookeeper_journal => true, :zookeeper_scratch => true, :zookeeper_data => false, }) + end + + environment :dev + + role :systemwide + cloud(:ec2).security_group :systemwide + role :chef_client + role :ssh + cloud(:ec2).security_group(:ssh).authorize_port_range 22..22 + role :nfs_client + cloud(:ec2).security_group :nfs_client + role :set_hostname + + role :volumes + role :package_set, :last + role :minidash, :last + + role :org_base + role :org_users + role :org_final, :last + + role :jruby + + role :tuning, :last + + facet :zookeeper do + instances 1 + role :zookeeper_server + end + + facet(:zookeeper).facet_role.override_attributes({ + :zookeeper => { + :server => { :run_state => :start, }, + }, + }) + + # + # Attach 10GB persistent storage to each node, and use it for all zookeeper data_dirs. + # + # Modify the snapshot ID and attached volume size to suit + # + volume(:ebs1) do + size 10 + keep true + device '/dev/sdk' # note: will appear as /dev/xvdk on natty + mount_point '/data/ebs1' + attachable :ebs + snapshot_name :blank_xfs + resizable true + create_at_launch true + tags( :zookeeper_data => true, :zookeeper_journal => false, :persistent => true, :local => false, :bulk => true, :fallback => false ) + end + +end diff --git a/vendor/README.md b/vendor/README.md deleted file mode 100644 index f227f32f..00000000 --- a/vendor/README.md +++ /dev/null @@ -1,6 +0,0 @@ -## vendor/ -- actual checkouts of cookbooks &c - -* `infochimps/` - cookbooks maintained by infochimps (either originated us or heavily modified) -* `opscode/` - the opscode community cookbooks collection. By default a git submodule checkout of http://github.com/infochimps-labs/opscode-cookbooks. This repo tracks the opscode repo but has some relevant fixes applied. - -All of the cookbooks you see here are those infochimps uses in production at time of commit. After doing a `git submodule update --init`, you can check out your own fork and git will magically track that instead.