First working version.

Root commit ec5d49cb4af197e97f766a1223a4808a8d7bbc7b, committed by @garthk.
Showing with 683 additions and 0 deletions.
  1. +3 −0 .gitignore
  2. +16 −0 Makefile
  3. +9 −0 Modulefile
  4. +68 −0 README.md
  5. +14 −0 Vagrantfile
  6. +1 −0 files/.gitignore
  7. +337 −0 files/elasticsearch.yml
  8. +7 −0 files/etc-default-elasticsearch
  9. +16 −0 files/etc-init-elasticsearch.conf
  10. +44 −0 files/logging.yml
  11. +142 −0 manifests/init.pp
  12. +1 −0 tests/init.pp
  13. +1 −0 tests/modules/elasticsearch
  14. +24 −0 tests/vagrant.pp
3 .gitignore
@@ -0,0 +1,3 @@
+metadata.json
+pkg
+.vagrant
16 Makefile
@@ -0,0 +1,16 @@
+smoke: test
+
+test:
+ find tests -name \*.pp | xargs -n 1 -t puppet apply --noop --modulepath=tests/modules
+
+vm:
+ vagrant up
+
+es_version = 0.18.7
+es_tarchive = elasticsearch-$(es_version).tar.gz
+es_source = http://cloud.github.com/downloads/elasticsearch/elasticsearch
+
+fetch: files/$(es_tarchive)
+
+files/$(es_tarchive):
+ curl -o files/$(es_tarchive) $(es_source)/$(es_tarchive)
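+
+# Usage sketch (GNU make): variables can be overridden on the command line,
+# e.g. `make fetch es_version=0.18.6` to stage a different tarball, assuming
+# the same download URL layout holds for that version.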
9 Modulefile
@@ -0,0 +1,9 @@
+name 'garthk-elasticsearch'
+version '0.1.0'
+author 'garthk'
+license 'Apache License, Version 2.0'
+summary 'Puppet module for elasticsearch on Ubuntu.'
+description "Installs elasticsearch from the upstream tarball and manages it as an Upstart service on Ubuntu."
+
+## Add dependencies, if any:
+# dependency 'username/name', '>= 1.2.0'
68 README.md
@@ -0,0 +1,68 @@
+Puppet module for [elasticsearch] on Ubuntu.
+
+This module roughly matches the behavior of the `.deb` file shipped with
+elasticsearch version 0.19. I can't use 0.19 with the current version of
+[logstash], however, hence this module.
+
+[logstash]: https://github.com/logstash/logstash
+
+## Prerequisites
+
+* An Ubuntu system
+* Puppet 2.7
+
+## Installation
+
+ cd /etc/puppet/modules
+ git clone git://github.com/garthk/puppet-elasticsearch elasticsearch
+ cd elasticsearch
+ make fetch # for default 0.18.7 download
+
+## Usage
+
+ class { 'elasticsearch':
+ version => '0.18.7',
+ java_package => 'openjdk-6-jre-headless',
+ dbdir => '/var/lib/elasticsearch',
+ logdir => '/var/log/elasticsearch',
+ }
+
+All arguments are optional.
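+
+After a Puppet run you can sanity-check the node over HTTP (a minimal check,
+assuming the default port of 9200):
+
+    curl http://localhost:9200/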
+
+## Configuration
+
+The parameters to `elasticsearch` aside, you can supply your own
+`elasticsearch.yml` and `logging.yml` files by making them available via the
+Puppet file server as either:
+
+* `site-elasticsearch/${fqdn}/*.yml` or
+* `site-elasticsearch/*.yml`.
+
+If you don't supply them, the default content will be supplied by the module.
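+
+The `site-elasticsearch` source is a custom file server mount, so those URLs
+only resolve if you define the mount, typically in `fileserver.conf` on your
+puppet master. A minimal sketch (the path is an assumption; adjust it to your
+own layout):
+
+    [site-elasticsearch]
+        path /etc/puppet/site-elasticsearch
+        allow *
+
+Place your `elasticsearch.yml` and/or `logging.yml` in that directory,
+optionally under a per-host `${fqdn}` subdirectory.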
+
+## Testing
+
+### Smoke Testing
+
+* `make test` or `make smoke` to perform a simple [smoke test]
+
+### Vagrant
+
+* Install [Vagrant]
+
+* Get the `lucid32` box (safe even if you already have it):
+
+ vagrant box add lucid32 http://files.vagrantup.com/lucid32.box
+
+* Fetch the default version of elasticsearch:
+
+ make fetch
+
+* Launch the virtual machine:
+
+ vagrant up
+
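+* Check the node from the host (a quick sanity check; assumes the default
+  HTTP port and the host-only address set in the Vagrantfile):
+
+    curl http://192.168.31.46:9200/
+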
+[elasticsearch]: http://www.elasticsearch.org/
+[Vagrant]: http://vagrantup.com/
+[smoke test]: http://docs.puppetlabs.com/guides/tests_smoke.html
+[get in touch]: http://twitter.com/garthk
14 Vagrantfile
@@ -0,0 +1,14 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+Vagrant::Config.run do |config|
+ config.vm.box = "lucid32"
+ config.vm.host_name = "elasticsearch"
+ config.vm.network :hostonly, "192.168.31.46"
+ config.vm.share_folder "modules/elasticsearch", "/tmp/vagrant-puppet/modules/elasticsearch", ".", :create => true
+ config.vm.provision :puppet do |puppet|
+ puppet.manifests_path = "tests"
+ puppet.manifest_file = "vagrant.pp"
+ puppet.options = ["--modulepath", "/tmp/vagrant-puppet/modules"]
+ end
+end
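+
+# Iteration sketch (assumes the shared-folder layout above): after editing the
+# module, re-run provisioning and check the Upstart job inside the VM:
+#
+#   vagrant provision
+#   vagrant ssh -c 'status elasticsearch'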
1 files/.gitignore
@@ -0,0 +1 @@
+*.tar.gz
337 files/elasticsearch.yml
@@ -0,0 +1,337 @@
+##################### ElasticSearch Configuration Example #####################
+
+# This file contains an overview of various configuration settings,
+# targeted at operations staff. Application developers should
+# consult the guide at <http://elasticsearch.org/guide>.
+#
+# The installation procedure is covered at
+# <http://elasticsearch.org/guide/reference/setup/installation.html>.
+#
+# ElasticSearch comes with reasonable defaults for most settings,
+# so you can try it out without bothering with configuration.
+#
+# Most of the time, these defaults are just fine for running a production
+# cluster. If you're fine-tuning your cluster, or wondering about the
+# effect of a certain configuration option, please _do ask_ on the
+# mailing list or IRC channel [http://elasticsearch.org/community].
+
+# Any element in the configuration can be replaced with environment variables
+# by placing them in ${...} notation. For example:
+#
+# node.rack: ${RACK_ENV_VAR}
+
+# See <http://elasticsearch.org/guide/reference/setup/configuration.html>
+# for information on supported formats and syntax for the configuration file.
+
+
+################################### Cluster ###################################
+
+# Cluster name identifies your cluster for auto-discovery. If you're running
+# multiple clusters on the same network, make sure you're using unique names.
+#
+# cluster.name: elasticsearch
+
+
+#################################### Node #####################################
+
+# Node names are generated dynamically on startup, so you're relieved
+# from configuring them manually. You can tie this node to a specific name:
+#
+# node.name: "Franz Kafka"
+
+# Every node can be configured to allow or deny being eligible as the master,
+# and to allow or deny to store the data.
+#
+# Allow this node to be eligible as a master node (enabled by default):
+#
+# node.master: true
+#
+# Allow this node to store data (enabled by default):
+#
+# node.data: true
+
+# You can exploit these settings to design advanced cluster topologies.
+#
+# 1. You want this node to never become a master node, only to hold data.
+# This will be the "workhorse" of your cluster.
+#
+# node.master: false
+# node.data: true
+#
+# 2. You want this node to only serve as a master: to not store any data and
+# to have free resources. This will be the "coordinator" of your cluster.
+#
+# node.master: true
+# node.data: false
+#
+# 3. You want this node to be neither master nor data node, but
+# to act as a "search load balancer" (fetching data from nodes,
+# aggregating results, etc.)
+#
+# node.master: false
+# node.data: false
+
+# Use the Cluster Health API [http://localhost:9200/_cluster/health], the
+# Node Info API [http://localhost:9200/_cluster/nodes] or GUI tools
+# such as <http://github.com/lukas-vlcek/bigdesk> and
+# <http://mobz.github.com/elasticsearch-head> to inspect the cluster state.
+
+# A node can have generic attributes associated with it, which can later be used
+# for customized shard allocation filtering, or allocation awareness. An attribute
+# is a simple key-value pair, similar to node.key: value. For example:
+#
+# node.rack: rack314
+
+
+#################################### Index ####################################
+
+# You can set a number of options (such as shard/replica options, mapping
+# or analyzer definitions, translog settings, ...) for indices globally,
+# in this file.
+#
+# Note that it makes more sense to configure index settings specifically for
+# a certain index, either when creating it or by using the index templates API.
+#
+# See <http://elasticsearch.org/guide/reference/index-modules/> and
+# <http://elasticsearch.org/guide/reference/api/admin-indices-create-index.html>
+# for more information.
+
+# Set the number of shards (splits) of an index (5 by default):
+#
+# index.number_of_shards: 5
+
+# Set the number of replicas (additional copies) of an index (1 by default):
+#
+# index.number_of_replicas: 1
+
+# Note that for development on a local machine, with small indices, it usually
+# makes sense to "disable" the distributed features:
+#
+# index.number_of_shards: 1
+# index.number_of_replicas: 0
+
+# These settings directly affect the performance of index and search operations
+# in your cluster. Assuming you have enough machines to hold shards and
+# replicas, the rule of thumb is:
+#
+# 1. Having more *shards* enhances the _indexing_ performance and allows you
+#    to _distribute_ a big index across machines.
+# 2. Having more *replicas* enhances the _search_ performance and improves the
+# cluster _availability_.
+#
+# The "number_of_shards" is a one-time setting for an index.
+#
+# The "number_of_replicas" can be increased or decreased anytime,
+# by using the Index Update Settings API.
+#
+# ElasticSearch takes care of load balancing, relocating, gathering the
+# results from nodes, etc. Experiment with different settings to fine-tune
+# your setup.
+
+# Use the Index Status API (<http://localhost:9200/A/_status>) to inspect
+# the index status.
+
+
+#################################### Paths ####################################
+
+# Path to directory containing configuration (this file and logging.yml):
+#
+path.conf: /etc/elasticsearch
+
+# Path to directory where to store index data allocated for this node.
+#
+path.data: /var/lib/elasticsearch
+#
+# Can optionally include more than one location, causing data to be striped across
+# the locations (à la RAID 0) on a file level, favouring locations with most free
+# space on creation. For example:
+#
+# path.data: /path/to/data1,/path/to/data2
+
+# Path to temporary files:
+#
+# path.work: /path/to/work
+
+# Path to log files:
+#
+path.logs: /var/log/elasticsearch
+
+# Path to where plugins are installed:
+#
+# path.plugins: /path/to/plugins
+
+
+################################### Memory ####################################
+
+# ElasticSearch performs poorly when the JVM starts swapping: you should ensure that
+# it _never_ swaps.
+#
+# Set this property to true to lock the memory:
+#
+# bootstrap.mlockall: true
+
+# Make sure that the ES_MIN_MEM and ES_MAX_MEM environment variables are set
+# to the same value, and that the machine has enough memory to allocate
+# for ElasticSearch, leaving enough memory for the operating system itself.
+#
+# You should also make sure that the ElasticSearch process is allowed to lock
+# the memory, eg. by using `ulimit -l unlimited`.
+
+
+############################## Network And HTTP ###############################
+
+# ElasticSearch, by default, binds itself to the 0.0.0.0 address, and listens
+# on port [9200-9300] for HTTP traffic and on port [9300-9400] for node-to-node
+# communication (the range means that if the port is busy, it will automatically
+# try the next port).
+
+# Set the bind address specifically (IPv4 or IPv6):
+#
+# network.bind_host: 192.168.0.1
+
+# Set the address other nodes will use to communicate with this node. If not
+# set, it is automatically derived. It must point to an actual IP address.
+#
+# network.publish_host: 192.168.0.1
+
+# Set both 'bind_host' and 'publish_host':
+#
+# network.host: 192.168.0.1
+
+# Set a custom port for the node to node communication (9300 by default):
+#
+# transport.tcp.port: 9300
+
+# Enable compression for all communication between nodes (disabled by default):
+#
+# transport.tcp.compress: true
+
+# Set a custom port to listen for HTTP traffic:
+#
+# http.port: 9200
+
+# Set a custom allowed content length:
+#
+# http.max_content_length: 100mb
+
+# Disable HTTP completely:
+#
+# http.enabled: false
+
+
+################################### Gateway ###################################
+
+# The gateway allows for persisting the cluster state between full cluster
+# restarts. Every change to the state (such as adding an index) will be stored
+# in the gateway, and when the cluster starts up for the first time,
+# it will read its state from the gateway.
+
+# There are several types of gateway implementations. For more information,
+# see <http://elasticsearch.org/guide/reference/modules/gateway>.
+
+# The default gateway type is the "local" gateway (recommended):
+#
+# gateway.type: local
+
+# Settings below control how and when to start the initial recovery process on
+# a full cluster restart (to reuse as much local data as possible).
+
+# Allow recovery process after N nodes in a cluster are up:
+#
+# gateway.recover_after_nodes: 1
+
+# Set the timeout to initiate the recovery process, once the N nodes
+# from previous setting are up (accepts time value):
+#
+# gateway.recover_after_time: 5m
+
+# Set how many nodes are expected in this cluster. Once these N nodes
+# are up, begin recovery process immediately:
+#
+# gateway.expected_nodes: 2
+
+
+############################# Recovery Throttling #############################
+
+# These settings allow you to control the process of shard allocation between
+# nodes during initial recovery, replica allocation, rebalancing,
+# or when adding and removing nodes.
+
+# Set the number of concurrent recoveries happening on a node:
+#
+# 1. During the initial recovery
+#
+# cluster.routing.allocation.node_initial_primaries_recoveries: 4
+#
+# 2. During adding/removing nodes, rebalancing, etc
+#
+# cluster.routing.allocation.node_concurrent_recoveries: 2
+
+# Set to throttle throughput when recovering (eg. 100mb, by default unlimited):
+#
+# indices.recovery.max_size_per_sec: 0
+
+# Set to limit the number of open concurrent streams when
+# recovering a shard from a peer:
+#
+# indices.recovery.concurrent_streams: 5
+
+
+################################## Discovery ##################################
+
+# The discovery infrastructure ensures that nodes can be found within a cluster
+# and that a master node is elected. Multicast discovery is the default.
+
+# Set to ensure a node sees N other master eligible nodes to be considered
+# operational within the cluster. Set this option to a higher value (2-4)
+# for large clusters:
+#
+# discovery.zen.minimum_master_nodes: 1
+
+# Set the time to wait for ping responses from other nodes when discovering.
+# Set this option to a higher value on a slow or congested network
+# to minimize discovery failures:
+#
+# discovery.zen.ping.timeout: 3s
+
+# See <http://elasticsearch.org/guide/reference/modules/discovery/zen.html>
+# for more information.
+
+# Unicast discovery allows you to explicitly control which nodes will be used
+# to discover the cluster. It can be used when multicast is not present,
+# or to restrict the cluster communication-wise.
+#
+# 1. Disable multicast discovery (enabled by default):
+#
+# discovery.zen.ping.multicast.enabled: false
+#
+# 2. Configure an initial list of master nodes in the cluster
+# to perform discovery when new nodes (master or data) are started:
+#
+# discovery.zen.ping.unicast.hosts: ["host1", "host2:port", "host3[portX-portY]"]
+
+# EC2 discovery allows you to use the AWS EC2 API to perform discovery.
+#
+# You have to install the cloud-aws plugin to enable EC2 discovery.
+#
+# See <http://elasticsearch.org/guide/reference/modules/discovery/ec2.html>
+# for more information.
+#
+# See <http://elasticsearch.org/tutorials/2011/08/22/elasticsearch-on-ec2.html>
+# for a step-by-step tutorial.
+
+
+################################## Slow Log ##################################
+
+# Shard level query and fetch threshold logging.
+
+#index.search.slowlog.level: TRACE
+#index.search.slowlog.threshold.query.warn: 10s
+#index.search.slowlog.threshold.query.info: 5s
+#index.search.slowlog.threshold.query.debug: 2s
+#index.search.slowlog.threshold.query.trace: 500ms
+
+#index.search.slowlog.threshold.fetch.warn: 1s
+#index.search.slowlog.threshold.fetch.info: 800ms
+#index.search.slowlog.threshold.fetch.debug: 500ms
+#index.search.slowlog.threshold.fetch.trace: 200ms
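+
+
+######################## Site-Specific Override Sketch ########################
+
+# A minimal sketch of an override you might serve through the optional
+# "site-elasticsearch" file server mount described in this module's README.
+# The cluster name and hosts below are placeholders, not defaults:
+#
+# cluster.name: logstash
+# discovery.zen.ping.multicast.enabled: false
+# discovery.zen.ping.unicast.hosts: ["es1.example.com", "es2.example.com"]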
7 files/etc-default-elasticsearch
@@ -0,0 +1,7 @@
+ES_USER="elasticsearch"
+ES_GROUP="elasticsearch"
+ES_MIN_MEM="256m"
+ES_MAX_MEM="2g"
+ES_JAVA_OPTS="-Des.config=/etc/elasticsearch/elasticsearch.yml"
+
+export ES_MIN_MEM ES_MAX_MEM ES_JAVA_OPTS
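+
+# Note: the Memory section of elasticsearch.yml recommends setting ES_MIN_MEM
+# and ES_MAX_MEM to the same value in production; the 256m/2g split above is
+# only this module's default.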
16 files/etc-init-elasticsearch.conf
@@ -0,0 +1,16 @@
+description "ElasticSearch"
+
+start on (net-device-up and local-filesystems and runlevel [2345])
+stop on runlevel [016]
+respawn
+respawn limit 10 5
+
+console output
+
+script
+ if [ -f /etc/default/elasticsearch ]; then
+ . /etc/default/elasticsearch
+ fi
+
+ su -s /bin/dash -c "/usr/bin/elasticsearch -f" elasticsearch
+end script
44 files/logging.yml
@@ -0,0 +1,44 @@
+rootLogger: INFO, console, file
+logger:
+ # log action execution errors for easier debugging
+ action: DEBUG
+ # reduce the logging for aws, too much is logged under the default INFO
+ com.amazonaws: WARN
+
+ # gateway
+ #gateway: DEBUG
+ #index.gateway: DEBUG
+
+ # peer shard recovery
+ #indices.recovery: DEBUG
+
+ # discovery
+ #discovery: TRACE
+
+ index.search.slowlog: TRACE, index_search_slow_log_file
+
+additivity:
+ index.search.slowlog: false
+
+appender:
+ console:
+ type: console
+ layout:
+ type: consolePattern
+ conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
+
+ file:
+ type: dailyRollingFile
+ file: ${path.logs}/${cluster.name}.log
+ datePattern: "'.'yyyy-MM-dd"
+ layout:
+ type: pattern
+ conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
+
+ index_search_slow_log_file:
+ type: dailyRollingFile
+ file: ${path.logs}/${cluster.name}_index_search_slowlog.log
+ datePattern: "'.'yyyy-MM-dd"
+ layout:
+ type: pattern
+ conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"
142 manifests/init.pp
@@ -0,0 +1,142 @@
+class elasticsearch::params {
+ $version = "0.18.7"
+ $java_package = "openjdk-6-jre-headless"
+ $dbdir = "/var/lib/elasticsearch"
+ $logdir = "/var/log/elasticsearch"
+}
+
+class elasticsearch(
+ $version = $elasticsearch::params::version,
+ $java_package = $elasticsearch::params::java_package,
+ $dbdir = $elasticsearch::params::dbdir,
+ $logdir = $elasticsearch::params::logdir
+) inherits elasticsearch::params {
+ $tarchive = "elasticsearch-${version}.tar.gz"
+ $tmptarchive = "/tmp/${tarchive}"
+ $tmpdir = "/tmp/elasticsearch-${version}"
+ $sharedirv = "/usr/share/elasticsearch-${version}"
+ $sharedir = "/usr/share/elasticsearch"
+ $etcdir = "/etc/elasticsearch"
+ $upstartfile = "/etc/init/elasticsearch.conf"
+ $defaultsfile = "/etc/default/elasticsearch"
+ $configfile = "$etcdir/elasticsearch.yml"
+ $logconfigfile = "$etcdir/logging.yml"
+
+ if !defined(Package[$java_package]) {
+ package { $java_package:
+ ensure => installed,
+ }
+ }
+
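+  # Resource default: every file resource declared in this class is applied
+  # before the elasticsearch service starts, so configuration lands first.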
+ File {
+ before => Service['elasticsearch'],
+ }
+
+ group { 'elasticsearch':
+ ensure => present,
+ system => true,
+ }
+
+ user { 'elasticsearch':
+ ensure => present,
+ system => true,
+ home => $sharedir,
+ shell => '/bin/false',
+ gid => 'elasticsearch',
+ require => Group['elasticsearch'],
+ }
+
+ file { $dbdir:
+ ensure => directory,
+ owner => 'elasticsearch',
+ group => 'elasticsearch',
+ mode => '0755',
+ require => User['elasticsearch'],
+ }
+
+ file { $logdir:
+ ensure => directory,
+ owner => 'elasticsearch',
+ group => 'elasticsearch',
+ mode => '0755',
+ require => User['elasticsearch'],
+ }
+
+ file { $tmptarchive:
+ ensure => present,
+ source => "puppet:///elasticsearch/${tarchive}",
+ owner => 'elasticsearch',
+ mode => '0644',
+ }
+
+ exec { $tmpdir:
+ command => "/bin/tar xzf ${tmptarchive}",
+ cwd => "/tmp",
+ creates => $tmpdir,
+ require => File[$tmptarchive],
+ }
+
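+  # Copy the unpacked tree into a versioned directory under /usr/share;
+  # `creates` keeps the find | xargs install run idempotent.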
+ exec { $sharedirv:
+ command => "find . -type f | xargs -i{} install -D {} ${sharedirv}/{}",
+ cwd => $tmpdir,
+ path => "/bin:/usr/bin",
+ creates => $sharedirv,
+ require => Exec[$tmpdir],
+ }
+
+ file { $sharedir:
+ ensure => link,
+ target => $sharedirv,
+ require => Exec[$sharedirv],
+ }
+
+ file { "$sharedir/elasticsearch.in.sh":
+ ensure => link,
+ target => "$sharedir/bin/elasticsearch.in.sh",
+ require => File[$sharedir],
+ }
+
+ file { "/usr/bin/elasticsearch":
+ ensure => link,
+ target => "$sharedirv/bin/elasticsearch",
+ require => Exec[$sharedirv],
+ }
+
+ file { $etcdir:
+ ensure => directory
+ }
+
+ file { $configfile:
+ ensure => present,
+ source => ["puppet:///site-elasticsearch/${fqdn}/elasticsearch.yml",
+ "puppet:///site-elasticsearch/elasticsearch.yml",
+ "puppet:///modules/elasticsearch/elasticsearch.yml"],
+ owner => root,
+ group => root,
+ }
+
+ file { $logconfigfile:
+ ensure => present,
+ source => ["puppet:///site-elasticsearch/${fqdn}/logging.yml",
+ "puppet:///site-elasticsearch/logging.yml",
+ "puppet:///modules/elasticsearch/logging.yml"],
+ owner => root,
+ group => root,
+ }
+
+ file { $defaultsfile:
+ ensure => present,
+ source => "puppet:///elasticsearch/etc-default-elasticsearch",
+ }
+
+ file { $upstartfile:
+ ensure => present,
+ source => "puppet:///elasticsearch/etc-init-elasticsearch.conf",
+ }
+
+ service { 'elasticsearch':
+ ensure => running,
+ enable => true,
+ provider => upstart,
+ }
+}
1 tests/init.pp
@@ -0,0 +1 @@
+class { 'elasticsearch': }
1 tests/modules/elasticsearch
24 tests/vagrant.pp
@@ -0,0 +1,24 @@
+stage { pre: before => Stage[main] }
+
+class apt_get_update {
+ $sentinel = "/var/lib/apt/first-puppet-run"
+
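+  # Runs apt-get update on the first run (no sentinel yet) or whenever
+  # anything under /etc/apt is newer than the sentinel file.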
+ exec { "initial apt-get update":
+ command => "/usr/bin/apt-get update && touch ${sentinel}",
+ onlyif => "/usr/bin/env test \\! -f ${sentinel} || /usr/bin/env test \\! -z \"$(find /etc/apt -type f -cnewer ${sentinel})\"",
+ timeout => 3600,
+ }
+}
+
+# If we don't run apt-get update, installing openjdk will fail.
+class { 'apt_get_update':
+ stage => pre,
+}
+
+group { 'puppet':
+ ensure => present,
+ system => true,
+}
+
+class { 'elasticsearch':
+}
