added pkg dir and ssh keys
bcarpio committed Mar 7, 2012
1 parent 42b8ab1 commit 6ff77b7
Showing 21 changed files with 408 additions and 12 deletions.
13 changes: 3 additions & 10 deletions Modulefile
@@ -1,11 +1,4 @@
name 'bcarpio-hadoop'
version '0.0.1'
source 'UNKNOWN'
author 'bcarpio'
license 'UNKNOWN'
summary 'UNKNOWN'
description 'UNKNOWN'
project_page 'UNKNOWN'

## Add dependencies, if any:
# dependency 'username/name', '>= 1.2.0'
version '0.0.2'
author 'Brian Carpio'
summary 'Install Hadoop and create an HDFS filesystem. Read the README.markdown for more information'
3 changes: 3 additions & 0 deletions files/ssh/id_rsa
@@ -0,0 +1,3 @@
-----BEGIN RSA PRIVATE KEY-----
PRIVATE KEY HERE
-----END RSA PRIVATE KEY-----
1 change: 1 addition & 0 deletions files/ssh/id_rsa.pub
@@ -0,0 +1 @@
PUBLIC KEY HERE
37 changes: 36 additions & 1 deletion manifests/init.pp
@@ -121,5 +121,40 @@
mode => "644",
alias => "mapred-site-xml",
content => template("hadoop/conf/mapred-site.xml.erb"),
}

file { "/home/hduser/.ssh/":
owner => "hduser",
group => "hadoop",
mode => "700",
ensure => "directory",
alias => "hduser-ssh-dir",
}

file { "/home/hduser/.ssh/id_rsa.pub":
ensure => present,
owner => "hduser",
group => "hadoop",
mode => "644",
source => "puppet:///modules/hadoop/ssh/id_rsa.pub",
require => File["hduser-ssh-dir"],
}

file { "/home/hduser/.ssh/id_rsa":
ensure => present,
owner => "hduser",
group => "hadoop",
mode => "600",
source => "puppet:///modules/hadoop/ssh/id_rsa",
require => File["hduser-ssh-dir"],
}

file { "/home/hduser/.ssh/authorized_keys":
ensure => present,
owner => "hduser",
group => "hadoop",
mode => "644",
source => "puppet:///modules/hadoop/ssh/id_rsa.pub",
require => File["hduser-ssh-dir"],
}
}
}
Binary file added pkg/bcarpio-hadoop-0.0.2.tar.gz
Binary file not shown.
4 changes: 4 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/Modulefile
@@ -0,0 +1,4 @@
name 'bcarpio-hadoop'
version '0.0.2'
author 'Brian Carpio'
summary 'Install Hadoop and create an HDFS filesystem. Read the README.markdown for more information'
3 changes: 3 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/README
@@ -0,0 +1,3 @@
hadoop

This is the hadoop module.
15 changes: 15 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/README.markdown
@@ -0,0 +1,15 @@
# Hadoop #

This module was created to assist with the installation and configuration of Hadoop. Simply edit the params.pp file and MapReduce yourself away!

# Configuration #

* A Hadoop tar.gz file needs to be placed into ~/modules/hadoop/files. You can download Hadoop from here: http://hadoop.apache.org/common/releases.html
* Once downloaded, update params.pp with the version you downloaded.
* params.pp also requires the java module I have already published; alternatively, update the $java_home variable to point at your JDK (see the sketch below).
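
For example, a hypothetical params.pp edit for Hadoop 0.20.203.0 that hard-codes $java_home instead of relying on the java module might look like this (the JDK path is an assumption; adjust it to your environment):

    # Hadoop release placed in ~/modules/hadoop/files as hadoop-0.20.203.0.tar.gz
    $version = $::hostname ? {
      default => "0.20.203.0",
    }

    # Assumed JDK location; only needed if you are not using the java module
    $java_home = $::hostname ? {
      default => "/usr/lib/jvm/java-6-sun",
    }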

# Author #

* Brian Carpio
* http://www.thetek.net
* http://www.linkedin.com/in/briancarpio
125 changes: 125 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/manifests/init.pp
@@ -0,0 +1,125 @@
# /etc/puppet/modules/hadoop/manifests/init.pp

class hadoop {

require hadoop::params

group { "hadoop":
ensure => present,
gid => "800"
}

user { "hduser":
ensure => present,
comment => "Hadoop",
password => "!!",
uid => "800",
gid => "800",
shell => "/bin/bash",
home => "/home/hduser",
require => Group["hadoop"],
}

file { "/home/hduser/.bash_profile":
ensure => present,
owner => "hduser",
group => "hadoop",
alias => "hduser-bash_profile",
content => template("hadoop/home/bash_profile.erb"),
require => User["hduser"]
}

file { "/home/hduser":
ensure => "directory",
owner => "hduser",
group => "hadoop",
alias => "hduser-home",
require => [ User["hduser"], Group["hadoop"] ]
}

file {"$hadoop::params::hdfs_path":
ensure => "directory",
owner => "hduser",
group => "hadoop",
alias => "hdfs-dir",
require => File["hduser-home"]
}

file {"$hadoop::params::hadoop_base":
ensure => "directory",
owner => "hduser",
group => "hadoop",
alias => "hadoop-base",
}

file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}.tar.gz":
mode => 0644,
owner => hduser,
group => hadoop,
source => "puppet:///modules/hadoop/hadoop-${hadoop::params::version}.tar.gz",
alias => "hadoop-source-tgz",
before => Exec["untar-hadoop"],
require => File["hadoop-base"]
}

exec { "untar hadoop-${hadoop::params::version}.tar.gz":
command => "tar -zxf hadoop-${hadoop::params::version}.tar.gz",
cwd => "${hadoop::params::hadoop_base}",
creates => "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}",
alias => "untar-hadoop",
refreshonly => true,
subscribe => File["hadoop-source-tgz"],
user => "hduser",
before => File["hadoop-symlink"]
}

file { "${hadoop::params::hadoop_base}/hadoop":
force => true,
ensure => "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}",
alias => "hadoop-symlink",
owner => "hduser",
group => "hadoop",
require => File["hadoop-source-tgz"],
before => [ File["core-site-xml"], File["hdfs-site-xml"], File["mapred-site-xml"], File["hadoop-env-sh"]]
}

file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/conf/core-site.xml":
owner => "hduser",
group => "hadoop",
mode => "644",
alias => "core-site-xml",
content => template("hadoop/conf/core-site.xml.erb"),
}

file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/conf/hdfs-site.xml":
owner => "hduser",
group => "hadoop",
mode => "644",
alias => "hdfs-site-xml",
content => template("hadoop/conf/hdfs-site.xml.erb"),
}

file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/conf/hadoop-env.sh":
owner => "hduser",
group => "hadoop",
mode => "644",
alias => "hadoop-env-sh",
content => template("hadoop/conf/hadoop-env.sh.erb"),
}

exec { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/bin/hadoop namenode -format":
user => "hduser",
alias => "format-hdfs",
refreshonly => true,
subscribe => File["hdfs-dir"],
require => [ File["hadoop-symlink"], File["java-app-dir"], File["hduser-bash_profile"], File["mapred-site-xml"], File["hdfs-site-xml"], File["core-site-xml"], File["hadoop-env-sh"]]
}

file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/conf/mapred-site.xml":
owner => "hduser",
group => "hadoop",
mode => "644",
alias => "mapred-site-xml",
content => template("hadoop/conf/mapred-site.xml.erb"),
}
}
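
As a usage sketch (an assumption, not part of this commit), a node can apply the module simply by declaring the class, provided the module is on the puppet master's modulepath:

    node 'hadoop01' {
      include hadoop
    }
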
36 changes: 36 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/manifests/params.pp
@@ -0,0 +1,36 @@
# /etc/puppet/modules/hadoop/manifests/params.pp

class hadoop::params {

include java::params

$version = $::hostname ? {
default => "0.20.203.0",
}

$master = $::hostname ? {
default => "hadoop01",
}

$hdfsport = $::hostname ? {
default => "8020",
}

$replication = $::hostname ? {
default => "2",
}

$jobtrackerport = $::hostname ? {
default => "8021",
}
$java_home = $::hostname ? {
default => "${java::params::java_base}/jdk${java::params::java_version}",
}
$hadoop_base = $::hostname ? {
default => "/opt/hadoop",
}
$hdfs_path = $::hostname ? {
default => "/home/hduser/hdfs",
}

}
33 changes: 33 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/metadata.json
@@ -0,0 +1,33 @@
{
"license": "UNKNOWN",
"source": "UNKNOWN",
"dependencies": [

],
"types": [

],
"project_page": "UNKNOWN",
"description": "UNKNOWN",
"summary": "Install hadoop and create hdfs file system. Read the README.markdown for more information",
"author": "Brian Carpio",
"version": "0.0.2",
"name": "bcarpio-hadoop",
"checksums": {
"metadata.json": "d34d0b70aba36510fbc2df4e667479ef",
"spec/spec.opts": "a600ded995d948e393fbe2320ba8e51c",
"spec/spec_helper.rb": "ca19ec4f451ebc7fdb035b52eae6e909",
"templates/home/bash_profile.erb": "724ea20d657f448f72ede2c010170eae",
"templates/conf/hadoop-env.sh.erb": "b4a50976705d367d863439fa9b48c7f2",
"manifests/init.pp": "a27827d2a373c5675ce8f69df309fd9d",
"tests/init.pp": "f8f7d7631f8c82d2f068acdb95bf5011",
"files/hadoop-0.20.203.0.tar.gz": "29a4a2f85ebb3fb2bb3c66b833fb723d",
"templates/conf/mapred-site.xml.erb": "5bb6506a2391176fa8ec3b9becce1cc7",
"Modulefile": "286c71f935a1b37495a778b36eead087",
"README.markdown": "2003d6576c13c8608130be5cec173781",
"README": "7ecfbb37a09181c422aa8887fe5f8e9b",
"manifests/params.pp": "d5020929e8f15871a1716b4725161107",
"templates/conf/hdfs-site.xml.erb": "ac646459b407cc9d13e610adcb85b806",
"templates/conf/core-site.xml.erb": "48da047d29fbde099d9b7b7677e2e42b"
}
}
6 changes: 6 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/spec/spec.opts
@@ -0,0 +1,6 @@
--format
s
--colour
--loadby
mtime
--backtrace
18 changes: 18 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/spec/spec_helper.rb
@@ -0,0 +1,18 @@
require 'pathname'
dir = Pathname.new(__FILE__).parent
$LOAD_PATH.unshift(dir, dir + 'lib', dir + '../lib')

require 'mocha'
require 'puppet'
gem 'rspec', '=1.2.9'
require 'spec/autorun'

Spec::Runner.configure do |config|
config.mock_with :mocha
end

# We need this because the RAL uses 'should' as a method. This
# allows us the same behaviour but with a different method name.
class Object
alias :must :should
end
18 changes: 18 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/templates/conf/core-site.xml.erb
@@ -0,0 +1,18 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value><%= scope.lookupvar('hadoop::params::hdfs_path') %></value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://<%= scope.lookupvar('hadoop::params::master') %>:<%= scope.lookupvar('hadoop::params::hdfsport') %></value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
</configuration>
54 changes: 54 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/templates/conf/hadoop-env.sh.erb
@@ -0,0 +1,54 @@
# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use. Required.
export JAVA_HOME=<%= scope.lookupvar('hadoop::params::java_home') %>

# Extra Java CLASSPATH elements. Optional.
# export HADOOP_CLASSPATH=

# The maximum amount of heap to use, in MB. Default is 1000.
# export HADOOP_HEAPSIZE=2000

# Extra Java runtime options. Empty by default.
# export HADOOP_OPTS=-server

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
# export HADOOP_TASKTRACKER_OPTS=
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
# export HADOOP_CLIENT_OPTS

# Extra ssh options. Empty by default.
# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"

# Where log files are stored. $HADOOP_HOME/logs by default.
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs

# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves

# host:path where hadoop code should be rsync'd from. Unset by default.
# export HADOOP_MASTER=master:/home/$USER/src/hadoop

# Seconds to sleep between slave commands. Unset by default. This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HADOOP_SLAVE_SLEEP=0.1

# The directory where pid files are stored. /tmp by default.
# export HADOOP_PID_DIR=/var/hadoop/pids

# A string representing this instance of hadoop. $USER by default.
# export HADOOP_IDENT_STRING=$USER

# The scheduling priority for daemon processes. See 'man nice'.
# export HADOOP_NICENESS=10
12 changes: 12 additions & 0 deletions pkg/bcarpio-hadoop-0.0.2/templates/conf/hdfs-site.xml.erb
@@ -0,0 +1,12 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value><%= scope.lookupvar('hadoop::params::replication') %></value>
<description>Default block replication.
The actual number of replications can be specified when the file is created.
The default is used if replication is not specified in create time.
</description>
</property>
</configuration>
