added pkg dir and ssh keys

commit 6ff77b732aa98a3809da00b69ee5a25a49cd7286 1 parent 42b8ab1
@bcarpio authored
13 Modulefile
@@ -1,11 +1,4 @@
name 'bcarpio-hadoop'
-version '0.0.1'
-source 'UNKNOWN'
-author 'bcarpio'
-license 'UNKNOWN'
-summary 'UNKNOWN'
-description 'UNKNOWN'
-project_page 'UNKNOWN'
-
-## Add dependencies, if any:
-# dependency 'username/name', '>= 1.2.0'
+version '0.0.2'
+author 'Brian Carpio'
+summary 'Install hadoop and create hdfs file system. Read the README.markdown for more information'
3  files/ssh/id_rsa
@@ -0,0 +1,3 @@
+-----BEGIN RSA PRIVATE KEY-----
+PRIVATE KEY HERE
+-----END RSA PRIVATE KEY-----
1  files/ssh/id_rsa.pub
@@ -0,0 +1 @@
+PUBLIC KEY HERE
37 manifests/init.pp
@@ -121,5 +121,40 @@
mode => "644",
alias => "mapred-site-xml",
content => template("hadoop/conf/mapred-site.xml.erb"),
+ }
+
+ file { "/home/hduser/.ssh/":
+ owner => "hduser",
+ group => "hadoop",
+ mode => "700",
+ ensure => "directory",
+ alias => "hduser-ssh-dir",
+ }
+
+ file { "/home/hduser/.ssh/id_rsa.pub":
+ ensure => present,
+ owner => "hduser",
+ group => "hadoop",
+ mode => "644",
+ source => "puppet:///modules/hadoop/ssh/id_rsa.pub",
+ require => File["hduser-ssh-dir"],
+ }
+
+ file { "/home/hduser/.ssh/id_rsa":
+ ensure => present,
+ owner => "hduser",
+ group => "hadoop",
+ mode => "600",
+ source => "puppet:///modules/hadoop/ssh/id_rsa",
+ require => File["hduser-ssh-dir"],
+ }
+
+ file { "/home/hduser/.ssh/authorized_keys":
+ ensure => present,
+ owner => "hduser",
+ group => "hadoop",
+ mode => "644",
+ source => "puppet:///modules/hadoop/ssh/id_rsa.pub",
+ require => File["hduser-ssh-dir"],
}
-}
+}
BIN  pkg/bcarpio-hadoop-0.0.2.tar.gz
Binary file not shown
4 pkg/bcarpio-hadoop-0.0.2/Modulefile
@@ -0,0 +1,4 @@
+name 'bcarpio-hadoop'
+version '0.0.2'
+author 'Brian Carpio'
+summary 'Install hadoop and create hdfs file system. Read the README.markdown for more information'
3  pkg/bcarpio-hadoop-0.0.2/README
@@ -0,0 +1,3 @@
+hadoop
+
+This is the hadoop module.
15 pkg/bcarpio-hadoop-0.0.2/README.markdown
@@ -0,0 +1,15 @@
+# Hadoop #
+
+This module was created to assist with the installation and configuration of Hadoop. Simply edit the params.pp file and MapReduce yourself away!
+
+# Configuration #
+
+* Place a Hadoop tar.gz into ~/modules/hadoop/files. You can download Hadoop from http://hadoop.apache.org/common/releases.html
+* Once downloaded, update params.pp with the version you downloaded (a sketch follows this list).
+* params.pp also requires the java module I have already published; alternatively, update the $java_home variable to point at your JDK.
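A minimal sketch of the edit these steps describe, reusing the $::hostname selector pattern that params.pp already uses. The version shown is the module's current default; the JDK path is an illustrative assumption, not a value shipped with the module:

  $version = $::hostname ? {
    default => "0.20.203.0",              # set to the Hadoop version you placed in files/
  }

  $java_home = $::hostname ? {
    default => "/usr/lib/jvm/java-6-sun", # illustrative path; only needed if you skip the java module
  }

The same selector also accepts per-host entries above the default arm if individual nodes need different values.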
+
+# Author #
+
+* Brian Carpio
+* http://www.thetek.net
+* http://www.linkedin.com/in/briancarpio
BIN  pkg/bcarpio-hadoop-0.0.2/files/hadoop-0.20.203.0.tar.gz
Binary file not shown
125 pkg/bcarpio-hadoop-0.0.2/manifests/init.pp
@@ -0,0 +1,125 @@
+# /etc/puppet/modules/hadoop/manifests/init.pp
+
+class hadoop {
+
+ require hadoop::params
+
+ group { "hadoop":
+ ensure => present,
+ gid => "800"
+ }
+
+ user { "hduser":
+ ensure => present,
+ comment => "Hadoop",
+ password => "!!",
+ uid => "800",
+ gid => "800",
+ shell => "/bin/bash",
+ home => "/home/hduser",
+ require => Group["hadoop"],
+ }
+
+ file { "/home/hduser/.bash_profile":
+ ensure => present,
+ owner => "hduser",
+ group => "hadoop",
+ alias => "hduser-bash_profile",
+ content => template("hadoop/home/bash_profile.erb"),
+ require => User["hduser"]
+ }
+
+ file { "/home/hduser":
+ ensure => "directory",
+ owner => "hduser",
+ group => "hadoop",
+ alias => "hduser-home",
+ require => [ User["hduser"], Group["hadoop"] ]
+ }
+
+ file {"$hadoop::params::hdfs_path":
+ ensure => "directory",
+ owner => "hduser",
+ group => "hadoop",
+ alias => "hdfs-dir",
+ require => File["hduser-home"]
+ }
+
+ file {"$hadoop::params::hadoop_base":
+ ensure => "directory",
+ owner => "hduser",
+ group => "hadoop",
+ alias => "hadoop-base",
+ }
+
+ file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}.tar.gz":
+ mode => 0644,
+ owner => hduser,
+ group => hadoop,
+ source => "puppet:///modules/hadoop/hadoop-${hadoop::params::version}.tar.gz",
+ alias => "hadoop-source-tgz",
+ before => Exec["untar-hadoop"],
+ require => File["hadoop-base"]
+ }
+
+ exec { "untar hadoop-${hadoop::params::version}.tar.gz":
+ command => "tar -zxf hadoop-${hadoop::params::version}.tar.gz",
+ cwd => "${hadoop::params::hadoop_base}",
+ creates => "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}",
+ alias => "untar-hadoop",
+ refreshonly => true,
+ subscribe => File["hadoop-source-tgz"],
+ user => "hduser",
+ before => File["hadoop-symlink"]
+ }
+
+ file { "${hadoop::params::hadoop_base}/hadoop":
+ force => true,
+ ensure => "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}",
+ alias => "hadoop-symlink",
+ owner => "hduser",
+ group => "hadoop",
+ require => File["hadoop-source-tgz"],
+ before => [ File["core-site-xml"], File["hdfs-site-xml"], File["mapred-site-xml"], File["hadoop-env-sh"]]
+ }
+
+ file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/conf/core-site.xml":
+ owner => "hduser",
+ group => "hadoop",
+ mode => "644",
+ alias => "core-site-xml",
+ content => template("hadoop/conf/core-site.xml.erb"),
+ }
+
+ file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/conf/hdfs-site.xml":
+ owner => "hduser",
+ group => "hadoop",
+ mode => "644",
+ alias => "hdfs-site-xml",
+ content => template("hadoop/conf/hdfs-site.xml.erb"),
+ }
+
+ file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/conf/hadoop-env.sh":
+ owner => "hduser",
+ group => "hadoop",
+ mode => "644",
+ alias => "hadoop-env-sh",
+ content => template("hadoop/conf/hadoop-env.sh.erb"),
+ }
+
+ exec { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/bin/hadoop namenode -format":
+ user => "hduser",
+ alias => "format-hdfs",
+ refreshonly => true,
+ subscribe => File["hdfs-dir"],
+ require => [ File["hadoop-symlink"], File["java-app-dir"], File["hduser-bash_profile"], File["mapred-site-xml"], File["hdfs-site-xml"], File["core-site-xml"], File["hadoop-env-sh"]]
+ }
+
+ file { "${hadoop::params::hadoop_base}/hadoop-${hadoop::params::version}/conf/mapred-site.xml":
+ owner => "hduser",
+ group => "hadoop",
+ mode => "644",
+ alias => "mapred-site-xml",
+ content => template("hadoop/conf/mapred-site.xml.erb"),
+ }
+}
36 pkg/bcarpio-hadoop-0.0.2/manifests/params.pp
@@ -0,0 +1,36 @@
+# /etc/puppet/modules/hadoop/manifests/params.pp
+
+class hadoop::params {
+
+ include java::params
+
+ $version = $::hostname ? {
+ default => "0.20.203.0",
+ }
+
+ $master = $::hostname ? {
+ default => "hadoop01",
+ }
+
+ $hdfsport = $::hostname ? {
+ default => "8020",
+ }
+
+ $replication = $::hostname ? {
+ default => "2",
+ }
+
+ $jobtrackerport = $::hostname ? {
+ default => "8021",
+ }
+ $java_home = $::hostname ? {
+ default => "${java::params::java_base}/jdk${java::params::java_version}",
+ }
+ $hadoop_base = $::hostname ? {
+ default => "/opt/hadoop",
+ }
+ $hdfs_path = $::hostname ? {
+ default => "/home/hduser/hdfs",
+ }
+
+}
33 pkg/bcarpio-hadoop-0.0.2/metadata.json
@@ -0,0 +1,33 @@
+{
+ "license": "UNKNOWN",
+ "source": "UNKNOWN",
+ "dependencies": [
+
+ ],
+ "types": [
+
+ ],
+ "project_page": "UNKNOWN",
+ "description": "UNKNOWN",
+ "summary": "Install hadoop and create hdfs file system. Read the README.markdown for more information",
+ "author": "Brian Carpio",
+ "version": "0.0.2",
+ "name": "bcarpio-hadoop",
+ "checksums": {
+ "metadata.json": "d34d0b70aba36510fbc2df4e667479ef",
+ "spec/spec.opts": "a600ded995d948e393fbe2320ba8e51c",
+ "spec/spec_helper.rb": "ca19ec4f451ebc7fdb035b52eae6e909",
+ "templates/home/bash_profile.erb": "724ea20d657f448f72ede2c010170eae",
+ "templates/conf/hadoop-env.sh.erb": "b4a50976705d367d863439fa9b48c7f2",
+ "manifests/init.pp": "a27827d2a373c5675ce8f69df309fd9d",
+ "tests/init.pp": "f8f7d7631f8c82d2f068acdb95bf5011",
+ "files/hadoop-0.20.203.0.tar.gz": "29a4a2f85ebb3fb2bb3c66b833fb723d",
+ "templates/conf/mapred-site.xml.erb": "5bb6506a2391176fa8ec3b9becce1cc7",
+ "Modulefile": "286c71f935a1b37495a778b36eead087",
+ "README.markdown": "2003d6576c13c8608130be5cec173781",
+ "README": "7ecfbb37a09181c422aa8887fe5f8e9b",
+ "manifests/params.pp": "d5020929e8f15871a1716b4725161107",
+ "templates/conf/hdfs-site.xml.erb": "ac646459b407cc9d13e610adcb85b806",
+ "templates/conf/core-site.xml.erb": "48da047d29fbde099d9b7b7677e2e42b"
+ }
+}
6 pkg/bcarpio-hadoop-0.0.2/spec/spec.opts
@@ -0,0 +1,6 @@
+--format
+s
+--colour
+--loadby
+mtime
+--backtrace
18 pkg/bcarpio-hadoop-0.0.2/spec/spec_helper.rb
@@ -0,0 +1,18 @@
+require 'pathname'
+dir = Pathname.new(__FILE__).parent
+$LOAD_PATH.unshift(dir, dir + 'lib', dir + '../lib')
+
+require 'mocha'
+require 'puppet'
+gem 'rspec', '=1.2.9'
+require 'spec/autorun'
+
+Spec::Runner.configure do |config|
+ config.mock_with :mocha
+end
+
+# We need this because the RAL uses 'should' as a method. This
+# allows us the same behaviour but with a different method name.
+class Object
+ alias :must :should
+end
18 pkg/bcarpio-hadoop-0.0.2/templates/conf/core-site.xml.erb
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+<property>
+ <name>hadoop.tmp.dir</name>
+ <value><%= scope.lookupvar('hadoop::params::hdfs_path') %></value>
+ <description>A base for other temporary directories.</description>
+</property>
+<property>
+ <name>fs.default.name</name>
+ <value>hdfs://<%= scope.lookupvar('hadoop::params::master') %>:<%= scope.lookupvar('hadoop::params::hdfsport') %></value>
+ <description>The name of the default file system. A URI whose
+ scheme and authority determine the FileSystem implementation. The
+ uri's scheme determines the config property (fs.SCHEME.impl) naming
+ the FileSystem implementation class. The uri's authority is used to
+ determine the host, port, etc. for a filesystem.</description>
+</property>
+</configuration>
54 pkg/bcarpio-hadoop-0.0.2/templates/conf/hadoop-env.sh.erb
@@ -0,0 +1,54 @@
+# Set Hadoop-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME. All others are
+# optional. When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use. Required.
+export JAVA_HOME=<%= scope.lookupvar('hadoop::params::java_home') %>
+
+# Extra Java CLASSPATH elements. Optional.
+# export HADOOP_CLASSPATH=
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HADOOP_HEAPSIZE=2000
+
+# Extra Java runtime options. Empty by default.
+# export HADOOP_OPTS=-server
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+# export HADOOP_TASKTRACKER_OPTS=
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+# export HADOOP_CLIENT_OPTS
+
+# Extra ssh options. Empty by default.
+# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+# Where log files are stored. $HADOOP_HOME/logs by default.
+# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+
+# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
+# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+# host:path where hadoop code should be rsync'd from. Unset by default.
+# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+# Seconds to sleep between slave commands. Unset by default. This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HADOOP_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+# export HADOOP_PID_DIR=/var/hadoop/pids
+
+# A string representing this instance of hadoop. $USER by default.
+# export HADOOP_IDENT_STRING=$USER
+
+# The scheduling priority for daemon processes. See 'man nice'.
+# export HADOOP_NICENESS=10
12 pkg/bcarpio-hadoop-0.0.2/templates/conf/hdfs-site.xml.erb
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+<property>
+ <name>dfs.replication</name>
+ <value><%= scope.lookupvar('hadoop::params::replication') %></value>
+ <description>Default block replication.
+ The actual number of replications can be specified when the file is created.
+ The default is used if replication is not specified in create time.
+ </description>
+</property>
+</configuration>
12 pkg/bcarpio-hadoop-0.0.2/templates/conf/mapred-site.xml.erb
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+<property>
+ <name>mapred.job.tracker</name>
+ <value><%= scope.lookupvar('hadoop::params::master') %>:<%= scope.lookupvar('hadoop::params::jobtrackerport') %></value>
+ <description>The host and port that the MapReduce job tracker runs
+ at. If "local", then jobs are run in-process as a single map
+ and reduce task.
+ </description>
+</property>
+</configuration>
2  pkg/bcarpio-hadoop-0.0.2/templates/home/bash_profile.erb
@@ -0,0 +1,2 @@
+export PATH=$PATH:<%= scope.lookupvar('hadoop::params::java_home') %>/bin:<%= scope.lookupvar('hadoop::params::hadoop_base') %>/hadoop/bin
+export JAVA_HOME=<%= scope.lookupvar('hadoop::params::java_home') %>
1  pkg/bcarpio-hadoop-0.0.2/tests/init.pp
@@ -0,0 +1 @@
+include hadoop
27 templates/home/bash_profile.erb
@@ -1,2 +1,27 @@
+# Set Users path
export PATH=$PATH:<%= scope.lookupvar('hadoop::params::java_home') %>/bin:<%= scope.lookupvar('hadoop::params::hadoop_base') %>/hadoop/bin
-export JAVA_HOME=<%= scope.lookupvar('hadoop::params::java_home') %>
+
+# Set Hadoop-related environment variables
+export HADOOP_HOME=<%= scope.lookupvar('hadoop::params::hadoop_base') %>/hadoop
+
+# Set JAVA_HOME (we will also configure JAVA_HOME directly for Hadoop later on)
+export JAVA_HOME=<%= scope.lookupvar('hadoop::params::java_home') %>
+
+# Some convenient aliases and functions for running Hadoop-related commands
+unalias fs &> /dev/null
+alias fs="hadoop fs"
+unalias hls &> /dev/null
+alias hls="fs -ls"
+
+# If you have LZO compression enabled in your Hadoop cluster and
+# compress job outputs with LZOP (not covered in this tutorial):
+# Conveniently inspect an LZOP compressed file from the command
+# line; run via:
+#
+# $ lzohead /hdfs/path/to/lzop/compressed/file.lzo
+#
+# Requires installed 'lzop' command.
+#
+lzohead () {
+ hadoop fs -cat $1 | lzop -dc | head -1000 | less
+}