
Preview of MAPREDUCE-279 merged to trunk.

1 parent 641901a commit 97f84ae297cd3d8a76fa7dc8a73818b79c851716 @acmurthy acmurthy committed Aug 17, 2011
Showing 2,919 changed files with 157,825 additions and 19,169 deletions.
File renamed without changes.
File renamed without changes.
@@ -0,0 +1,89 @@
+To compile Hadoop MapReduce Next, do the following:
+
+Step 1) Install dependencies for yarn
+
+See http://svn.apache.org/repos/asf/hadoop/common/branches/MR-279/mapreduce/yarn/README
+Make sure the protobuf library is in your library path, or set: export LD_LIBRARY_PATH=/usr/local/lib
+
+Step 2) Checkout
+
+svn checkout http://svn.apache.org/repos/asf/hadoop/common/branches/MR-279/
+
+Step 3) Build common
+
+Go to common directory
+ant veryclean mvn-install
+
+Step 4) Build HDFS
+
+Go to hdfs directory
+ant veryclean mvn-install -Dresolvers=internal
+
+Step 5) Build yarn and mapreduce
+
+Go to mapreduce directory
+export MAVEN_OPTS=-Xmx512m
+
+mvn clean install assembly:assembly
+ant veryclean jar jar-test -Dresolvers=internal
+
+If you want to skip the tests, run:
+
+mvn clean install assembly:assembly -DskipTests
+ant veryclean jar jar-test -Dresolvers=internal
+
+You will find the tarball at:
+ls target/hadoop-mapreduce-1.0-SNAPSHOT-bin.tar.gz
+
+Step 6) Untar the tarball into a clean, separate directory,
+say HADOOP_YARN_INSTALL.
+
+To run Hadoop MapReduce Next applications:
+
+Step 7) cd $HADOOP_YARN_INSTALL
+
+Step 8) Export the following variables:
+
+HADOOP_MAPRED_HOME=
+HADOOP_COMMON_HOME=
+HADOOP_HDFS_HOME=
+YARN_HOME=directory where you untarred yarn
+HADOOP_CONF_DIR=
+YARN_CONF_DIR=$HADOOP_CONF_DIR
+
+Step 9) bin/yarn-daemon.sh start resourcemanager
+
+Step 10) bin/yarn-daemon.sh start nodemanager
+
+Step 11) bin/yarn-daemon.sh start historyserver
+
+Step 12) Create the following symlinks in hadoop-common/lib
+
+ln -s $HADOOP_YARN_INSTALL/modules/hadoop-mapreduce-client-app-1.0-SNAPSHOT.jar .
+ln -s $HADOOP_YARN_INSTALL/modules/yarn-api-1.0-SNAPSHOT.jar .
+ln -s $HADOOP_YARN_INSTALL/modules/hadoop-mapreduce-client-common-1.0-SNAPSHOT.jar .
+ln -s $HADOOP_YARN_INSTALL/modules/yarn-common-1.0-SNAPSHOT.jar .
+ln -s $HADOOP_YARN_INSTALL/modules/hadoop-mapreduce-client-core-1.0-SNAPSHOT.jar .
+ln -s $HADOOP_YARN_INSTALL/modules/yarn-server-common-1.0-SNAPSHOT.jar .
+ln -s $HADOOP_YARN_INSTALL/modules/hadoop-mapreduce-client-jobclient-1.0-SNAPSHOT.jar .
+ln -s $HADOOP_YARN_INSTALL/lib/protobuf-java-2.4.0a.jar .
+
+Step 13) The YARN daemons are now up! But to run MapReduce applications, which now live in user land, you need to set up the nodemanager with the following configuration in your yarn-site.xml before you start the nodemanager.
+ <property>
+ <name>nodemanager.auxiluary.services</name>
+ <value>mapreduce.shuffle</value>
+ </property>
+
+ <property>
+ <name>nodemanager.aux.service.mapreduce.shuffle.class</name>
+ <value>org.apache.hadoop.mapred.ShuffleHandler</value>
+ </property>
+
+Step 14) You are all set. An example of how to run a MapReduce job:
+
+cd $HADOOP_MAPRED_HOME
+ant examples -Dresolvers=internal
+$HADOOP_COMMON_HOME/bin/hadoop jar $HADOOP_MAPRED_HOME/build/hadoop-mapred-examples-0.22.0-SNAPSHOT.jar randomwriter -Dmapreduce.job.user.name=$USER -Dmapreduce.clientfactory.class.name=org.apache.hadoop.mapred.YarnClientFactory -Dmapreduce.randomwriter.bytespermap=10000 -Ddfs.blocksize=536870912 -Ddfs.block.size=536870912 -libjars $HADOOP_YARN_INSTALL/hadoop-mapreduce-1.0-SNAPSHOT/modules/hadoop-mapreduce-client-jobclient-1.0-SNAPSHOT.jar output
+
+The output on the command line should be similar to what you see in a JT/TT setup (Hadoop 0.20/0.21).
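
For concreteness, here is a sketch of the Step 8 exports, assuming the Step 5 tarball was untarred to /opt/hadoop-mapreduce-1.0-SNAPSHOT and that common and hdfs were installed alongside it. Every path below is illustrative, not part of this commit:

# Illustrative values only -- substitute your own build/untar locations.
export HADOOP_YARN_INSTALL=/opt/hadoop-mapreduce-1.0-SNAPSHOT  # where you untarred the tarball
export HADOOP_MAPRED_HOME=$HADOOP_YARN_INSTALL
export HADOOP_COMMON_HOME=/opt/hadoop-common                   # from the Step 3 common build
export HADOOP_HDFS_HOME=/opt/hadoop-hdfs                       # from the Step 4 hdfs build
export YARN_HOME=$HADOOP_YARN_INSTALL
export HADOOP_CONF_DIR=$HADOOP_YARN_INSTALL/conf
export YARN_CONF_DIR=$HADOOP_CONF_DIR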
+
File renamed without changes.
File renamed without changes.
@@ -0,0 +1,101 @@
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
+ <id>all</id>
+ <formats>
+ <format>tar.gz</format>
+ </formats>
+ <includeBaseDirectory>true</includeBaseDirectory>
+ <!-- TODO: this layout is wrong. We need module specific bin files in module specific dirs -->
+ <fileSets>
+ <fileSet>
+ <directory>hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/target/classes/bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <includes>
+ <include>container-executor</include>
+ </includes>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>hadoop-yarn/bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <includes>
+ <include>*</include>
+ </includes>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <includes>
+ <include>*</include>
+ </includes>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>hadoop-yarn/conf</directory>
+ <outputDirectory>conf</outputDirectory>
+ <includes>
+ <include>**/*</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <outputDirectory>sources</outputDirectory>
+ <excludes>
+ <exclude>**/*.jar</exclude>
+ <exclude>**/target/**</exclude>
+ <!-- scripts to include later for setting fileMode -->
+ <exclude>**/bin/*</exclude>
+ <exclude>**/scripts/*</exclude>
+ <!-- images that we don't need (and cause problems for our tools) -->
+ <exclude>**/dt-*/images/**</exclude>
+ <!-- until the code that does this is fixed -->
+ <exclude>**/file:/**</exclude>
+ <exclude>**/SecurityAuth.audit*</exclude>
+ </excludes>
+ <includes>
+ <include>assembly/**</include>
+ <include>pom.xml</include>
+ <include>build*.xml</include>
+ <include>ivy.xml</include>
+ <include>ivy/**</include>
+ <include>INSTALL</include>
+ <include>LICENSE.txt</include>
+ <include>mr-client/**</include>
+ <include>hadoop-yarn/**</include>
+ <include>src/**</include>
+ </includes>
+ </fileSet>
+ <fileSet>
+ <outputDirectory>sources</outputDirectory>
+ <includes>
+ <include>**/bin/*</include>
+ <include>**/scripts/*</include>
+ </includes>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ </fileSets>
+ <moduleSets>
+ <moduleSet>
+ <excludes>
+ <exclude>org.apache.hadoop:hadoop-yarn-server-tests</exclude>
+ </excludes>
+ <binaries>
+ <outputDirectory>modules</outputDirectory>
+ <includeDependencies>false</includeDependencies>
+ <unpack>false</unpack>
+ </binaries>
+ </moduleSet>
+ </moduleSets>
+ <dependencySets>
+ <dependencySet>
+ <useProjectArtifact>false</useProjectArtifact>
+ <outputDirectory>/lib</outputDirectory>
+ <!-- Exclude hadoop artifacts. They will be found via HADOOP* env -->
+ <excludes>
+ <exclude>org.apache.hadoop:hadoop-common</exclude>
+ <exclude>org.apache.hadoop:hadoop-hdfs</exclude>
+ </excludes>
+ </dependencySet>
+ </dependencySets>
+</assembly>
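
Read together with Step 5 above, this descriptor yields roughly the following layout inside hadoop-mapreduce-1.0-SNAPSHOT-bin.tar.gz. This is a sketch inferred from the fileSets, moduleSets, and dependencySets; exact contents depend on what was built:

hadoop-mapreduce-1.0-SNAPSHOT/
  bin/      # yarn scripts and container-executor, mode 0755
  conf/     # contents of hadoop-yarn/conf
  modules/  # one jar per module, dependencies not bundled
  lib/      # third-party jars; hadoop-common and hadoop-hdfs excluded (found via HADOOP_* env)
  sources/  # project sources, minus jars and target/ output
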
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -0,0 +1,103 @@
+<?xml version="1.0"?>
+<project>
+ <parent>
+ <artifactId>hadoop-mapreduce-client</artifactId>
+ <groupId>org.apache.hadoop</groupId>
+ <version>${hadoop-mapreduce.version}</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-app</artifactId>
+ <name>hadoop-mapreduce-client-app</name>
+
+ <properties>
+ <install.file>${project.artifact.file}</install.file>
+ <applink.base>${project.build.directory}/${project.name}</applink.base>
+ <mr.basedir>${project.parent.parent.basedir}</mr.basedir>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-common</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-common</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-nodemanager</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-shuffle</artifactId>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <!-- local name for links -->
+ <finalName>mr-app</finalName>
+ <plugins>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ <phase>test-compile</phase>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>build-classpath</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>build-classpath</goal>
+ </goals>
+ <configuration>
+ <outputFile>target/classes/mrapp-generated-classpath</outputFile>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-antrun-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>create-mr-app-symlinks</id>
+ <phase>package</phase>
+ <configuration>
+ <target>
+ <symlink link="${applink.base}.jar"
+ resource="mr-app.jar" failonerror="false"/>
+ <symlink link="${applink.base}-1.0-SNAPSHOT.jar"
+ resource="mr-app.jar" failonerror="false"/>
+ </target>
+ </configuration>
+ <goals>
+ <goal>run</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+</project>
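
The build-classpath execution above writes the module's dependency classpath into target/classes/mrapp-generated-classpath, so the file travels inside the built jar. A minimal sketch of reading such a resource back at runtime; the class below is hypothetical and not part of this commit:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

// Hypothetical reader for the packaged mrapp-generated-classpath file.
public class GeneratedClasspathReader {
  public static String readGeneratedClasspath() throws IOException {
    // The file sits under target/classes, so it is a plain classpath resource
    // once the jar is built.
    InputStream in = GeneratedClasspathReader.class.getClassLoader()
        .getResourceAsStream("mrapp-generated-classpath");
    if (in == null) {
      throw new IOException("mrapp-generated-classpath not found on classpath");
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    try {
      return reader.readLine(); // build-classpath emits one line of path entries
    } finally {
      reader.close();
    }
  }
}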
@@ -0,0 +1,67 @@
+package org.apache.hadoop;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobContext;
+import org.apache.hadoop.mapred.OutputCommitter;
+import org.apache.hadoop.mapred.TaskAttemptContext;
+
+public class CustomOutputCommitter extends OutputCommitter {
+
+ public static final String JOB_SETUP_FILE_NAME = "_job_setup";
+ public static final String JOB_COMMIT_FILE_NAME = "_job_commit";
+ public static final String JOB_ABORT_FILE_NAME = "_job_abort";
+ public static final String TASK_SETUP_FILE_NAME = "_task_setup";
+ public static final String TASK_ABORT_FILE_NAME = "_task_abort";
+ public static final String TASK_COMMIT_FILE_NAME = "_task_commit";
+
+ @Override
+ public void setupJob(JobContext jobContext) throws IOException {
+ writeFile(jobContext.getJobConf(), JOB_SETUP_FILE_NAME);
+ }
+
+ @Override
+ public void commitJob(JobContext jobContext) throws IOException {
+ super.commitJob(jobContext);
+ writeFile(jobContext.getJobConf(), JOB_COMMIT_FILE_NAME);
+ }
+
+ @Override
+ public void abortJob(JobContext jobContext, int status)
+ throws IOException {
+ super.abortJob(jobContext, status);
+ writeFile(jobContext.getJobConf(), JOB_ABORT_FILE_NAME);
+ }
+
+ @Override
+ public void setupTask(TaskAttemptContext taskContext) throws IOException {
+ writeFile(taskContext.getJobConf(), TASK_SETUP_FILE_NAME);
+ }
+
+ @Override
+ public boolean needsTaskCommit(TaskAttemptContext taskContext)
+ throws IOException {
+ return true;
+ }
+
+ @Override
+ public void commitTask(TaskAttemptContext taskContext) throws IOException {
+ writeFile(taskContext.getJobConf(), TASK_COMMIT_FILE_NAME);
+ }
+
+ @Override
+ public void abortTask(TaskAttemptContext taskContext) throws IOException {
+ writeFile(taskContext.getJobConf(), TASK_ABORT_FILE_NAME);
+ }
+
+ private void writeFile(JobConf conf, String filename) throws IOException {
+ System.out.println("writing file ----" + filename);
+ Path outputPath = FileOutputFormat.getOutputPath(conf);
+ FileSystem fs = outputPath.getFileSystem(conf);
+ fs.create(new Path(outputPath, filename)).close();
+ }
+}
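
A minimal sketch of wiring this committer into a job via the old mapred API; the driver class, job name, and paths below are illustrative, not part of this commit:

import org.apache.hadoop.CustomOutputCommitter;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical driver showing CustomOutputCommitter in use.
public class CustomCommitterDemo {
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(CustomCommitterDemo.class);
    conf.setJobName("custom-committer-demo");
    // Route the job/task setup, commit, and abort hooks through the committer,
    // which drops _job_* and _task_* marker files into the output directory.
    conf.setOutputCommitter(CustomOutputCommitter.class);
    FileInputFormat.setInputPaths(conf, new Path("in"));    // placeholder input
    FileOutputFormat.setOutputPath(conf, new Path("out"));  // placeholder output
    JobClient.runJob(conf);
  }
}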
