Skip to content

Commit

Permalink
Minor improvements.
Browse files Browse the repository at this point in the history
  • Loading branch information
castagna committed Jan 18, 2011
1 parent f726d97 commit 735ea3b
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 32 deletions.
41 changes: 21 additions & 20 deletions .classpath
Original file line number Diff line number Diff line change
@@ -1,35 +1,41 @@
<classpath>
<classpathentry kind="src" path="src/test/java" including="**/*.java"/>
<classpathentry kind="src" path="src/test/resources" excluding="**/*.java"/>
<classpathentry kind="src" path="src/main/java" including="**/*.java"/>
<classpathentry kind="src" path="src/main/resources" excluding="**/*.java"/>
<classpathentry kind="output" path="target/classes-eclipse"/>
<classpathentry kind="var" path="M2_REPO/ant/ant/1.6.5/ant-1.6.5.jar" sourcepath="M2_REPO/ant/ant/1.6.5/ant-1.6.5-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/com/hp/hpl/jena/arq/2.8.7/arq-2.8.7.jar" sourcepath="M2_REPO/com/hp/hpl/jena/arq/2.8.7/arq-2.8.7-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/commons-cli/commons-cli/1.2/commons-cli-1.2.jar" sourcepath="M2_REPO/commons-cli/commons-cli/1.2/commons-cli-1.2-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/commons-cli/commons-cli/1.2/commons-cli-1.2.jar" sourcepath="M2_REPO/commons-cli/commons-cli/1.2/commons-cli-1.2-sources.jar">
<attributes>
<attribute value="jar:file:/home/castagna/.m2/repository/commons-cli/commons-cli/1.2/commons-cli-1.2-javadoc.jar!/" name="javadoc_location"/>
</attributes>
</classpathentry>
<classpathentry kind="var" path="M2_REPO/commons-codec/commons-codec/1.3/commons-codec-1.3.jar" sourcepath="M2_REPO/commons-codec/commons-codec/1.3/commons-codec-1.3-sources.jar">
<attributes>
<attribute value="jar:file:/home/castagna/.m2/repository/commons-codec/commons-codec/1.3/commons-codec-1.3-javadoc.jar!/" name="javadoc_location"/>
</attributes>
</classpathentry>
<classpathentry kind="var" path="M2_REPO/commons-el/commons-el/1.0/commons-el-1.0.jar" sourcepath="M2_REPO/commons-el/commons-el/1.0/commons-el-1.0-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/commons-el/commons-el/1.0/commons-el-1.0.jar" sourcepath="M2_REPO/commons-el/commons-el/1.0/commons-el-1.0-sources.jar">
<attributes>
<attribute value="jar:file:/home/castagna/.m2/repository/commons-el/commons-el/1.0/commons-el-1.0-javadoc.jar!/" name="javadoc_location"/>
</attributes>
</classpathentry>
<classpathentry kind="var" path="M2_REPO/commons-httpclient/commons-httpclient/3.0.1/commons-httpclient-3.0.1.jar" sourcepath="M2_REPO/commons-httpclient/commons-httpclient/3.0.1/commons-httpclient-3.0.1-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/commons-logging/commons-logging/1.0.3/commons-logging-1.0.3.jar"/>
<classpathentry kind="var" path="M2_REPO/commons-net/commons-net/1.4.1/commons-net-1.4.1.jar" sourcepath="M2_REPO/commons-net/commons-net/1.4.1/commons-net-1.4.1-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/eclipse/jdt/core/3.1.1/core-3.1.1.jar"/>
<classpathentry kind="var" path="M2_REPO/com/cloudera/hadoop/hadoop-core/0.20.2-737/hadoop-core-0.20.2-737.jar" sourcepath="M2_REPO/com/cloudera/hadoop/hadoop-core/0.20.2-737/hadoop-core-0.20.2-737-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/hsqldb/hsqldb/1.8.0.10/hsqldb-1.8.0.10.jar"/>
<classpathentry kind="var" path="M2_REPO/com/ibm/icu/icu4j/3.4.4/icu4j-3.4.4.jar" sourcepath="M2_REPO/com/ibm/icu/icu4j/3.4.4/icu4j-3.4.4-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/com/hp/hpl/jena/iri/0.8/iri-0.8.jar" sourcepath="M2_REPO/com/hp/hpl/jena/iri/0.8/iri-0.8-sources.jar">
<classpathentry kind="var" path="M2_REPO/commons-logging/commons-logging/1.0.3/commons-logging-1.0.3.jar">
<attributes>
<attribute value="jar:file:/home/castagna/.m2/repository/commons-logging/commons-logging/1.0.3/commons-logging-1.0.3-javadoc.jar!/" name="javadoc_location"/>
</attributes>
</classpathentry>
<classpathentry kind="var" path="M2_REPO/commons-net/commons-net/1.4.1/commons-net-1.4.1.jar" sourcepath="M2_REPO/commons-net/commons-net/1.4.1/commons-net-1.4.1-sources.jar">
<attributes>
<attribute value="jar:file:/home/castagna/.m2/repository/com/hp/hpl/jena/iri/0.8/iri-0.8-javadoc.jar!/" name="javadoc_location"/>
<attribute value="jar:file:/home/castagna/.m2/repository/commons-net/commons-net/1.4.1/commons-net-1.4.1-javadoc.jar!/" name="javadoc_location"/>
</attributes>
</classpathentry>
<classpathentry kind="var" path="M2_REPO/org/eclipse/jdt/core/3.1.1/core-3.1.1.jar"/>
<classpathentry kind="var" path="M2_REPO/com/cloudera/hadoop/hadoop-core/0.20.2-737/hadoop-core-0.20.2-737.jar" sourcepath="M2_REPO/com/cloudera/hadoop/hadoop-core/0.20.2-737/hadoop-core-0.20.2-737-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/hsqldb/hsqldb/1.8.0.10/hsqldb-1.8.0.10.jar"/>
<classpathentry kind="var" path="M2_REPO/org/codehaus/jackson/jackson-core-asl/1.5.2/jackson-core-asl-1.5.2.jar" sourcepath="M2_REPO/org/codehaus/jackson/jackson-core-asl/1.5.2/jackson-core-asl-1.5.2-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/codehaus/jackson/jackson-mapper-asl/1.5.2/jackson-mapper-asl-1.5.2.jar" sourcepath="M2_REPO/org/codehaus/jackson/jackson-mapper-asl/1.5.2/jackson-mapper-asl-1.5.2-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/tomcat/jasper-compiler/5.5.12/jasper-compiler-5.5.12.jar"/>
<classpathentry kind="var" path="M2_REPO/tomcat/jasper-runtime/5.5.12/jasper-runtime-5.5.12.jar"/>
<classpathentry kind="var" path="M2_REPO/com/hp/hpl/jena/jena/2.6.4/jena-2.6.4.jar" sourcepath="M2_REPO/com/hp/hpl/jena/jena/2.6.4/jena-2.6.4-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/net/java/dev/jets3t/jets3t/0.7.1/jets3t-0.7.1.jar" sourcepath="M2_REPO/net/java/dev/jets3t/jets3t/0.7.1/jets3t-0.7.1-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/mortbay/jetty/jetty/6.1.14/jetty-6.1.14.jar" sourcepath="M2_REPO/org/mortbay/jetty/jetty/6.1.14/jetty-6.1.14-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/mortbay/jetty/jetty-util/6.1.14/jetty-util-6.1.14.jar" sourcepath="M2_REPO/org/mortbay/jetty/jetty-util/6.1.14/jetty-util-6.1.14-sources.jar"/>
Expand All @@ -38,15 +44,10 @@
<classpathentry kind="var" path="M2_REPO/junit/junit/4.8.2/junit-4.8.2.jar" sourcepath="M2_REPO/junit/junit/4.8.2/junit-4.8.2-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/net/sf/kosmosfs/kfs/0.3/kfs-0.3.jar"/>
<classpathentry kind="var" path="M2_REPO/log4j/log4j/1.2.14/log4j-1.2.14.jar" sourcepath="M2_REPO/log4j/log4j/1.2.14/log4j-1.2.14-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/apache/lucene/lucene-core/2.3.1/lucene-core-2.3.1.jar" sourcepath="M2_REPO/org/apache/lucene/lucene-core/2.3.1/lucene-core-2.3.1-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/oro/oro/2.0.8/oro-2.0.8.jar" sourcepath="M2_REPO/oro/oro/2.0.8/oro-2.0.8-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/mortbay/jetty/servlet-api-2.5/6.1.14/servlet-api-2.5-6.1.14.jar" sourcepath="M2_REPO/org/mortbay/jetty/servlet-api-2.5/6.1.14/servlet-api-2.5-6.1.14-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-api/1.5.8/slf4j-api-1.5.8.jar" sourcepath="M2_REPO/org/slf4j/slf4j-api/1.5.8/slf4j-api-1.5.8-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-api/1.5.11/slf4j-api-1.5.11.jar" sourcepath="M2_REPO/org/slf4j/slf4j-api/1.5.11/slf4j-api-1.5.11-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/slf4j/slf4j-log4j12/1.5.11/slf4j-log4j12-1.5.11.jar" sourcepath="M2_REPO/org/slf4j/slf4j-log4j12/1.5.11/slf4j-log4j12-1.5.11-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/stax/stax-api/1.0.1/stax-api-1.0.1.jar"/>
<classpathentry kind="var" path="M2_REPO/com/hp/hpl/jena/tdb/0.8.9/tdb-0.8.9.jar" sourcepath="M2_REPO/com/hp/hpl/jena/tdb/0.8.9/tdb-0.8.9-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/org/codehaus/woodstox/wstx-asl/3.2.9/wstx-asl-3.2.9.jar" sourcepath="M2_REPO/org/codehaus/woodstox/wstx-asl/3.2.9/wstx-asl-3.2.9-sources.jar"/>
<classpathentry kind="var" path="M2_REPO/xerces/xercesImpl/2.7.1/xercesImpl-2.7.1.jar"/>
<classpathentry kind="var" path="M2_REPO/xmlenc/xmlenc/0.52/xmlenc-0.52.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
</classpath>
35 changes: 35 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Dictionary Encoding Example
---------------------------

...

mvn hadoop:pack (see: http://github.com/akkumar/maven-hadoop)

hadoop fs -mkdir hdfs://localhost/user/castagna/src/
hadoop fs -mkdir hdfs://localhost/user/castagna/src/test
hadoop fs -mkdir hdfs://localhost/user/castagna/src/test/resources

hadoop fs -copyFromLocal src/test/resources/* hdfs://localhost/user/castagna/src/test/resources/

hadoop fs -rmr hdfs://localhost/user/castagna/target/

mvn hadoop:pack && hadoop jar ./target/hadoop-deploy/dicenc-hdeploy.jar com.talis.mapreduce.dicenc.Run


Maven
-----

Once you have installed Maven, you can have fun with the following commands:

mvn -Declipse.workspace=/opt/workspace eclipse:add-maven-repo
mvn eclipse:clean eclipse:eclipse -DdownloadSources=true -DdownloadJavadocs=true
mvn dependency:resolve
mvn compile
mvn test
mvn package
mvn site
mvn install
mvn deploy


-- Paolo Castagna, Talis Systems Ltd.
12 changes: 12 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@
<version>1.5.2</version>
</dependency>

<!--
<dependency>
<groupId>com.hp.hpl.jena</groupId>
<artifactId>tdb</artifactId>
<version>${tdb.version}</version>
</dependency>
-->

<dependency>
<groupId>org.slf4j</groupId>
Expand Down Expand Up @@ -100,6 +102,16 @@
</configuration>
</plugin>

<!-- See: http://github.com/akkumar/maven-hadoop -->
<!-- Registers the maven-hadoop plugin; presumably this is what provides the "hadoop:pack" goal used in the README (confirm). -->
<!-- NOTE(review): hadoopHome is a fixed absolute path; presumably it has to match the Hadoop install on the build machine (confirm). -->
<plugin>
<groupId>com.github.maven-hadoop.plugin</groupId>
<artifactId>maven-hadoop-plugin</artifactId>
<version>0.20.1</version>
<configuration>
<hadoopHome>/usr/lib/hadoop</hadoopHome>
</configuration>
</plugin>

</plugins>

</build>
Expand Down
6 changes: 2 additions & 4 deletions src/main/java/com/talis/mapreduce/dicenc/FirstDriver.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package com.talis.mapreduce.dicenc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
Expand All @@ -37,8 +36,7 @@ public int run(String[] args) throws Exception {
return -1;
}

Configuration configuration = new Configuration();
Job job = new Job(configuration, "first");
Job job = new Job(getConf(), "first");
job.setJarByClass(getClass());

FileInputFormat.addInputPath(job, new Path(args[0]));
Expand All @@ -54,7 +52,7 @@ public int run(String[] args) throws Exception {
}

/**
 * Command-line entry point: passes a new FirstDriver and the raw arguments to ToolRunner.run.
 *
 * The value returned by ToolRunner.run is not handed to System.exit (that call is commented
 * out below), so a non-zero result neither terminates the JVM nor reaches the caller.
 * NOTE(review): presumably left disabled so that Run can invoke several drivers' main
 * methods one after another in the same JVM; confirm.
 *
 * @param args command-line arguments, forwarded unchanged to ToolRunner.run
 * @throws Exception anything thrown by ToolRunner.run
 */
public static void main(String[] args) throws Exception {
// NOTE(review): this commit view lists the pre-change line and its replacement back to back;
// presumably only the second form (assignment commented out) remains in the file; confirm.
int exitCode = ToolRunner.run(new FirstDriver(), args);
/* int exitCode = */ ToolRunner.run(new FirstDriver(), args);
// System.exit(exitCode);
}

Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/talis/mapreduce/dicenc/Run.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public static void main(String[] args) throws Exception
{
long now = System.currentTimeMillis();
String conf = "conf/hadoop-localhost.xml";
// String conf = "conf/hadoop-local.xml";

FirstDriver.main(new String[] { "-conf", conf, "src/test/resources/input", "target/" + now + "_output_1"} );
SecondDriver.main(new String[] { "-conf", conf, "src/test/resources/input", "target/" + now + "_output_2" } );
Expand Down
6 changes: 2 additions & 4 deletions src/main/java/com/talis/mapreduce/dicenc/SecondDriver.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package com.talis.mapreduce.dicenc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
Expand All @@ -38,8 +37,7 @@ public int run(String[] args) throws Exception {
return -1;
}

Configuration configuration = new Configuration();
Job job = new Job(configuration, "second");
Job job = new Job(getConf(), "second");
job.setJarByClass(getClass());

FileInputFormat.addInputPath(job, new Path(args[0]));
Expand All @@ -57,7 +55,7 @@ public int run(String[] args) throws Exception {
}

/**
 * Command-line entry point: passes a new SecondDriver and the raw arguments to ToolRunner.run.
 *
 * The value returned by ToolRunner.run is not handed to System.exit (that call is commented
 * out below), so a non-zero result neither terminates the JVM nor reaches the caller.
 * NOTE(review): presumably left disabled so that Run can invoke several drivers' main
 * methods one after another in the same JVM; confirm.
 *
 * @param args command-line arguments, forwarded unchanged to ToolRunner.run
 * @throws Exception anything thrown by ToolRunner.run
 */
public static void main(String[] args) throws Exception {
// NOTE(review): this commit view lists the pre-change line and its replacement back to back;
// presumably only the second form (assignment commented out) remains in the file; confirm.
int exitCode = ToolRunner.run(new SecondDriver(), args);
/* int exitCode = */ ToolRunner.run(new SecondDriver(), args);
// System.exit(exitCode);
}

Expand Down
6 changes: 2 additions & 4 deletions src/main/java/com/talis/mapreduce/dicenc/ThirdDriver.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package com.talis.mapreduce.dicenc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
Expand All @@ -37,8 +36,7 @@ public int run(String[] args) throws Exception {
return -1;
}

Configuration configuration = new Configuration();
Job job = new Job(configuration, "third");
Job job = new Job(getConf(), "third");
job.setJarByClass(getClass());

FileInputFormat.addInputPath(job, new Path(args[0]));
Expand All @@ -55,7 +53,7 @@ public int run(String[] args) throws Exception {
}

/**
 * Command-line entry point: passes a new ThirdDriver and the raw arguments to ToolRunner.run.
 *
 * The value returned by ToolRunner.run is not handed to System.exit (that call is commented
 * out below), so a non-zero result neither terminates the JVM nor reaches the caller.
 * NOTE(review): presumably left disabled so that a wrapper can invoke several drivers' main
 * methods one after another in the same JVM; confirm against the caller.
 *
 * @param args command-line arguments, forwarded unchanged to ToolRunner.run
 * @throws Exception anything thrown by ToolRunner.run
 */
public static void main(String[] args) throws Exception {
// NOTE(review): this commit view lists the pre-change line and its replacement back to back;
// presumably only the second form (assignment commented out) remains in the file; confirm.
int exitCode = ToolRunner.run(new ThirdDriver(), args);
/* int exitCode = */ ToolRunner.run(new ThirdDriver(), args);
// System.exit(exitCode);
}

Expand Down

0 comments on commit 735ea3b

Please sign in to comment.