Permalink
Browse files

Initial commit

  • Loading branch information...
1 parent 299d3df commit 8462ba9f8d3df12aa66576828f6f62110d3b956a training committed Dec 28, 2012
Showing with 679 additions and 0 deletions.
  1. +71 −0 .classpath
  2. +2 −0 .gitignore
  3. +17 −0 .project
  4. BIN bloom.out
  5. +146 −0 src/BoggleDriver.java
  6. +94 −0 src/BoggleMapper.java
  7. +17 −0 src/BoggleReducer.java
  8. +77 −0 src/BoggleRoll.java
  9. +76 −0 src/BoggleWordMapper.java
  10. +23 −0 src/Node.java
  11. +91 −0 src/RollGraphWritable.java
  12. +65 −0 src/UserDictBloom.java
View
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" path="src"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/asm-3.2.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/avro-1.7.1.cloudera.2.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-beanutils-1.7.0.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-beanutils-core-1.8.0.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-cli-1.2.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-codec-1.4.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-collections-3.2.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-configuration-1.6.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-digester-1.8.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-el-1.0.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-io-2.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-lang-2.5.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-logging-1.1.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-math-2.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/commons-net-3.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/guava-11.0.2.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/hadoop-auth-2.0.0-cdh4.1.1.jar" sourcepath="/home/training/src/hadoop-common-project/hadoop-auth/src/main/java">
+ <attributes>
+ <attribute name="javadoc_location" value="http://archive.cloudera.com/cdh4/cdh/4/hadoop/api"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/hadoop-common-2.0.0-cdh4.1.1.jar" sourcepath="/home/training/src/hadoop-common-project/hadoop-common/src/main/java">
+ <attributes>
+ <attribute name="javadoc_location" value="http://archive.cloudera.com/cdh4/cdh/4/hadoop/api"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/hadoop-core-2.0.0-mr1-cdh4.1.1.jar" sourcepath="/home/training/src/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java">
+ <attributes>
+ <attribute name="javadoc_location" value="http://archive.cloudera.com/cdh4/cdh/4/hadoop/api"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/hadoop-hdfs-2.0.0-cdh4.1.1.jar" sourcepath="/home/training/src/hadoop-hdfs-project/hadoop-hdfs/src/main/java">
+ <attributes>
+ <attribute name="javadoc_location" value="http://archive.cloudera.com/cdh4/cdh/4/hadoop/api"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/hsqldb-1.8.0.10.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jackson-core-asl-1.8.8.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jackson-mapper-asl-1.8.8.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jasper-runtime-5.5.23.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jersey-core-1.8.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jersey-server-1.8.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jetty-6.1.26.cloudera.2.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jetty-util-6.1.26.cloudera.2.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jline-0.9.94.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jsch-0.1.42.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jsp-api-2.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/jsr305-1.3.9.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/junit-4.8.2.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/log4j-1.2.17.jar">
+ <attributes>
+ <attribute name="javadoc_location" value="http://logging.apache.org/log4j/1.2/apidocs"/>
+ </attributes>
+ </classpathentry>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/mockito-all-1.8.5.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/paranamer-2.3.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/protobuf-java-2.4.0a.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/servlet-api-2.5.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/slf4j-api-1.6.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/slf4j-log4j12-1.6.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/snappy-java-1.0.4.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/xmlenc-0.52.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/client-0.20/zookeeper-3.4.3-cdh4.1.1.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/hadoop-annotations.jar"/>
+ <classpathentry kind="lib" path="/usr/lib/hadoop/lib/commons-httpclient-3.1.jar"/>
+ <classpathentry kind="output" path="bin"/>
+</classpath>
View
@@ -0,0 +1,2 @@
+.*.crc
+bin/
View
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>BoggleMapReduce</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
View
Binary file not shown.
View
@@ -0,0 +1,146 @@
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Logger;
+
+/**
+ * Driver for the Boggle MapReduce pipeline.
+ *
+ * <p>The driver creates a roll via {@code BoggleRoll.createRoll()}, writes one seed
+ * line per board cell to {@code <input>-0}, and then repeatedly runs a
+ * {@link BoggleMapper}/{@link BoggleReducer} job that reads {@code <input>-N} and
+ * writes {@code <input>-(N+1)}. Iteration stops when the "boggle"/"words" counter
+ * reported by a job equals the value reported by the previous job. A final job
+ * using {@link BoggleWordMapper} then writes to the output directory.
+ */
+public class BoggleDriver extends Configured implements Tool {
+    private static final Logger logger = Logger.getLogger(BoggleDriver.class);
+
+    public static final int MINIMUM_WORD_SIZE = 3;
+
+    /**
+     * Runs the iterative graph traversal followed by the final word-check job.
+     *
+     * @param args {@code <bloomfile> <dictionary> <input dir> <output dir>}
+     * @return 0 on success, -1 for bad arguments or failed pre-flight checks,
+     *         1 if any MapReduce job fails
+     * @throws Exception if file system access or job submission fails
+     */
+    @Override
+    public int run(String[] args) throws Exception {
+        if (args.length != 4) {
+            System.out.println("Usage: BoggleDriver <bloomfile> <dictionary> <input dir> <output dir>");
+            return -1;
+        }
+
+        String bloomPath = args[0];
+        String dictionary = args[1];
+        String input = args[2];
+        String output = args[3];
+
+        Configuration configuration = getConf();
+        configuration.set("mapreduce.input.lineinputformat.linespermap", "8");
+
+        FileSystem fileSystem = FileSystem.get(configuration);
+
+        if (!fileSystem.exists(new Path(bloomPath))) {
+            // Verify that Bloom file exists
+            System.out.println("Could not find bloom file");
+            return -1;
+        }
+
+        if (fileSystem.exists(new Path(output))) {
+            // Verify that output does not exist
+            System.out.println("Output file already exists");
+            return -1;
+        }
+
+        // These values must be set before any Job is constructed from the configuration.
+        configuration.set("bloompath", bloomPath);
+        configuration.set("dictionarypath", dictionary);
+
+        BoggleRoll roll = BoggleRoll.createRoll();
+        configuration.set("roll", roll.serialize());
+
+        writeRollFile(input, fileSystem, roll);
+
+        int iteration = 0;
+        long previousWordCount = 0;
+
+        // Traverse the graph until an iteration reports the same count as the one before it
+        while (true) {
+            Job job = createIterationJob(configuration, input, iteration);
+
+            if (!job.waitForCompletion(true)) {
+                // A failed iteration must surface as a non-zero exit code.
+                logger.error("Boggle Graph Iteration " + iteration + " failed; aborting.");
+                return 1;
+            }
+
+            // Check to see if the entire graph has been traversed
+            long currentWordCount = job.getCounters().findCounter("boggle", "words").getValue();
+
+            if (currentWordCount == previousWordCount) {
+                logger.info("Finished traversing graph after " + iteration + " iterations. Found " + currentWordCount + " potential words.");
+                break;
+            }
+
+            previousWordCount = currentWordCount;
+            iteration++;
+        }
+
+        // Check for words and output to final directory
+        Job finalJob = createFinalJob(configuration, input, iteration, output);
+        return finalJob.waitForCompletion(true) ? 0 : 1;
+    }
+
+    /** Builds the job for one traversal step: reads {@code <input>-iteration}, writes {@code <input>-(iteration+1)}. */
+    private Job createIterationJob(Configuration configuration, String input, int iteration) throws IOException {
+        Job job = new Job(configuration);
+        job.setJarByClass(BoggleDriver.class);
+        job.setJobName("Boggle Graph Iteration " + iteration);
+
+        FileInputFormat.setInputPaths(job, getPath(input, iteration));
+        FileOutputFormat.setOutputPath(job, getPath(input, iteration + 1));
+
+        job.setInputFormatClass(NLineInputFormat.class);
+
+        job.setNumReduceTasks(1);
+
+        job.setMapperClass(BoggleMapper.class);
+        job.setReducerClass(BoggleReducer.class);
+
+        job.setMapOutputKeyClass(Text.class);
+        job.setMapOutputValueClass(RollGraphWritable.class);
+
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(RollGraphWritable.class);
+
+        return job;
+    }
+
+    /** Builds the final job that runs {@link BoggleWordMapper} and writes to {@code output}. */
+    private Job createFinalJob(Configuration configuration, String input, int iteration, String output)
+            throws IOException {
+        Job job = new Job(configuration);
+        job.setJarByClass(BoggleDriver.class);
+        job.setJobName("Boggle Graph Final");
+
+        // NOTE(review): the last iteration wrote to <input>-(iteration + 1), but this reads
+        // <input>-iteration. Record counts match at this point; whether the isFinal flags
+        // matter depends on BoggleWordMapper, which is not visible here - confirm.
+        FileInputFormat.setInputPaths(job, getPath(input, iteration));
+        FileOutputFormat.setOutputPath(job, new Path(output));
+
+        job.setNumReduceTasks(1);
+
+        job.setMapperClass(BoggleWordMapper.class);
+
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(RollGraphWritable.class);
+
+        return job;
+    }
+
+    /**
+     * Writes the seed file {@code <input>-0}: one line per board cell in the form
+     * {@code <letter> [[row,col]] false}.
+     *
+     * @throws IOException if the file cannot be created or written
+     */
+    private void writeRollFile(String input, FileSystem fileSystem, BoggleRoll roll) throws IOException {
+        FSDataOutputStream outputStream = fileSystem.create(getPath(input, 0));
+
+        try {
+            for (int i = 0; i < roll.rollCharacters.length; i++) {
+                for (int j = 0; j < roll.rollCharacters[i].length; j++) {
+                    String output = roll.rollCharacters[i][j] + " " + "[[" + i + "," + j + "]] false\n";
+                    outputStream.writeBytes(output);
+                }
+            }
+        } finally {
+            // Close even if a write fails so the stream is not leaked.
+            outputStream.close();
+        }
+    }
+
+    /** Returns the path {@code <input>-<iteration>} used for the given traversal step. */
+    private Path getPath(String input, int iteration) {
+        return new Path(input + "-" + iteration);
+    }
+
+    public static void main(String[] args) throws Exception {
+        int exitCode = ToolRunner.run(new Configuration(), new BoggleDriver(), args);
+        System.exit(exitCode);
+    }
+}
View
@@ -0,0 +1,94 @@
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.bloom.BloomFilter;
+import org.apache.hadoop.util.bloom.Key;
+import org.apache.log4j.Logger;
+
+public class BoggleMapper extends Mapper<LongWritable, Text, Text, RollGraphWritable> {
+ private static final Logger logger = Logger.getLogger(BoggleMapper.class);
+
+ private BoggleRoll roll;
+
+ private BloomFilter bloomFilter;
+
+ @Override
+ public void setup(Context context) throws IOException {
+ Configuration configuration = context.getConfiguration();
+
+ roll = BoggleRoll.deserialize(configuration.get("roll"));
+
+ FileSystem fileSystem = FileSystem.get(configuration);
+
+ bloomFilter = new BloomFilter(UserDictBloom.vectorSize, UserDictBloom.nbHash, UserDictBloom.hashType);
+ bloomFilter.readFields(fileSystem.open(new Path(configuration.get("bloompath"))));
+ }
+
+ @Override
+ public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+ // Expected input:
+ // aaaaa [[0,0][1,1][2,2]] false
+ String line = value.toString();
+
+ String values[] = line.split("\\s");
+
+ if (values.length == 3) {
+ String charsSoFar = values[0];
+
+ RollGraphWritable rollGraph = RollGraphWritable.deserialize(values[1] + " " + values[2]);
+
+ if (!rollGraph.isFinal) {
+ // Mark node as exhausted and emit
+ rollGraph.isFinal = true;
+ context.write(new Text(charsSoFar), rollGraph);
+
+ // Emit the letters around it
+ Node node = rollGraph.nodes.get(rollGraph.nodes.size() - 1);
+
+ for (int row = node.row - 1; row < node.row + 1; row++) {
+ if (row < 0 || row >= BoggleRoll.letters.length) {
+ // Check if row is outside the bounds and skip if so
+ continue;
+ }
+
+ for (int col = node.column - 1; col < node.column + 1; col++) {
+ if (col < 0 || col >= BoggleRoll.letters.length) {
+ // Check if column is outside the bounds and skip if so
+ continue;
+ }
+
+ // Found viable row and column. See if node has already been traversed
+ Node nextNode = new Node(row, col);
+
+ if (!rollGraph.nodes.contains(nextNode)) {
+ // Node not found, see if it passes the membership test
+ String newWord = charsSoFar + roll.rollCharacters[row][col];
+
+ if (bloomFilter.membershipTest(new Key(newWord.getBytes()))) {
+ // It might exist, create new object, add new node, and emit
+ @SuppressWarnings("unchecked")
+ ArrayList<Node> nextNodeList = (ArrayList<Node>) rollGraph.nodes.clone();
+ nextNodeList.add(nextNode);
+
+ RollGraphWritable nextGraphWritable = new RollGraphWritable(nextNodeList, false);
+
+ context.write(new Text(newWord), nextGraphWritable);
+ }
+ }
+ }
+ }
+ } else {
+ context.write(new Text(charsSoFar), rollGraph);
+ }
+ } else {
+ logger.warn("The input line had more spaces than were expected. Had " + values.length
+ + " expected 3. The line was \"" + line + "\"");
+ }
+ }
+}
@@ -0,0 +1,17 @@
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+
+public class BoggleReducer extends Reducer<Text, RollGraphWritable, Text, RollGraphWritable> {
+
+ @Override
+ public void reduce(Text key, Iterable<RollGraphWritable> values, Context context) throws IOException,
+ InterruptedException {
+ for (RollGraphWritable value : values) {
+ context.write(key, value);
+
+ context.getCounter("boggle", "words").increment(1);
+ }
+ }
+}
Oops, something went wrong.

0 comments on commit 8462ba9

Please sign in to comment.