Permalink
Browse files

Added lots of commenting and JavaDoc

  • Loading branch information...
1 parent 3e94e96 commit 4c68b1a0e38ab948062587374d8b16f36ba97585 @eljefe6a committed Dec 28, 2012
Showing with 282 additions and 87 deletions.
  1. +92 −16 src/BoggleDriver.java
  2. +67 −44 src/BoggleMapper.java
  3. +2 −0 src/BoggleReducer.java
  4. +34 −2 src/BoggleRoll.java
  5. +7 −0 src/BoggleWordMapper.java
  6. +20 −1 src/Node.java
  7. +60 −24 src/RollGraphWritable.java
View
@@ -1,4 +1,5 @@
import java.io.IOException;
+import java.util.ArrayList;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -17,22 +18,22 @@
public class BoggleDriver extends Configured implements Tool {
private static final Logger logger = Logger.getLogger(BoggleDriver.class);
-
+
/** The parameter name for the minimum word size to output */
public static final String MINIMUM_WORD_SIZE_PARAM = "minimumwordsize";
-
+
/** The default value for the minimum word size to output */
public static final int MINIMUM_WORD_SIZE_DEFAULT = 3;
-
+
/** The parameter name for the bloom filter location */
public static final String BLOOM_PARAM = "bloompath";
-
+
/** The parameter name for the dictionary location */
public static final String DICTIONARY_PARAM = "dictionarypath";
-
+
/** The parameter name for the roll to be serialized */
public static final String ROLL_PARAM = "roll";
-
+
@Override
public int run(String[] args) throws Exception {
if (args.length != 4) {
@@ -49,7 +50,7 @@ public int run(String[] args) throws Exception {
// To change how the mappers are created to process the roll,
// pass in -D mapreduce.input.lineinputformat.linespermap=0
// or in code uncomment:
- //configuration.set("mapreduce.input.lineinputformat.linespermap", "8");
+ // configuration.set("mapreduce.input.lineinputformat.linespermap", "8");
FileSystem fileSystem = FileSystem.get(configuration);
@@ -71,13 +72,38 @@ public int run(String[] args) throws Exception {
BoggleRoll roll = BoggleRoll.createRoll();
configuration.set(ROLL_PARAM, roll.serialize());
- writeRollFile(input, fileSystem, roll);
+ int iteration = traverseGraph(input, configuration, fileSystem, roll);
+ boolean success = findWords(input, output, configuration, iteration);
+
+ return success ? 0 : 1;
+ }
+
+ /**
+ * Traverses the graph until all possible words are found
+ *
+ * @param input
+ * The input directory
+ * @param configuration
+ * The configuration object
+ * @param fileSystem
+ * The filesystem object
+ * @param roll
+ * The Boggle roll to process
+ * @return The number of iterations it took to traverse the graph
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws ClassNotFoundException
+ */
+ private int traverseGraph(String input, Configuration configuration, FileSystem fileSystem, BoggleRoll roll)
+ throws IOException, InterruptedException, ClassNotFoundException {
int iteration = 0;
+ writeRollFile(input, fileSystem, roll, iteration);
+
long previousWordCount = 0;
- // Traverse the graph until it is exhausted
+ // Traverse the graph until it is completely traversed
do {
Job job = new Job(configuration);
job.setJarByClass(BoggleDriver.class);
@@ -86,6 +112,7 @@ public int run(String[] args) throws Exception {
FileInputFormat.setInputPaths(job, getPath(input, iteration));
FileOutputFormat.setOutputPath(job, getPath(input, iteration + 1));
+ // Roll is broken into x mappers per node
job.setInputFormatClass(NLineInputFormat.class);
job.setNumReduceTasks(1);
@@ -102,23 +129,44 @@ public int run(String[] args) throws Exception {
boolean success = job.waitForCompletion(true);
if (!success) {
- return 0;
+ throw new RuntimeException("Job did not return sucessfully. Check the logs for info.");
}
// Check to see if the entire graph has been traversed
long currentWordCount = job.getCounters().findCounter("boggle", "words").getValue();
if (currentWordCount == previousWordCount) {
- logger.info("Finished traversing graph after " + iteration + " iterations. Found " + currentWordCount + " potential words.");
+ logger.info("Finished traversing graph after " + iteration + " iterations. Found " + currentWordCount
+ + " potential words.");
break;
}
-
+
previousWordCount = currentWordCount;
iteration++;
} while (true);
- // Check for words and output to final directory
+ return iteration;
+ }
+
+ /**
+ * Takes the traversed graph and finds the actual words in the Boggle Roll
+ *
+ * @param input
+ * The input directory
+ * @param output
+ * The output directory
+ * @param configuration
+ * The configuration object
+ * @param iteration
+ * The number of iterations it took to traverse the graph
+ * @return If the job was successful
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws ClassNotFoundException
+ */
+ private boolean findWords(String input, String output, Configuration configuration, int iteration)
+ throws IOException, InterruptedException, ClassNotFoundException {
Job job = new Job(configuration);
job.setJarByClass(BoggleDriver.class);
job.setJobName("Boggle Graph Final");
@@ -134,22 +182,50 @@ public int run(String[] args) throws Exception {
job.setOutputValueClass(RollGraphWritable.class);
boolean success = job.waitForCompletion(true);
- return success ? 0 : 1;
+ return success;
}
- private void writeRollFile(String input, FileSystem fileSystem, BoggleRoll roll) throws IOException {
+ /**
+ * Writes out the Boggle roll to a file as an adjacency matrix
+ *
+ * @param input
+ * The place to write the roll to
+ * @param fileSystem
+ * The filesystem object
+ * @param roll
+ * The Boggle roll to write out
+ * @param iteration
+ * The iteration for the input
+ * @throws IOException
+ */
+ private void writeRollFile(String input, FileSystem fileSystem, BoggleRoll roll, int iteration) throws IOException {
FSDataOutputStream outputStream = fileSystem.create(getPath(input, 0));
for (int i = 0; i < roll.rollCharacters.length; i++) {
for (int j = 0; j < roll.rollCharacters[i].length; j++) {
- String output = roll.rollCharacters[i][j] + " " + "[[" + i + "," + j + "]] false\n";
+ ArrayList<Node> nodes = new ArrayList<Node>();
+ nodes.add(new Node(i, j));
+
+ RollGraphWritable graphWritable = new RollGraphWritable(nodes, false);
+
+ // Mimic the adjacency matrix written by the mapper to start things off
+ String output = roll.rollCharacters[i][j] + " " + graphWritable.serialize() + "\n";
outputStream.writeBytes(output);
}
}
outputStream.close();
}
+ /**
+ * Gets the path based on the iteration
+ *
+ * @param input
+ * The base input directory
+ * @param iteration
+ * The iteration number
+ * @return The path for the iteration
+ */
private Path getPath(String input, int iteration) {
    // Every iteration reads from / writes to its own directory named "<input>-<iteration>"
    final String iterationDirectory = input + "-" + iteration;
    return new Path(iterationDirectory);
}
View
@@ -14,19 +14,23 @@
public class BoggleMapper extends Mapper<LongWritable, Text, Text, RollGraphWritable> {
private static final Logger logger = Logger.getLogger(BoggleMapper.class);
+ /** The Boggle Roll that is being processed */
private BoggleRoll roll;
-
+
+ /** The Bloom Filter with the dictionary */
private BloomFilter bloomFilter;
@Override
public void setup(Context context) throws IOException {
Configuration configuration = context.getConfiguration();
-
+
+ // Get the Boggle Roll
roll = BoggleRoll.deserialize(configuration.get(BoggleDriver.ROLL_PARAM));
-
+
+ // Load the Bloom Filter
FileSystem fileSystem = FileSystem.get(configuration);
-
- bloomFilter = new BloomFilter(UserDictBloom.vectorSize, UserDictBloom.nbHash, UserDictBloom.hashType);
+
+ bloomFilter = new BloomFilter(UserDictBloom.vectorSize, UserDictBloom.nbHash, UserDictBloom.hashType);
bloomFilter.readFields(fileSystem.open(new Path(configuration.get(BoggleDriver.BLOOM_PARAM))));
}
@@ -40,55 +44,74 @@ public void map(LongWritable key, Text value, Context context) throws IOExceptio
if (values.length == 3) {
String charsSoFar = values[0];
-
+
RollGraphWritable rollGraph = RollGraphWritable.deserialize(values[1] + " " + values[2]);
if (!rollGraph.isFinal) {
- // Mark node as exhausted and emit
- rollGraph.isFinal = true;
+ processNonFinalNode(context, charsSoFar, rollGraph);
+ } else {
context.write(new Text(charsSoFar), rollGraph);
+ }
+ } else {
+ logger.warn("The input line had more spaces than were expected. Had " + values.length
+ + " expected 3. The line was \"" + line + "\"");
+ }
+ }
- // Emit the letters around it
- Node node = rollGraph.nodes.get(rollGraph.nodes.size() - 1);
+ /**
+ * Emits the nodes around the last processed node
+ *
+ * @param context
+ * The context object used to emit the output records
+ * @param charsSoFar
+ * The characters making up the node so far
+ * @param rollGraph
+ * The RollGraphWritable representing the nodes
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ private void processNonFinalNode(Context context, String charsSoFar, RollGraphWritable rollGraph)
+ throws IOException, InterruptedException {
+ // Mark node as exhausted and emit
+ rollGraph.isFinal = true;
+ context.write(new Text(charsSoFar), rollGraph);
- for (int row = node.row - 1; row < node.row + 1; row++) {
- if (row < 0 || row >= BoggleRoll.letters.length) {
- // Check if row is outside the bounds and skip if so
- continue;
- }
+ // Emit the letters around the last node in the Boggle Roll
+ Node node = rollGraph.nodes.get(rollGraph.nodes.size() - 1);
- for (int col = node.column - 1; col < node.column + 1; col++) {
- if (col < 0 || col >= BoggleRoll.letters.length) {
- // Check if column is outside the bounds and skip if so
- continue;
- }
-
- // Found viable row and column. See if node has already been traversed
- Node nextNode = new Node(row, col);
-
- if (!rollGraph.nodes.contains(nextNode)) {
- // Node not found, see if it passes the membership test
- String newWord = charsSoFar + roll.rollCharacters[row][col];
-
- if (bloomFilter.membershipTest(new Key(newWord.getBytes()))) {
- // It might exist, create new object, add new node, and emit
- @SuppressWarnings("unchecked")
- ArrayList<Node> nextNodeList = (ArrayList<Node>) rollGraph.nodes.clone();
- nextNodeList.add(nextNode);
-
- RollGraphWritable nextGraphWritable = new RollGraphWritable(nextNodeList, false);
-
- context.write(new Text(newWord), nextGraphWritable);
- }
- }
+ for (int row = node.row - 1; row < node.row + 1; row++) {
+ if (row < 0 || row >= BoggleRoll.letters.length) {
+ // Check if row is outside the bounds and skip if so
+ continue;
+ }
+
+ for (int col = node.column - 1; col < node.column + 1; col++) {
+ if (col < 0 || col >= BoggleRoll.letters.length) {
+ // Check if column is outside the bounds and skip if so
+ continue;
+ }
+
+ // Found viable row and column. See if node has already been traversed
+ Node nextNode = new Node(row, col);
+
+ if (!rollGraph.nodes.contains(nextNode)) {
+ // Node not found, see if it passes the membership test
+ String newWord = charsSoFar + roll.rollCharacters[row][col];
+
+ if (bloomFilter.membershipTest(new Key(newWord.getBytes()))) {
+ // It might exist, create new object, add new node, and emit
+ @SuppressWarnings("unchecked")
+ ArrayList<Node> nextNodeList = (ArrayList<Node>) rollGraph.nodes.clone();
+ nextNodeList.add(nextNode);
+
+ RollGraphWritable nextGraphWritable = new RollGraphWritable(nextNodeList, false);
+
+ context.write(new Text(newWord), nextGraphWritable);
+ } else {
+ logger.info("Throwing out " + newWord + " because it didn't pass membership test");
}
}
- } else {
- context.write(new Text(charsSoFar), rollGraph);
}
- } else {
- logger.warn("The input line had more spaces than were expected. Had " + values.length
- + " expected 3. The line was \"" + line + "\"");
}
}
}
View
@@ -9,8 +9,10 @@
public void reduce(Text key, Iterable<RollGraphWritable> values, Context context) throws IOException,
InterruptedException {
for (RollGraphWritable value : values) {
+ // Identity reducer
context.write(key, value);
+ // Use counters to keep track of how many words were found so far
context.getCounter("boggle", "words").increment(1);
}
}
Oops, something went wrong.

0 comments on commit 4c68b1a

Please sign in to comment.