Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Changed bloom setup. Changed to output 1 file per row.

  • Loading branch information...
commit faa0d77badd1772402946ad5c9f3bf708fd651aa 1 parent 329b362
@eljefe6a authored
View
BIN  bloom.out
Binary file not shown
View
27 src/BoggleDriver.java
@@ -46,7 +46,7 @@
public static final String MAX_ITERATIONS_PARAM = "maxiterations";
/** The default value for the maximum number of iterations */
- public static final int MAX_ITERATIONS_DEFAULT = 9;
+ public static final int MAX_ITERATIONS_DEFAULT = 15;
@Override
public int run(String[] args) throws Exception {
@@ -85,7 +85,7 @@ public int run(String[] args) throws Exception {
configuration.set(BLOOM_PARAM, bloomPath);
configuration.set(DICTIONARY_PARAM, dictionary);
- BoggleRoll roll = BoggleRoll.createRoll(configuration.getInt(ROLL_VERSION, BoggleRoll.BIG_BOGGLE_VERSION));
+ BoggleRoll roll = BoggleRoll.createRoll(configuration.getInt(ROLL_VERSION, 1000));
configuration.set(ROLL_PARAM, roll.serialize());
int iteration = traverseGraph(input, configuration, fileSystem, roll);
@@ -150,13 +150,14 @@ private int traverseGraph(String input, Configuration configuration, FileSystem
throw new RuntimeException("Job did not return sucessfully. Check the logs for info.");
}
- // Check to see if the entire graph has been traversed
long currentWordCount = job.getCounters().findCounter("boggle", "words").getValue();
bloomSavings += job.getCounters().findCounter("boggle", "bloom").getValue();
logger.info("Traversed graph for " + iteration + " iterations. Found " + currentWordCount
+ " potential words. Bloom saved " + bloomSavings + " so far.");
+ // Check to see if the entire graph has been traversed, the entire roll has been iterated,
+ // or the maximum number of iterations has been reached.
if (currentWordCount == previousWordCount
|| iteration == (roll.rollSize * roll.rollSize) || iteration == maxiterations) {
logger.info("Finished traversing graph after " + iteration + " iterations. Found " + currentWordCount
@@ -236,7 +237,12 @@ private void writeRollFile(String input, FileSystem fileSystem, Configuration co
Path parent = getPath(input, iteration);
fileSystem.mkdirs(parent);
+ SequenceFile.Writer writer = null;
+
for (int i = 0; i < roll.rollCharacters.length; i++) {
+ writer = SequenceFile.createWriter(fileSystem, configuration, new Path(parent, i + ".txt"),
+ Text.class, RollGraphWritable.class);
+
for (int j = 0; j < roll.rollCharacters[i].length; j++) {
ArrayList<Node> nodes = new ArrayList<Node>();
nodes.add(new Node(i, j));
@@ -245,21 +251,14 @@ private void writeRollFile(String input, FileSystem fileSystem, Configuration co
Text text = new Text(roll.rollCharacters[i][j]);
- // Note:
- // By creating a file per starting character, that can cause
- // one character's file to get very little use if it's a z or x or y.
- // You could work around this by rebalancing every so often.
-
// Mimic the adjacency matrix written by the mapper to start things off
- SequenceFile.Writer writer = null;
-
- writer = SequenceFile.createWriter(fileSystem, configuration, new Path(parent, i + "-" + j + ".txt"),
- text.getClass(), graphWritable.getClass());
writer.append(text, graphWritable);
-
- IOUtils.closeStream(writer);
}
+
+ IOUtils.closeStream(writer);
}
+
+ IOUtils.closeStream(writer);
}
/**
View
4 src/UserDictBloom.java
@@ -14,9 +14,9 @@
public class UserDictBloom {
/** The vector size for the Bloom Filter */
- public static final int VECTOR_SIZE = 1048576;
+ public static final int VECTOR_SIZE = 10485760;
/** The number of hashes for the Bloom Filter */
- public static final int NBHASH = 3;
+ public static final int NBHASH = 6;
/** The type of hashing to use for the Bloom Filter */
public static final int HASH_TYPE = Hash.MURMUR_HASH;
Please sign in to comment.
Something went wrong with that request. Please try again.