Skip to content
This repository

Add PTable.materializeToMap() #40

Merged
merged 3 commits into from almost 2 years ago

2 participants

Brian Martin Josh Wills
Brian Martin

This function is a convenience for lazily materializing a PTable to a Map by wrapping MaterializableIterable.

added some commits June 12, 2012
Josh Wills

Replace the first PTable w/Map.

Josh Wills jwills merged commit 8c32538 into from June 12, 2012
Josh Wills jwills closed this June 12, 2012
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Showing 3 unique commits by 1 author.

Jun 12, 2012
add .cache to .gitignore 1bbb99e
add PTable.materializeToMap() 90f27f3
update incorrect docs 0997a1b
This page is out of date. Refresh to see the latest.
1  .gitignore
... ...
@@ -1,4 +1,5 @@
1 1
 .classpath
2 2
 .project
3 3
 .settings
  4
+.cache
4 5
 target
6  src/main/java/com/cloudera/crunch/PTable.java
@@ -16,6 +16,7 @@
16 16
 package com.cloudera.crunch;
17 17
 
18 18
 import java.util.Collection;
  19
+import java.util.Map;
19 20
 
20 21
 import com.cloudera.crunch.types.PTableType;
21 22
 import com.cloudera.crunch.types.PType;
@@ -118,4 +119,9 @@
118 119
    */
119 120
   PCollection<V> values();
120 121
   
  122
+  /**
  123
+   * Returns a Map<K, V> made up of the keys and values in this PTable.
  124
+   */
  125
+  Map<K, V> materializeToMap();
  126
+
121 127
 }
7  src/main/java/com/cloudera/crunch/impl/mem/collect/MemTable.java
@@ -16,6 +16,7 @@
16 16
 
17 17
 import java.util.Collection;
18 18
 import java.util.List;
  19
+import java.util.Map;
19 20
 
20 21
 import com.cloudera.crunch.GroupingOptions;
21 22
 import com.cloudera.crunch.PCollection;
@@ -27,6 +28,7 @@
27 28
 import com.cloudera.crunch.lib.Cogroup;
28 29
 import com.cloudera.crunch.lib.Join;
29 30
 import com.cloudera.crunch.lib.PTables;
  31
+import com.cloudera.crunch.materialize.MaterializableMap;
30 32
 import com.cloudera.crunch.types.PTableType;
31 33
 import com.cloudera.crunch.types.PType;
32 34
 import com.google.common.collect.Lists;
@@ -133,4 +135,9 @@ public MemTable(Iterable<Pair<K, V>> collect, PTableType<K, V> ptype, String nam
133 135
     return PTables.values(this);
134 136
   }
135 137
 
  138
+  @Override
  139
+  public Map<K, V> materializeToMap() {
  140
+    return new MaterializableMap<K, V>(this.materialize());
  141
+  }
  142
+
136 143
 }
2  src/main/java/com/cloudera/crunch/impl/mr/MRPipeline.java
@@ -26,8 +26,6 @@
26 26
 import org.apache.hadoop.fs.FileSystem;
27 27
 import org.apache.hadoop.fs.Path;
28 28
 
29  
-import com.cloudera.crunch.DoFn;
30  
-import com.cloudera.crunch.Emitter;
31 29
 import com.cloudera.crunch.MapFn;
32 30
 import com.cloudera.crunch.PCollection;
33 31
 import com.cloudera.crunch.PTable;
10  src/main/java/com/cloudera/crunch/impl/mr/collect/PTableBase.java
@@ -16,6 +16,7 @@
16 16
 
17 17
 import java.util.Collection;
18 18
 import java.util.List;
  19
+import java.util.Map;
19 20
 
20 21
 import com.cloudera.crunch.GroupingOptions;
21 22
 import com.cloudera.crunch.PCollection;
@@ -26,6 +27,7 @@
26 27
 import com.cloudera.crunch.lib.Cogroup;
27 28
 import com.cloudera.crunch.lib.Join;
28 29
 import com.cloudera.crunch.lib.PTables;
  30
+import com.cloudera.crunch.materialize.MaterializableMap;
29 31
 import com.cloudera.crunch.types.PType;
30 32
 import com.google.common.collect.Lists;
31 33
 
@@ -107,4 +109,12 @@ public PTableBase(String name) {
107 109
   public PCollection<V> values() {
108 110
     return PTables.values(this);
109 111
   }
  112
+
  113
+  /**
  114
+   * Returns a Map<K, V> made up of the keys and values in this PTable.
  115
+   */
  116
+  public Map<K, V> materializeToMap() {
  117
+    return new MaterializableMap<K, V>(this.materialize());
  118
+  }
  119
+
110 120
 }
34  src/main/java/com/cloudera/crunch/materialize/MaterializableMap.java
... ...
@@ -0,0 +1,34 @@
  1
+package com.cloudera.crunch.materialize;
  2
+
  3
+import java.util.AbstractMap;
  4
+import java.util.HashMap;
  5
+import java.util.Iterator;
  6
+import java.util.Map;
  7
+import java.util.Set;
  8
+
  9
+import com.cloudera.crunch.Pair;
  10
+
  11
+public class MaterializableMap<K, V> extends AbstractMap<K, V> {
  12
+  
  13
+  private Iterable<Pair<K, V>> iterable;
  14
+  private Set<Map.Entry<K, V>> entrySet;
  15
+  
  16
+  public MaterializableMap(Iterable<Pair<K, V>> iterable) {
  17
+  	this.iterable = iterable;
  18
+  }
  19
+  
  20
+  private Set<Map.Entry<K, V>> toMapEntries(Iterable<Pair<K, V>> xs) {
  21
+    HashMap<K, V> m = new HashMap<K, V>();
  22
+    for (Pair<K, V> x : xs)
  23
+      m.put(x.first(), x.second());
  24
+    return m.entrySet();
  25
+  }
  26
+
  27
+  @Override
  28
+  public Set<Map.Entry<K, V>> entrySet() {
  29
+    if (entrySet == null)
  30
+      entrySet = toMapEntries(iterable);
  31
+    return entrySet;
  32
+  }
  33
+
  34
+}
60  src/test/java/com/cloudera/crunch/MaterializeToMapTest.java
... ...
@@ -0,0 +1,60 @@
  1
+package com.cloudera.crunch;
  2
+
  3
+import java.io.IOException;
  4
+import java.util.Map;
  5
+
  6
+import org.junit.Test;
  7
+import static junit.framework.Assert.assertTrue;
  8
+
  9
+import com.cloudera.crunch.impl.mem.MemPipeline;
  10
+import com.cloudera.crunch.impl.mr.MRPipeline;
  11
+import com.cloudera.crunch.test.FileHelper;
  12
+import com.cloudera.crunch.types.PTypeFamily;
  13
+import com.google.common.collect.ImmutableList;
  14
+
  15
+public class MaterializeToMapTest {
  16
+  
  17
+  static final ImmutableList<Pair<Integer,String>> kvPairs = 
  18
+      ImmutableList.of(
  19
+          Pair.of(0, "a"),
  20
+          Pair.of(1, "b"),
  21
+          Pair.of(2, "c"),
  22
+          Pair.of(3, "e"));
  23
+  
  24
+  public void assertMatches(Map<Integer,String> m) {
  25
+    for (Integer k : m.keySet()) {
  26
+      System.out.println(k + " " + kvPairs.get(k).second() + " " + m.get(k));
  27
+      assertTrue(kvPairs.get(k).second().equals(m.get(k)));
  28
+    }
  29
+  }
  30
+  
  31
+  @Test
  32
+  public void testMemMaterializeToMap() {
  33
+    assertMatches(MemPipeline.tableOf(kvPairs).materializeToMap());
  34
+  }
  35
+  
  36
+  private static class Set1Mapper extends MapFn<String,Pair<Integer,String>> {
  37
+    @Override
  38
+    public Pair<Integer, String> map(String input) {
  39
+      
  40
+      int k = -1;
  41
+      if (input.equals("a")) k = 0;
  42
+      else if (input.equals("b")) k = 1;
  43
+      else if (input.equals("c")) k = 2;
  44
+      else if (input.equals("e")) k = 3;
  45
+      return Pair.of(k, input);
  46
+    }
  47
+  }
  48
+  
  49
+  @Test
  50
+  public void testMRMaterializeToMap() throws IOException {
  51
+    Pipeline p = new MRPipeline(MaterializeToMapTest.class);
  52
+    String inputFile = FileHelper.createTempCopyOf("set1.txt");
  53
+    PCollection<String> c = p.readTextFile(inputFile);
  54
+    PTypeFamily tf = c.getTypeFamily();
  55
+    PTable<Integer,String> t = c.parallelDo(new Set1Mapper(), tf.tableOf(tf.ints(), tf.strings()));
  56
+    Map<Integer, String> m = t.materializeToMap();
  57
+    assertMatches(m);
  58
+  }
  59
+
  60
+}
Commit_comment_tip

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note

Something went wrong with that request. Please try again.