-
Notifications
You must be signed in to change notification settings - Fork 0
/
GDIndexReducer.java
57 lines (50 loc) · 1.57 KB
/
GDIndexReducer.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
* To define a reduce function for your MapReduce job, subclass
* the Reducer class and override the reduce method.
* The class definition requires four parameters:
* @param The data type of the input key - Text
* @param The data type of the input value - IntWritable
* @param The data type of the output key - Text
* @param The data type of the output value - Text
*/
public class GDIndexReducer extends
    Reducer<Text, IntWritable, Text, Text> {

  /**
   * Collapses the values received for one key into a run-length encoded
   * string and writes it to the context.
   *
   * <p>Each run of equal, consecutive values is emitted as
   * {@code (value,count)}; the runs are concatenated without a separator,
   * e.g. values {@code 1,1,2,3,3,3} produce {@code (1,2)(2,1)(3,3)}.
   * Only adjacent duplicates are merged: if the same value shows up again
   * after a different one, it starts a new {@code (value,count)} entry.
   * NOTE(review): the inline comments of the original call the values
   * "docID"s, so the input is presumably expected to arrive grouped by
   * document id - confirm against the mapper / sort configuration.
   *
   * <p>The values are traversed exactly once with a single iterator, so the
   * result does not depend on whether {@code values.iterator()} hands back a
   * fresh iterator or the same one on every call. If {@code values} is empty,
   * nothing is written.
   *
   * @param key     the key whose values are being aggregated; written back
   *                unchanged as the output key
   * @param values  the integer values associated with {@code key}
   * @param context the context the resulting {@code (key, Text)} pair is
   *                written to
   * @throws IOException          if writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    StringBuilder postings = new StringBuilder();
    boolean runOpen = false; // becomes true once the first value has been seen
    int currentId = 0;       // value of the run currently being counted
    int runLength = 0;       // number of occurrences seen in the current run

    for (IntWritable value : values) {
      // Copy the primitive out immediately; the loop never holds on to the
      // IntWritable instance itself.
      int id = value.get();
      if (runOpen && id == currentId) {
        runLength++;
      } else {
        if (runOpen) {
          // A different value ends the current run: flush it first.
          appendPosting(postings, currentId, runLength);
        }
        currentId = id;
        runLength = 1;
        runOpen = true;
      }
    }

    if (!runOpen) {
      // No values at all: there is nothing meaningful to emit for this key.
      return;
    }

    // Flush the final run, which the loop above never closes.
    appendPosting(postings, currentId, runLength);
    context.write(key, new Text(postings.toString()));
  }

  /** Appends one {@code (id,count)} entry to the output buffer. */
  private static void appendPosting(StringBuilder out, int id, int count) {
    out.append('(').append(id).append(',').append(count).append(')');
  }
}