-
Notifications
You must be signed in to change notification settings - Fork 2
/
NBTrainingMapper.java
36 lines (32 loc) · 1.11 KB
/
NBTrainingMapper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
package naivebayes;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
* @author Bingwei Liu
*
* Parse the input file.
* Each line is one review. Two labels at the beginning of each line.
* Label 1: Polarity={POS, NEG}
* Label 2: id={000-999}
* Each label is enclosed in colons (":"), e.g. :POS: or :001:
* Output: each word as the key, and "1" followed by its polarity label (POS or NEG) as the value
*/
public class NBTrainingMapper extends Mapper<LongWritable, Text, Text, Text> {

    /** Number of leading label tokens (polarity, then id) that precede the review words. */
    private static final int LABEL_TOKENS = 2;

    // Output holders reused for every record instead of allocating two Text
    // objects per word. This relies on context.write() serializing/copying the
    // pair before it returns, which is the usual Hadoop contract (same idiom as
    // the stock WordCount mapper).
    private final Text wordKey = new Text();
    private final Text countAndLabel = new Text();

    /**
     * Emits one {@code (word, "1" + polarityLabel)} pair for every word of a review line.
     *
     * <p>The line is split on single spaces. Empty tokens (produced by leading or
     * repeated spaces) are ignored, so the label positions are counted over
     * non-empty tokens only: the first non-empty token is the polarity label
     * (e.g. {@code :POS:} / {@code :NEG:}), the second is the id label (unused),
     * and every following token is a word. A blank line, or a line that holds
     * only labels, produces no output.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line of the input file, i.e. one review
     * @param context sink for the emitted (word, "1" + label) pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void map(LongWritable key, Text value, Context context) throws InterruptedException, IOException {
        // Position among the NON-empty tokens; using raw split indices would
        // misplace the labels whenever the line has extra spaces.
        int tokenIndex = 0;
        for (String raw : value.toString().split(" ")) {
            String token = raw.trim();
            if (token.isEmpty()) {
                continue;
            }
            if (tokenIndex == 0) {
                // Polarity label: the emitted value is identical for every
                // word of this line, so build it once.
                countAndLabel.set("1" + token);
            } else if (tokenIndex >= LABEL_TOKENS) {
                // Tokens after the two labels are the review's words.
                wordKey.set(token);
                context.write(wordKey, countAndLabel);
            }
            // tokenIndex == 1 is the :id: label, which is not used here.
            tokenIndex++;
        }
    }
}