Skip to content

Commit

Permalink
MapReduce :: 정규표현식 패턴으로 단어 검색
Browse files Browse the repository at this point in the history
  • Loading branch information
fver1004 committed May 21, 2017
1 parent 2fb80de commit d470d3c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
Binary file modified DaeguLibCounter/bin/DaeguLibCounter/WordProcess.class
Binary file not shown.
19 changes: 17 additions & 2 deletions DaeguLibCounter/src/DaeguLibCounter/WordProcess.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
Expand All @@ -23,6 +25,10 @@ public class WordProcess {
Path pt=new Path("hdfs:/dic.txt");//Location of file in HDFS
// Dictionary map iterated by Processing(): each key maps to an array of tokens
// that are matched against a book name. How it is populated is not visible here.
LinkedHashMap<String, String[]> listMap = new LinkedHashMap<String, String[]>();
// NOTE(review): presumably the reader used to load dic.txt -- usage is outside this view; confirm.
BufferedReader br;
// Left boundary group placed before a token: start of input, any character
// other than A-Z, or whitespace (the \s alternative is already covered by [^A-Z]).
String patternL = "(^|[^A-Z]|\\s)";
// Right boundary group placed after a token: whitespace, any character other
// than A-Z or '+', or end of input.
String patternR = "(\\s|[^A-Z+]|$)";
// Scratch regex state; both are reassigned for every token tested in Processing().
Pattern p;
Matcher m;

WordProcess() throws IOException{

Expand Down Expand Up @@ -51,10 +57,19 @@ public List<String> Processing(String bookname) throws IOException{
// For every dictionary entry, test the entry's tokens against the book name and
// record the entry's key in tokenList on the first token that matches.
for(Map.Entry<String, String[]> entry : listMap.entrySet()){
String key = entry.getKey();
String[] value = entry.getValue();


// Compare against each value in the hash entry's array; use a regular expression to filter out non-matches.
for(String token : value){
// NOTE(review): the next line looks like the pre-change substring check left in
// by the diff rendering (the +/- markers are stripped in this view); confirm it
// is not part of the current file.
if(bookname.toUpperCase().contains(token))

// Surround the token with the left/right boundary groups so it only matches
// when not embedded in a longer run of A-Z letters.
// NOTE(review): the token is concatenated unescaped, so any regex metacharacters
// in a dictionary token are interpreted as pattern syntax -- confirm tokens are
// plain text or consider Pattern.quote.
String pattern = patternL + token + patternR;

p = Pattern.compile(pattern);
// NOTE(review): the matcher runs on bookname as given (not upper-cased), while
// the boundary classes only mention A-Z -- confirm the intended case handling.
m = p.matcher(bookname);

if(m.find()){
tokenList.add(key);
break;}// break so that no duplicate tokens are added

}
}

Expand Down

0 comments on commit d470d3c

Please sign in to comment.