Permalink
Browse files

Fixed a crash that occurs when Soundex encounters non-English characters

  • Loading branch information...
1 parent 9bee55d commit 8ce92f2be336161f8198dbed2fce57f5ad5b005e @gmilette gmilette committed Oct 22, 2012
@@ -17,14 +17,14 @@
import org.apache.commons.codec.language.Soundex;
+import root.gast.speech.text.match.SoundsLikeWordMatcher;
+
/**
* Marks Soundex matches with a '&amp;'.
* @author Greg Milette &#60;<a href="mailto:gregorym@gmail.com">gregorym@gmail.com</a>&#62;
*/
public class MatchesTargetVisitorSoundex extends MatchesTargetVisitor
{
- private Soundex soundex;
-
public MatchesTargetVisitorSoundex(String target)
{
super(target);
@@ -39,11 +39,9 @@ protected String getMark()
@Override
protected String encode(String toEncode)
{
- if (soundex == null)
- {
- soundex = new Soundex();
- }
- return soundex.soundex(toEncode);
+ //wrap toEncode in a SoundsLikeWordMatcher and return the first entry of its getWords()
+ SoundsLikeWordMatcher matcher = new SoundsLikeWordMatcher(toEncode);
+ return matcher.getWords().iterator().next();
}
}
@@ -62,7 +62,17 @@ public boolean isIn(String word)
private static String encode(String in)
{
- return soundex.encode(in);
+ String encoded = in;
+ try
+ {
+ encoded = soundex.encode(in);
+ }
+ catch (IllegalArgumentException e)
+ {
+ //soundex rejects characters it cannot map (e.g. non-English letters); deliberately ignored so the unencoded input is returned instead
+
+ }
+ return encoded;
}
}

0 comments on commit 8ce92f2

Please sign in to comment.