Skip to content

Commit

Permalink
normalize stronger, so that users can e.g. type "noergeln" to find "nörgeln" or "Gruss" to find "Gruß"
Browse files Browse the repository at this point in the history
  • Loading branch information
danielnaber committed Aug 6, 2016
1 parent e9cab23 commit d9d4b71
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 14 deletions.
2 changes: 1 addition & 1 deletion application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
app.grails.version=2.3.11
app.name=openthesaurus
app.servlet.version=2.5
app.version=1.3.63
app.version=1.3.64
plugins.feeds=1.5
plugins.hibernate=2.2.4
plugins.mail=0.7.1
Expand Down
9 changes: 3 additions & 6 deletions grails-app/views/ajaxSearch/ajaxMainSearch.gsp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,8 @@
</g:if>
<g:else>
<%
// Builds a case-insensitive regex that matches the trimmed search query as a whole word
// (\b ... \b); presumably used further down to highlight direct matches - confirm in the full view.
// NOTE(review): this is a diff rendering without +/- markers. Judging by the hunk summary
// (3 additions & 6 deletions), the first quotedQuery declaration and the four comment lines
// after it appear to be the removed version, and the `q`-based lines the added one - confirm
// against the commit.
String quotedQuery = Pattern.quote(params.q?.trim())
// treat o = ö - this is debatable, but it's how MySQL searches, so highlight this way:
// TODO: this had to be commented out again because it doesn't work with quoting the input,
// which is needed to avoid an exception if a user searches for e.g. "foo) "
//umlautNormalizedQuery = umlautNormalizedQuery.replaceAll('[ÜüUu]', '[ÜüUu]').replaceAll('[ÄäAa]', '[ÄäAa]').replaceAll('[ÖöOo]', '[ÖöOo]')
// trimmed query, kept in a local so it can be reused
String q = params.q?.trim()
// Pattern.quote turns the user input into a literal, so regex metacharacters such as ")" cannot break the pattern
String quotedQuery = Pattern.quote(q)
// the query becomes capture group 1, delimited by word boundaries on both sides
String directPatternStr = "\\b(" + quotedQuery + ")\\b";
Pattern directPattern = Pattern.compile(directPatternStr, Pattern.CASE_INSENSITIVE);
%>
Expand All @@ -41,7 +38,7 @@
directMatchingTerm = directMatchingTerm.encodeAsHTML();
directMatchingTerm = directMatchingTerm.replace("___beginhighlight___", "<span class=\"synsetmatchDirect\">");
directMatchingTerm = directMatchingTerm.replace("___endhighlight___", "</span>");
if (term.normalizedWord2?.equalsIgnoreCase(params.q?.trim())) {
if (term.normalizedWord?.equalsIgnoreCase(StringTools.normalize(q)) || term.normalizedWord2?.equalsIgnoreCase(StringTools.normalize2(q))) {
directMatchingTerm = '<span class="synsetmatchDirect">' + directMatchingTerm + '</span>'
}
%>
Expand Down
4 changes: 4 additions & 0 deletions src/groovy/com/vionto/vithesaurus/SearchResult.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,9 @@ class SearchResult {
this.synsetList = synsetList
this.completeResult = completeResult
}

/**
 * Returns the textual form of this search result, which is just its synset list.
 */
@Override
String toString() {
    // Implicit Groovy return; if synsetList is not already a String (its type is not
    // visible here), Groovy coerces it to the declared String return type.
    synsetList
}

}
19 changes: 12 additions & 7 deletions src/java/com/vionto/vithesaurus/tools/StringTools.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,23 @@ public static String wikipediaUrlsToLinks(String textWithUrls) {
* Normalize the word for the 'normalizedWord' field.
*/
public static String normalize(String word) {
// NOTE(review): diff rendering without +/- markers - the three statements using a local
// `cleanWord` variable look like the pre-change body and the single
// `return cleanWord(word)...` line like its replacement; as rendered here, that second
// return would be unreachable. Confirm against the commit.
// strip the punctuation characters . ! ? ,
String cleanWord = word.replaceAll("[.!?,]", "");
// drop parenthesized parts (non-greedy match), collapse whitespace runs to one space, trim the ends
cleanWord = cleanWord.replaceAll("\\(.*?\\)", "").replaceAll("\\s+", " ").trim();
return cleanWord;
// same parenthesis/whitespace handling, but starting from cleanWord(word), which also rewrites umlauts and ß
return cleanWord(word).replaceAll("\\(.*?\\)", "").replaceAll("\\s+", " ").trim();
}

/**
* Normalize the word for the 'normalizedWord2' field.
*/
public static String normalize2(String word) {
// NOTE(review): diff rendering without +/- markers - the three statements using a local
// `cleanWord` variable look like the pre-change body and the single
// `return cleanWord(word)...` line like its replacement; as rendered here, that second
// return would be unreachable. Confirm against the commit.
// strip the punctuation characters . ! ? ,
String cleanWord = word.replaceAll("[.!?,]", "");
// unlike normalize(), only the parenthesis characters are removed - the text between them is kept
cleanWord = cleanWord.replace("(", "").replace(")", "").replaceAll("\\s+", " ").trim();
return cleanWord;
// same parenthesis/whitespace handling, but starting from cleanWord(word), which also rewrites umlauts and ß
return cleanWord(word).replace("(", "").replace(")", "").replaceAll("\\s+", " ").trim();
}

/**
 * Strips the punctuation characters {@code . ! ? ,} from the word and transliterates
 * German umlauts and sharp s to their ASCII digraphs (ä to ae, ö to oe, ü to ue, ß to ss),
 * so that a search for "noergeln" can find "nörgeln" (e.g. for users without a German keyboard).
 *
 * @param word the word to clean; must not be null
 * @return the word without the punctuation and with umlauts/ß transliterated
 */
private static String cleanWord(String word) {
    String result = word.replaceAll("[.!?,]", "");
    // Applied in order; each entry is {original, ASCII replacement}.
    String[][] transliterations = {
        {"Ä", "Ae"}, {"ä", "ae"},
        {"Ü", "Ue"}, {"ü", "ue"},
        {"Ö", "Oe"}, {"ö", "oe"},
        {"ß", "ss"},
    };
    for (String[] pair : transliterations) {
        result = result.replace(pair[0], pair[1]);
    }
    return result;
}

public static String normalizeForSort(String s) {
Expand Down

0 comments on commit d9d4b71

Please sign in to comment.