Permalink
Browse files

Close #93 - \\nto false positives

  • Loading branch information...
client9 committed Apr 13, 2017
1 parent 3f1ba16 commit 80f39305631920bac861a3c4bf6acb8631eae665
Showing with 14 additions and 3 deletions.
  1. +1 −0 falsepositives_test.go
  2. +12 −3 notwords.go
  3. +1 −0 notwords_test.go
View
@@ -121,6 +121,7 @@ func TestFalsePositives(t *testing.T) {
"bodyreader", // variable name
"cantPrepare", // variable name
"dontPrepare", // variable name
"\\nto", // https://github.com/client9/misspell/issues/93
}
r := New()
r.Debug = true
View
@@ -6,8 +6,11 @@ import (
"strings"
)
// Package-level patterns, compiled once at init, for spans of text that
// should not be spell-checked. Each name is declared exactly once; declaring
// them both as standalone vars and inside the group is a redeclaration error.
var (
	// reEmail matches an email-like token: a local part, '@', a domain made
	// of letters, digits, '-' and '.', a final '.' plus a 2-6 letter suffix,
	// and then one trailing non-letter character. The trailing character is
	// part of the match, so an address at the very end of the input (with
	// nothing after it) does not match.
	reEmail = regexp.MustCompile(`[a-zA-Z0-9_.%+-]+@[a-zA-Z0-9-.]+\.[a-zA-Z]{2,6}[^a-zA-Z]`)

	// reHost matches a dotted name: a run of letters, digits, '-' and '.',
	// followed by a '.' and one or more letters (e.g. "infinitie.net", but
	// also selector expressions such as "s.svc.GetObject").
	reHost = regexp.MustCompile(`[a-zA-Z0-9-.]+\.[a-zA-Z]+`)

	// reBackslash matches a backslash followed by a single lowercase ASCII
	// letter, i.e. the two-character text of escapes like \n or \t as they
	// appear in source code string literals.
	reBackslash = regexp.MustCompile(`\\[a-z]`)
)
// RemovePath attempts to strip away embedded file system paths, e.g.
// /foo/bar or /static/myimg.png
@@ -69,8 +72,14 @@ func RemoveHost(s string) string {
return reHost.ReplaceAllStringFunc(s, replaceWithBlanks)
}
// removeBackslashEscapes rewrites every backslash-plus-lowercase-letter pair
// in s (the matches of reBackslash) by passing it through replaceWithBlanks.
// Such pairs are common in printf-style format strings, where the text "\nto"
// would otherwise be read as the word "nto".
func removeBackslashEscapes(s string) string {
	blanked := reBackslash.ReplaceAllStringFunc(s, replaceWithBlanks)
	return blanked
}
// RemoveNotWords blanks out all the not words
func RemoveNotWords(s string) string {
// do most selective/specific first
return RemoveHost(RemoveEmail(RemovePath(StripURL(s))))
return removeBackslashEscapes(RemoveHost(RemoveEmail(RemovePath(StripURL(s)))))
}
View
@@ -16,6 +16,7 @@ func TestNotWords(t *testing.T) {
{"x nickg@client9.xxx y", "x y"},
{"x infinitie.net y", "x y"},
{"(s.svc.GetObject(", "( ("},
{"\\nto", " to"},
}
for pos, tt := range cases {
got := RemoveNotWords(tt.word)

0 comments on commit 80f3930

Please sign in to comment.