Custom word separator

c-bata · Jun 22, 2018 · 75aacfa · 75aacfa
1 parent c704dcd
commit 75aacfa
Show file tree

Hide file tree

Showing 2 changed files with 253 additions and 10 deletions.
diff --git a/document.go b/document.go
@@ -15,6 +15,8 @@ type Document struct {
 	// So if Document is "日本(cursor)語", cursorPosition is 2.
 	// But DisplayedCursorPosition returns 4 because '日' and '本' are double width characters.
 	cursorPosition int
+	// Separator to determine the beginning of a word. Space is applied if empty.
+	Separator string
 }
 
 // NewDocument return the new empty document.
@@ -97,7 +99,13 @@ func (d *Document) GetWordAfterCursorWithSpace() string {
 func (d *Document) FindStartOfPreviousWord() int {
 	// Reverse the text before the cursor, in order to do an efficient backwards search.
 	x := d.TextBeforeCursor()
-	if i := strings.LastIndexByte(x, ' '); i != -1 {
+	var i int
+	if d.Separator == "" {
+		i = strings.LastIndexByte(x, ' ')
+	} else {
+		i = strings.LastIndexAny(x, d.Separator)
+	}
+	if i != -1 {
 		return i + 1
 	} else {
 		return 0
@@ -108,42 +116,66 @@ func (d *Document) FindStartOfPreviousWord() int {
 // pointing to the end of the current word. Return `None` if nothing was found.
 func (d *Document) FindEndOfCurrentWord() int {
 	x := d.TextAfterCursor()
-	if i := strings.IndexByte(x, ' '); i != -1 {
+	var i int
+	if d.Separator == "" {
+		i = strings.IndexByte(x, ' ')
+	} else {
+		i = strings.IndexAny(x, d.Separator)
+	}
+	if i != -1 {
 		return i
 	} else {
-		return len([]rune(x))
+		return len(x)
 	}
 }
 
 // FindStartOfPreviousWordWithSpace is almost the same as FindStartOfPreviousWord.
-// The only difference is to ignore contiguous spaces.
+// The only difference is to ignore contiguous spaces or separators.
 func (d *Document) FindStartOfPreviousWordWithSpace() int {
 	// Reverse the text before the cursor, in order to do an efficient backwards search.
 	x := d.TextBeforeCursor()
+	var start, end int
 
-	end := lastIndexByteNot(x, ' ')
+	if d.Separator == "" {
+		end = lastIndexByteNot(x, ' ')
+	} else {
+		end = lastIndexAnyNot(x, d.Separator)
+	}
 	if end == -1 {
 		return 0
 	}
 
-	start := strings.LastIndexByte(x[:end], ' ')
+	if d.Separator == "" {
+		start = strings.LastIndexByte(x[:end], ' ')
+	} else {
+		start = strings.LastIndexAny(x[:end], d.Separator)
+	}
 	if start == -1 {
 		return 0
 	}
 	return start + 1
 }
 
 // FindEndOfCurrentWordWithSpace is almost the same as FindEndOfCurrentWord.
-// The only difference is to ignore contiguous spaces.
+// The only difference is to ignore contiguous spaces or separators.
 func (d *Document) FindEndOfCurrentWordWithSpace() int {
 	x := d.TextAfterCursor()
+	var start, end int
 
-	start := indexByteNot(x, ' ')
+	if d.Separator == "" {
+		start = indexByteNot(x, ' ')
+	} else {
+		start = indexAnyNot(x, d.Separator)
+	}
 	if start == -1 {
 		return len(x)
 	}
 
-	end := strings.IndexByte(x[start:], ' ')
+	if d.Separator == "" {
+		end = strings.IndexByte(x[start:], ' ')
+	} else {
+		end = strings.IndexAny(x[start:], d.Separator)
+	}
 	if end == -1 {
 		return len(x)
 	}
@@ -369,3 +401,71 @@ func lastIndexByteNot(s string, c byte) int {
 	}
 	return -1
 }
+
// asciiSet is a 256-bit membership bitmap indexed by byte value: bit (c & 31)
// of word (c >> 5) is set when byte c belongs to the set. makeASCIISet only
// ever sets bits for bytes below utf8.RuneSelf, so every byte of a multi-byte
// UTF-8 sequence is reported as "not contained".
type asciiSet [8]uint32

// notContains reports whether the byte c is NOT a member of the set.
func (as *asciiSet) notContains(c byte) bool {
	return (as[c>>5] & (1 << uint(c&31))) == 0
}

// makeASCIISet builds the set of bytes that appear in chars.
// ok is false when chars holds any non-ASCII byte; the returned set is then
// only partially filled and must not be used.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
	for i := 0; i < len(chars); i++ {
		c := chars[i]
		if c >= utf8.RuneSelf {
			return as, false
		}
		as[c>>5] |= 1 << uint(c&31)
	}
	return as, true
}

// indexAnyNot returns the byte index of the first rune in s that is not one
// of the runes in chars. It returns -1 when s is empty, when chars is empty,
// or when s consists solely of runes from chars.
func indexAnyNot(s, chars string) int {
	if len(chars) == 0 {
		return -1
	}
	if len(s) > 8 {
		// For longer inputs an ASCII-only chars can be tested one byte at a
		// time. The first byte outside the set always starts a rune, because
		// every byte before it was a single-byte member of the set.
		if as, isASCII := makeASCIISet(chars); isASCII {
			for i := 0; i < len(s); i++ {
				if as.notContains(s[i]) {
					return i
				}
			}
			return -1
		}
	}
	// A rune is only "not a separator" when it differs from every rune in
	// chars, so test membership in the whole set rather than rune by rune.
	// Ranging over s yields the start offset of each rune, which is the
	// index to report.
	for i, r := range s {
		if !strings.ContainsRune(chars, r) {
			return i
		}
	}
	return -1
}

// lastIndexAnyNot returns the byte index at which the last rune in s that is
// not one of the runes in chars begins. It returns -1 when s is empty, when
// chars is empty, or when s consists solely of runes from chars.
func lastIndexAnyNot(s, chars string) int {
	if len(chars) == 0 {
		return -1
	}
	if len(s) > 8 {
		if as, isASCII := makeASCIISet(chars); isASCII {
			for i := len(s) - 1; i >= 0; i-- {
				if as.notContains(s[i]) {
					// s[i] may be the trailing byte of a multi-byte rune.
					// Report the start of that rune so the result does not
					// depend on which code path handled the input.
					_, size := utf8.DecodeLastRuneInString(s[:i+1])
					return i + 1 - size
				}
			}
			return -1
		}
	}
	for i := len(s); i > 0; {
		r, size := utf8.DecodeLastRuneInString(s[:i])
		i -= size // i now points at the first byte of r
		if !strings.ContainsRune(chars, r) {
			return i
		}
	}
	return -1
}