Skip to content

Commit

Permalink
switch charset mapping libraries
Browse files Browse the repository at this point in the history
Previously we used github.com/rogpeppe/go-charset,
but this library is no longer maintained.

This commit switches to golang.org/x/text/encoding/charmap

The charset mappings are only needed for unit tests and command-line tooling.
  • Loading branch information
mschoch committed Mar 30, 2020
1 parent 0a3ad6f commit e976d41
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 9 deletions.
29 changes: 26 additions & 3 deletions cmd/stempel/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ import (
"log"
"os"
"strings"
"unicode"

"github.com/blevesearch/stempel"
"github.com/rogpeppe/go-charset/charset"
_ "github.com/rogpeppe/go-charset/data"
"golang.org/x/text/encoding/charmap"
)

var input = flag.String("i", "", "input file")
Expand Down Expand Up @@ -59,7 +59,7 @@ func main() {
}

if *encoding != "" {
reader, err = charset.NewReader(*encoding, reader)
reader, err = findEncoding(*encoding, reader)
if err != nil {
log.Fatal(err)
}
Expand Down Expand Up @@ -98,3 +98,26 @@ func main() {
log.Fatal(err)
}
}

func findEncoding(encoding string, r io.Reader) (io.Reader, error) {
// walk through all charmaps looking for a match
for _, enc := range charmap.All {
if cm, ok := enc.(*charmap.Charmap); ok {
if strings.Map(mapForCompare, cm.String()) == strings.Map(mapForCompare, encoding) {
return cm.NewDecoder().Reader(r), nil
}
}
}
return nil, fmt.Errorf("no charmap found for encoding %s", encoding)
}

// mapForCompare is a strings.Map-style mapping function used to normalize
// names before comparing them. It returns -1 for whitespace and punctuation
// runes and the lowercase form of every other rune.
func mapForCompare(r rune) rune {
	switch {
	case unicode.IsSpace(r), unicode.IsPunct(r):
		// a negative result marks the rune as one to discard
		return -1
	default:
		// everything else is folded to lowercase
		return unicode.ToLower(r)
	}
}
8 changes: 2 additions & 6 deletions file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ import (
"strings"
"testing"

"github.com/rogpeppe/go-charset/charset"
_ "github.com/rogpeppe/go-charset/data"
"golang.org/x/text/encoding/charmap"
)

func TestEmpty(t *testing.T) {
Expand Down Expand Up @@ -60,10 +59,7 @@ func TestStem(t *testing.T) {
}
}()

cr, err := charset.NewReader("iso-8859-2", wordfile)
if err != nil {
t.Fatal(err)
}
cr := charmap.ISO8859_2.NewDecoder().Reader(wordfile)

scanner := bufio.NewScanner(cr)
for scanner.Scan() {
Expand Down

0 comments on commit e976d41

Please sign in to comment.