Skip to content

Commit

Permalink
ghokin: manage non UTF-8 charset detection
Browse files Browse the repository at this point in the history
* Add an automatic conversion to UTF-8 charset to be able to manage formatting
* Add a warning to the README about how non UTF-8 files are managed
  • Loading branch information
antham committed Dec 29, 2023
1 parent 94e710f commit af66b75
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Flags:
Use "ghokin [command] --help" for more information about a command.
```

⚠️ Ghokin works only on `UTF-8` encoded files; it automatically detects files that are not encoded in this charset and converts them to `UTF-8`.

### fmt stdout

Dump stdin or a feature file formatted on stdout
Expand Down
18 changes: 18 additions & 0 deletions ghokin/file_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@ package ghokin
import (
"bytes"
"fmt"
"io"
"os"
mpath "path"
"path/filepath"
"sync"

"github.com/antham/ghokin/v3/ghokin/internal/transformer"
"github.com/saintfish/chardet"
"golang.org/x/net/html/charset"
)

// ProcessFileError is emitted when processing a file triggers an error
Expand Down Expand Up @@ -45,6 +48,21 @@ func (f FileManager) Transform(filename string) ([]byte, error) {
if err != nil {
return []byte{}, err
}
detector := chardet.NewTextDetector()
result, err := detector.DetectBest(content)
if err != nil {
return []byte{}, err

Check warning on line 54 in ghokin/file_manager.go

View check run for this annotation

Codecov / codecov/patch

ghokin/file_manager.go#L54

Added line #L54 was not covered by tests
}
if result.Charset != "UTF-8" {
r, err := charset.NewReaderLabel(result.Charset, bytes.NewBuffer(content))
if err != nil {
return []byte{}, err

Check warning on line 59 in ghokin/file_manager.go

View check run for this annotation

Codecov / codecov/patch

ghokin/file_manager.go#L59

Added line #L59 was not covered by tests
}
content, err = io.ReadAll(r)
if err != nil {
return []byte{}, err

Check warning on line 63 in ghokin/file_manager.go

View check run for this annotation

Codecov / codecov/patch

ghokin/file_manager.go#L63

Added line #L63 was not covered by tests
}
}
contentTransformer := &transformer.ContentTransformer{}
contentTransformer.DetectSettings(content)
content = contentTransformer.Prepare(content)
Expand Down
9 changes: 9 additions & 0 deletions ghokin/file_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ func TestFileManagerTransform(t *testing.T) {
assert.EqualValues(t, string(b), string(buf))
},
},
{
"fixtures/iso-8859-1-encoding.input.feature",
func(buf []byte, err error) {
assert.NoError(t, err)
b, e := os.ReadFile("fixtures/iso-8859-1-encoding.expected.feature")
assert.NoError(t, e)
assert.EqualValues(t, string(b), string(buf))
},
},
{
"fixtures/",
func(buf []byte, err error) {
Expand Down
11 changes: 11 additions & 0 deletions ghokin/fixtures/iso-8859-1-encoding.expected.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Feature: aäáeéoóouüußä
In order to test something
As a felhasználó
I want to be able to do something

Scenario: Test
Given those values
| value |
| aäáeéoóouüußä |
When I do 'aäáeéoóouüußä'
Then aäáeéoóouüußä happens
11 changes: 11 additions & 0 deletions ghokin/fixtures/iso-8859-1-encoding.input.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Feature: aäáeéoóouüußä
In order to test something
As a felhasználó
I want to be able to do something

Scenario: Test
Given those values
| value |
| aäáeéoóouüußä |
When I do 'aäáeéoóouüußä'
Then aäáeéoóouüußä happens
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ require (
github.com/fatih/color v1.16.0
github.com/gofrs/uuid v4.4.0+incompatible // indirect
github.com/mitchellh/go-homedir v1.1.0
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d
github.com/spf13/cobra v1.8.0
github.com/spf13/viper v1.18.2
github.com/stretchr/testify v1.8.4
golang.org/x/net v0.19.0
)

go 1.13
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1567,6 +1567,8 @@ github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6ke
github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4=
github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
Expand Down Expand Up @@ -1802,6 +1804,7 @@ golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ=
golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down

0 comments on commit af66b75

Please sign in to comment.