Skip to content

Commit

Permalink
mlr unspace verb (#1167)
Browse files Browse the repository at this point in the history
* mlr unspace verb

* unit tests

* unit tests

* lint
  • Loading branch information
johnkerl committed Jan 1, 2023
1 parent b89371d commit b518bf0
Show file tree
Hide file tree
Showing 16 changed files with 317 additions and 4 deletions.
7 changes: 3 additions & 4 deletions docs/src/data/spaces.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
a b c,def,g h i
123,4567,890
2468,1357,3579
9987,3312,4543
column 1,column 2,column 3
apple,ball,cat
dale egg,fish,gale
66 changes: 66 additions & 0 deletions docs/src/reference-verbs.md
Original file line number Diff line number Diff line change
Expand Up @@ -4078,6 +4078,72 @@ count color shape flag
2 yellow triangle 1
</pre>

## unspace

<pre class="pre-highlight-in-pair">
<b>mlr unspace --help</b>
</pre>
<pre class="pre-non-highlight-in-pair">
Usage: mlr unspace [options]
Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.
Options:
-f {x} Replace spaces with specified filler character.
-k Unspace only keys, not keys and values.
-v Unspace only values, not keys and values.
-h|--help Show this message.
</pre>

The primary use-case is for PPRINT output, which is space-delimited. For example:

<pre class="pre-highlight-in-pair">
<b>cat data/spaces.csv</b>
</pre>
<pre class="pre-non-highlight-in-pair">
column 1, column 2, column 3
apple,ball,cat
dale egg,fish,gale
</pre>

<pre class="pre-highlight-in-pair">
<b>mlr --icsv --opprint cat data/spaces.csv</b>
</pre>
<pre class="pre-non-highlight-in-pair">
column 1 column 2 column 3
apple ball cat
dale egg fish gale
</pre>

<pre class="pre-highlight-in-pair">
<b>mlr --icsv --opprint cat data/spaces.csv</b>
</pre>
<pre class="pre-non-highlight-in-pair">
column 1 column 2 column 3
apple ball cat
dale egg fish gale
</pre>

<pre class="pre-highlight-in-pair">
<b>mlr --icsv --opprint unspace data/spaces.csv</b>
</pre>
<pre class="pre-non-highlight-in-pair">
column_1 _column_2 _column_3
apple ball cat
dale_egg fish gale
</pre>

<pre class="pre-highlight-in-pair">
<b>mlr --icsv --opprint unspace data/spaces.csv | mlr --ipprint --oxtab cat</b>
</pre>
<pre class="pre-non-highlight-in-pair">
column_1 apple
_column_2 ball
_column_3 cat

column_1 dale_egg
_column_2 fish
_column_3 gale
</pre>

## unsparsify

<pre class="pre-highlight-in-pair">
Expand Down
28 changes: 28 additions & 0 deletions docs/src/reference-verbs.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -1229,6 +1229,34 @@ GENMD-RUN-COMMAND
mlr --opprint uniq -a -c data/repeats.dkvp
GENMD-EOF

## unspace

GENMD-RUN-COMMAND
mlr unspace --help
GENMD-EOF

The primary use-case is for PPRINT output, which is space-delimited. For example:

GENMD-RUN-COMMAND
cat data/spaces.csv
GENMD-EOF

GENMD-RUN-COMMAND
mlr --icsv --opprint cat data/spaces.csv
GENMD-EOF

GENMD-RUN-COMMAND
mlr --icsv --opprint cat data/spaces.csv
GENMD-EOF

GENMD-RUN-COMMAND
mlr --icsv --opprint unspace data/spaces.csv
GENMD-EOF

GENMD-RUN-COMMAND
mlr --icsv --opprint unspace data/spaces.csv | mlr --ipprint --oxtab cat
GENMD-EOF

## unsparsify

GENMD-RUN-COMMAND
Expand Down
1 change: 1 addition & 0 deletions internal/pkg/transformers/aaa_transformer_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
UTF8ToLatin1Setup,
UnflattenSetup,
UniqSetup,
UnspaceSetup,
UnsparsifySetup,
}

Expand Down
190 changes: 190 additions & 0 deletions internal/pkg/transformers/unspace.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
package transformers

import (
"container/list"
"fmt"
"os"
"strings"

"github.com/johnkerl/miller/internal/pkg/cli"
"github.com/johnkerl/miller/internal/pkg/mlrval"
"github.com/johnkerl/miller/internal/pkg/types"
)

// ----------------------------------------------------------------
// verbNameUnspace is the verb's name as it appears on the mlr command line
// and in the usage message.
const verbNameUnspace = "unspace"

// UnspaceSetup ties the verb name to its usage-printing and CLI-parsing
// functions. It is the entry listed in TRANSFORMER_LOOKUP_TABLE.
var UnspaceSetup = TransformerSetup{
	Verb:         verbNameUnspace,
	ParseCLIFunc: transformerUnspaceParseCLI,
	UsageFunc:    transformerUnspaceUsage,
	IgnoresInput: false,
}

// transformerUnspaceUsage writes the help text for the unspace verb to o.
// The parse function in this file passes os.Stdout for -h/--help and
// os.Stderr for an unrecognized flag.
func transformerUnspaceUsage(
	o *os.File,
) {
	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameUnspace)

	// The remaining lines are fixed text, emitted one per line.
	for _, line := range []string{
		"Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.",
		"Options:",
		"-f {x} Replace spaces with specified filler character.",
		"-k Unspace only keys, not keys and values.",
		"-v Unspace only values, not keys and values.",
		"-h|--help Show this message.",
	} {
		fmt.Fprintln(o, line)
	}
}

// transformerUnspaceParseCLI parses the unspace verb's flags out of args,
// starting at args[*pargi] (the verb name itself), and stores the index of
// the first argument it did not consume back into *pargi.
//
// Recognized flags:
//
//	-f {x}     filler string used in place of each space (default "_")
//	-k         process keys only
//	-v         process values only
//	-h|--help  print usage to stdout and exit 0
//
// If both -k and -v are given, whichever comes last wins. Any other
// dash-prefixed argument except "--" prints usage to stderr and exits 1.
//
// When doConstruct is false the function returns nil after parsing;
// otherwise it returns the configured transformer.
func transformerUnspaceParseCLI(
	pargi *int,
	argc int,
	args []string,
	_ *cli.TOptions,
	doConstruct bool, // false for first pass of CLI-parse, true for second pass
) IRecordTransformer {

	// args[*pargi] is the verb name; flags start immediately after it.
	argi := *pargi
	verb := args[argi]
	argi++

	filler := "_"
	which := "keys_and_values"

	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
		opt := args[argi]
		// Stop at the first non-flag argument, and also at "--": all
		// transformers must do the latter so main-flags can follow verb-flags.
		if !strings.HasPrefix(opt, "-") || opt == "--" {
			break
		}
		argi++

		switch opt {
		case "-h", "--help":
			transformerUnspaceUsage(os.Stdout)
			os.Exit(0)

		case "-f":
			// Presumably consumes the flag's argument and advances argi;
			// see cli.VerbGetStringArgOrDie.
			filler = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc)

		case "-k":
			which = "keys_only"

		case "-v":
			which = "values_only"

		default:
			transformerUnspaceUsage(os.Stderr)
			os.Exit(1)
		}
	}

	*pargi = argi
	if !doConstruct { // All transformers must do this for main command-line parsing
		return nil
	}

	transformer, err := NewTransformerUnspace(filler, which)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	return transformer
}

// ----------------------------------------------------------------
// TransformerUnspace replaces spaces in record keys and/or values with a
// filler string.
type TransformerUnspace struct {
	// recordTransformerFunc is the per-record handler chosen at
	// construction time: keys only, values only, or both.
	recordTransformerFunc RecordTransformerFunc

	// filler is the string substituted for each space character.
	filler string
}

// NewTransformerUnspace builds an unspace transformer.
//
// filler is the string that replaces each space. which selects what gets
// processed: "keys_only", "values_only", or anything else for both keys and
// values. The returned error is always nil.
func NewTransformerUnspace(
	filler string,
	which string,
) (*TransformerUnspace, error) {
	tr := &TransformerUnspace{filler: filler}

	switch which {
	case "keys_only":
		tr.recordTransformerFunc = tr.transformKeysOnly
	case "values_only":
		tr.recordTransformerFunc = tr.transformValuesOnly
	default:
		tr.recordTransformerFunc = tr.transformKeysAndValues
	}

	return tr, nil
}

// Transform handles one item from the record stream. The end-of-stream
// marker is forwarded to the output list unchanged; every other item is
// passed to the handler selected when the transformer was constructed.
func (tr *TransformerUnspace) Transform(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	inputDownstreamDoneChannel <-chan bool,
	outputDownstreamDoneChannel chan<- bool,
) {
	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)

	if inrecAndContext.EndOfStream {
		// Nothing to rewrite: pass the end-of-stream marker along.
		outputRecordsAndContexts.PushBack(inrecAndContext)
		return
	}

	tr.recordTransformerFunc(
		inrecAndContext,
		outputRecordsAndContexts,
		inputDownstreamDoneChannel,
		outputDownstreamDoneChannel,
	)
}

// transformKeysOnly emits a new record whose keys have spaces replaced by
// the filler. Values are carried over untouched.
func (tr *TransformerUnspace) transformKeysOnly(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	_ <-chan bool,
	__ chan<- bool,
) {
	source := inrecAndContext.Record
	renamed := mlrval.NewMlrmapAsRecord()

	for entry := source.Head; entry != nil; entry = entry.Next {
		// The value is handed over by reference rather than copied: the
		// input record is abandoned after this, so ownership simply moves.
		renamed.PutReference(tr.unspace(entry.Key), entry.Value)
	}

	outputRecordsAndContexts.PushBack(
		types.NewRecordAndContext(renamed, &inrecAndContext.Context),
	)
}

// transformValuesOnly rewrites the input record in place. Each value for
// which GetStringValue reports ok is replaced by a new string value with
// spaces swapped for the filler. Keys and all other values are left as-is.
func (tr *TransformerUnspace) transformValuesOnly(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	_ <-chan bool,
	__ chan<- bool,
) {
	record := inrecAndContext.Record

	for entry := record.Head; entry != nil; entry = entry.Next {
		if text, isString := entry.Value.GetStringValue(); isString {
			entry.Value = mlrval.FromString(tr.unspace(text))
		}
	}

	outputRecordsAndContexts.PushBack(
		types.NewRecordAndContext(record, &inrecAndContext.Context),
	)
}

// transformKeysAndValues emits a new record in which every key has spaces
// replaced by the filler. Each value for which GetStringValue reports ok
// gets the same replacement; other values are carried over by reference.
func (tr *TransformerUnspace) transformKeysAndValues(
	inrecAndContext *types.RecordAndContext,
	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
	_ <-chan bool,
	__ chan<- bool,
) {
	source := inrecAndContext.Record
	rewritten := mlrval.NewMlrmapAsRecord()

	for entry := source.Head; entry != nil; entry = entry.Next {
		key := tr.unspace(entry.Key)

		// Default to the original value; swap in a new string value only
		// when the original yields a string.
		value := entry.Value
		if text, isString := value.GetStringValue(); isString {
			value = mlrval.FromString(tr.unspace(text))
		}

		rewritten.PutReference(key, value)
	}

	outputRecordsAndContexts.PushBack(
		types.NewRecordAndContext(rewritten, &inrecAndContext.Context),
	)
}

// unspace returns input with every ASCII space character replaced by the
// transformer's filler string. Other whitespace (tabs, newlines) is left
// untouched.
func (tr *TransformerUnspace) unspace(input string) string {
	const space = " "
	return strings.ReplaceAll(input, space, tr.filler)
}
10 changes: 10 additions & 0 deletions test/cases/cli-help/0001/expout
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,16 @@ Options:
With -n, produces only one record which is the unique-record count.
With neither -c nor -n, produces unique records.

================================================================
unspace
Usage: mlr unspace [options]
Replaces spaces in record keys and/or values with _. This is helpful for PPRINT output.
Options:
-f {x} Replace spaces with specified filler character.
-k Unspace only keys, not keys and values.
-v Unspace only values, not keys and values.
-h|--help Show this message.

================================================================
unsparsify
Usage: mlr unsparsify [options]
Expand Down
1 change: 1 addition & 0 deletions test/cases/verb-unspace/0001/cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mlr --c2p unspace test/input/spaces.csv
Empty file.
4 changes: 4 additions & 0 deletions test/cases/verb-unspace/0001/expout
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
a_b c _d_e
1 - 3
4_5 6 _7__8
9 10 11
1 change: 1 addition & 0 deletions test/cases/verb-unspace/0002/cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mlr --c2p unspace -k test/input/spaces.csv
Empty file.
4 changes: 4 additions & 0 deletions test/cases/verb-unspace/0002/expout
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
a_b c _d_e
1 - 3
4 5 6 7 8
9 10 11
1 change: 1 addition & 0 deletions test/cases/verb-unspace/0003/cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mlr --c2p unspace -v test/input/spaces.csv
Empty file.
4 changes: 4 additions & 0 deletions test/cases/verb-unspace/0003/expout
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
a b c d e
1 - 3
4_5 6 _7__8
9 10 11
4 changes: 4 additions & 0 deletions test/input/spaces.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
a b,c, d e
1,,3
4 5,6, 7 8
9,10,11

0 comments on commit b518bf0

Please sign in to comment.