Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ require (
github.com/andybalholm/brotli v1.2.1
github.com/apache/thrift v0.22.0
github.com/cespare/xxhash/v2 v2.3.0
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815
github.com/goccy/go-json v0.10.6
github.com/google/flatbuffers v25.12.19+incompatible
github.com/google/uuid v1.6.0
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 h1:bWDMxwH3px2JBh6AyO7hdCn/PkvCZXii8TGj7sbtEbQ=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
Expand Down
69 changes: 52 additions & 17 deletions parquet/cmd/parquet_reader/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package main

import (
"bufio"
"errors"
"flag"
"fmt"
"io"
"log"
Expand All @@ -30,8 +32,6 @@ import (
"github.com/apache/arrow-go/v18/parquet/file"
"github.com/apache/arrow-go/v18/parquet/metadata"
"github.com/apache/arrow-go/v18/parquet/schema"

"github.com/docopt/docopt-go"
)

var version = ""
Expand All @@ -47,33 +47,68 @@ Commands:

Options:
-h --help Show this screen.
--print-key-value-metadata Print out the key-value metadata. [default: false]
--only-metadata Stop after printing metadata, no values.
--no-metadata Do not print metadata.
--output=FILE Specify output file for data. [default: -]
--no-memory-map Disable memory mapping the file.
--int96-timestamp Parse INT96 as TIMESTAMP for legacy support.
--json Format output as JSON instead of text.
--csv Format output as CSV instead of text.
--columns=COLUMNS Specify a subset of columns to print, comma delimited indexes.`
`

// printUsage writes the static usage banner followed by one line per flag
// registered on fs. All output goes to fs.Output().
//
// Each flag is rendered in long form: "--name" for boolean flags, or
// "--name=VALUE" when the flag takes a value. Flags that take a value and
// have a non-empty default additionally get a trailing "[default: X]" note,
// so that defaults remain visible in the help text.
func printUsage(fs *flag.FlagSet) {
	out := fs.Output()
	fmt.Fprint(out, usage)
	fs.VisitAll(func(f *flag.Flag) {
		// UnquoteUsage yields an empty value name for boolean flags and the
		// back-quoted placeholder (or a type name) for everything else.
		name, flagUsage := flag.UnquoteUsage(f)
		flagName := "--" + f.Name
		if name != "" {
			flagName += "=" + name
			// Only value-taking flags carry a meaningful default; an empty
			// default string is not worth printing.
			if f.DefValue != "" {
				flagUsage += " [default: " + f.DefValue + "]"
			}
		}
		fmt.Fprintf(out, " %-30s%s\n", flagName, flagUsage)
	})
}

func main() {
opts, _ := docopt.ParseDoc(usage)
var config struct {
ColumnIndexes bool `docopt:"column-indexes"`
ColumnIndexes bool
PrintKeyValueMetadata bool
OnlyMetadata bool
NoMetadata bool
Output string
NoMemoryMap bool
JSON bool `docopt:"--json"`
CSV bool `docopt:"--csv"`
ParseInt96AsTimestamp bool `docopt:"--int96-timestamp"`
JSON bool
CSV bool
ParseInt96AsTimestamp bool
Columns string
File string
}
opts.Bind(&config)

args := os.Args[1:]
if len(args) > 0 && args[0] == "column-indexes" {
config.ColumnIndexes = true
args = args[1:]
}

fs := flag.NewFlagSet("parquet_reader", flag.ContinueOnError)
fs.SetOutput(os.Stderr)
fs.BoolVar(&config.OnlyMetadata, "only-metadata", false, "Stop after printing metadata, no values.")
fs.BoolVar(&config.NoMetadata, "no-metadata", false, "Do not print metadata.")
fs.BoolVar(&config.NoMemoryMap, "no-memory-map", false, "Disable memory mapping the file.")
fs.BoolVar(&config.JSON, "json", false, "Format output as JSON instead of text.")
fs.BoolVar(&config.CSV, "csv", false, "Format output as CSV instead of text.")
fs.StringVar(&config.Output, "output", "-", "Specify output `FILE` for data.")
fs.BoolVar(&config.PrintKeyValueMetadata, "print-key-value-metadata", false, "Print out the key-value metadata.")
fs.BoolVar(&config.ParseInt96AsTimestamp, "int96-timestamp", false, "Parse INT96 as TIMESTAMP for legacy support.")
fs.StringVar(&config.Columns, "columns", "", "Specify a subset of `COLUMNS` to print, comma delimited indexes.")
fs.Usage = func() {
printUsage(fs)
}

if err := fs.Parse(args); err != nil {
if errors.Is(err, flag.ErrHelp) {
os.Exit(0)
}
os.Exit(1)
}
if fs.NArg() != 1 {
fs.Usage()
fmt.Fprintln(os.Stderr, "expected exactly one parquet file")
os.Exit(1)
}
config.File = fs.Arg(0)
parseInt96AsTimestamp = config.ParseInt96AsTimestamp

var dataOut io.Writer
Expand Down
32 changes: 24 additions & 8 deletions parquet/cmd/parquet_schema/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,39 @@ import (

"github.com/apache/arrow-go/v18/parquet/file"
"github.com/apache/arrow-go/v18/parquet/schema"
"github.com/docopt/docopt-go"
)

const usage = `Parquet Schema Dumper.

Usage:
parquet_schema -h | --help
parquet_schema <file>

Options:
-h --help Show this screen.`
-h --help Show this screen.
`

func main() {
args, _ := docopt.ParseDoc(usage)
rdr, err := file.OpenParquetFile(args["<file>"].(string), false)
if err != nil {
fmt.Fprintln(os.Stderr, "Error opening parquet file: ", err)
args := os.Args[1:]

switch len(args) {
case 1:
switch args[0] {
case "-h", "--help":
fmt.Fprint(os.Stderr, usage)
os.Exit(0)
}

rdr, err := file.OpenParquetFile(args[0], false)
if err != nil {
fmt.Fprintln(os.Stderr, "Error opening parquet file:", err)
os.Exit(1)
}

schema.PrintSchema(rdr.MetaData().Schema.Root(), os.Stdout, 2)
default:
fmt.Fprint(os.Stderr, usage)
fmt.Fprintln(os.Stderr, "expected exactly one parquet file")
os.Exit(1)
}

schema.PrintSchema(rdr.MetaData().Schema.Root(), os.Stdout, 2)
}