From 221f74d510f972191a4ce6d36af5686725c2802e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrea=20Funt=C3=B2?= Date: Thu, 18 May 2023 09:45:03 +0200 Subject: [PATCH 1/3] Add support for table format in spec (JSON, YAML, CSV) --- _test/import-csv.yaml | 16 ++- _test/import-json.yaml | 16 ++- _test/import-yaml.yaml | 17 ++- _test/test.csv | 16 +-- _test/test.json | 28 +++- _test/test.yaml | 16 ++- client/client.go | 2 +- client/spec.go | 21 ++- resources/data.go | 318 +++++++++++++++++++++++++++-------------- 9 files changed, 306 insertions(+), 144 deletions(-) diff --git a/_test/import-csv.yaml b/_test/import-csv.yaml index 76c7209..a534253 100644 --- a/_test/import-csv.yaml +++ b/_test/import-csv.yaml @@ -14,10 +14,18 @@ spec: format: csv table: my_csv_data separator: "," - keys: [color] - types: - value: string - color: string + columns: + - name: color + type: string + key: true + unique: true + - name: value + type: string + unique: true + - name: optimized + type: boolean + - name: count + type: integer --- kind: destination spec: diff --git a/_test/import-json.yaml b/_test/import-json.yaml index b058c51..a866c2c 100644 --- a/_test/import-json.yaml +++ b/_test/import-json.yaml @@ -13,10 +13,18 @@ spec: file: ./test.json format: json table: my_json_data - keys: [color] - types: - value: string - color: string + columns: + - name: color + type: string + key: true + unique: true + - name: value + type: string + unique: true + - name: optimized + type: boolean + - name: count + type: integer --- kind: destination spec: diff --git a/_test/import-yaml.yaml b/_test/import-yaml.yaml index 8ddfbd7..ea60bb1 100644 --- a/_test/import-yaml.yaml +++ b/_test/import-yaml.yaml @@ -13,11 +13,18 @@ spec: file: ./test.yaml format: yaml table: my_yaml_data - keys: - - color - types: - value: string - color: string + columns: + - name: color + type: string + key: true + unique: true + - name: value + type: string + unique: true + - name: optimized + type: boolean + - name: count + 
type: integer --- kind: destination spec: diff --git a/_test/test.csv b/_test/test.csv index f99b87b..0d8e063 100644 --- a/_test/test.csv +++ b/_test/test.csv @@ -1,8 +1,8 @@ -color,value -red,#f00 -green,#0f0 -blue,#00f -cyan,#0ff -magenta,#f0f -yellow,#ff0 -black,#000 \ No newline at end of file +color,value,optimized,count +red,#f00,false,-123 +green,#0f0,true,345 +blue,#00f,false,-345 +cyan,#0ff,true,678 +magenta,#f0f,false,-678 +yellow,#ff0,true,901 +black,#000,false,-901 \ No newline at end of file diff --git a/_test/test.json b/_test/test.json index c522c1f..30f129b 100644 --- a/_test/test.json +++ b/_test/test.json @@ -1,30 +1,44 @@ [ { "color": "red", - "value": "#f00" + "value": "#f00", + "optimized": true, + "count": 123 }, { "color": "green", - "value": "#0f0" + "value": "#0f0", + "optimized": false, + "count": -123 }, { "color": "blue", - "value": "#00f" + "value": "#00f", + "optimized": true, + "count": -321 }, { "color": "cyan", - "value": "#0ff" + "value": "#0ff", + "optimized": false, + "count": 321 }, { "color": "magenta", - "value": "#f0f" + "value": "#f0f", + "optimized": true, + "count": 456 }, { "color": "yellow", - "value": "#ff0" + "value": "#ff0", + "optimized": false, + "count": -456 }, { "color": "black", - "value": "#000" + "value": "#000", + "optimized": true, + "count": 789 } ] \ No newline at end of file diff --git a/_test/test.yaml b/_test/test.yaml index 9316a94..bff1616 100644 --- a/_test/test.yaml +++ b/_test/test.yaml @@ -1,15 +1,29 @@ --- - color: red value: "#f00" + optimized: true + count: 123 - color: green value: "#0f0" + optimized: false + count: -123 - color: blue value: "#00f" + optimized: true + count: -321 - color: cyan value: "#0ff" + optimized: false + count: 321 - color: magenta value: "#f0f" + optimized: true + count: 456 - color: yellow value: "#ff0" + optimized: false + count: -456 - color: black - value: "#000" \ No newline at end of file + value: "#000" + optimized: true + count: 789 \ No newline at end of file 
diff --git a/client/client.go b/client/client.go index 575a633..c097d78 100644 --- a/client/client.go +++ b/client/client.go @@ -14,7 +14,7 @@ import ( type Client struct { Logger zerolog.Logger Specs *Spec - Data []map[string]any + //Data []map[string]any } func (c *Client) ID() string { diff --git a/client/spec.go b/client/spec.go index 15be9b6..a847a80 100644 --- a/client/spec.go +++ b/client/spec.go @@ -1,11 +1,18 @@ package client +type Column struct { + Name string `json:"name,omitempty" yaml:"name,omitempty"` + Description *string `json:"description,omitempty" yaml:"description,omitempty"` + Type string `json:"type,omitempty" yaml:"type,omitempty"` + Key bool `json:"key,omitempty" yaml:"pk,omitempty"` + Unique bool `json:"unique,omitempty" yaml:"unique,omitempty"` + NotNull bool `json:"notnull,omitempty" yaml:"notnull,omitempty"` +} type Spec struct { - File string `json:"file,omitempty" yaml:"file,omitempty"` - Format string `json:"format,omitempty" yaml:"format,omitempty"` - Table string `json:"table,omitempty" yaml:"table,omitempty"` - Keys []string `json:"keys,omitempty" yaml:"keys,omitempty"` - Types map[string]string `json:"types,omitempty" yaml:"types,omitempty"` - Separator *string `json:"separator,omitempty" yaml:"separator,omitempty"` // CSV only - Sheet *string `json:"sheet,omitempty" yaml:"sheet,omitempty"` // XLSX only + File string `json:"file,omitempty" yaml:"file,omitempty"` + Format string `json:"format,omitempty" yaml:"format,omitempty"` + Table string `json:"table,omitempty" yaml:"table,omitempty"` + Columns []Column `json:"columns,omitempty" yaml:"columns,omitempty"` + Separator *string `json:"separator,omitempty" yaml:"separator,omitempty"` // CSV only + Sheet *string `json:"sheet,omitempty" yaml:"sheet,omitempty"` // XLSX only } diff --git a/resources/data.go b/resources/data.go index 6d913c2..985c055 100644 --- a/resources/data.go +++ b/resources/data.go @@ -5,7 +5,6 @@ import ( "bytes" "context" "encoding/json" - "errors" "fmt" "os" 
"strings" @@ -14,54 +13,238 @@ import ( "github.com/dihedron/cq-plugin-utils/format" "github.com/dihedron/cq-plugin-utils/pointer" "github.com/dihedron/cq-source-file/client" - "github.com/xuri/excelize/v2" - "gopkg.in/yaml.v3" + "gopkg.in/yaml.v2" ) func GetTables(ctx context.Context, meta schema.ClientMeta) (schema.Tables, error) { - client := meta.(*client.Client) - client.Logger.Debug().Str("file", client.Specs.File).Msg("reading input from file") + columns := []schema.Column{} + for _, c := range client.Specs.Columns { + client.Logger.Debug().Str("name", c.Name).Msg("adding column") + if c.Description == nil { + c.Description = pointer.To(fmt.Sprintf("The column mapping the %q field from the input data", c.Name)) + } + column := schema.Column{ + Name: c.Name, + Description: *c.Description, + Resolver: fetchColumn, + CreationOptions: schema.ColumnCreationOptions{ + PrimaryKey: c.Key, + Unique: c.Unique, + NotNull: c.NotNull, + }, + } + switch strings.ToLower(c.Type) { + case "string", "str", "s": + client.Logger.Debug().Str("name", c.Name).Msg("column is of type string") + column.Type = schema.TypeString + case "integer", "int", "i": + client.Logger.Debug().Str("name", c.Name).Msg("column is of type int") + column.Type = schema.TypeInt + case "boolean", "bool", "b": + client.Logger.Debug().Str("name", c.Name).Msg("column is of type bool") + column.Type = schema.TypeBool + default: + client.Logger.Debug().Str("name", c.Name).Msg("column is of unmapped type, assuming string") + column.Type = schema.TypeString + } + columns = append(columns, column) + } + + client.Logger.Debug().Msg("returning table") + return []*schema.Table{ + { + Name: client.Specs.Table, + Resolver: fetchData, + Columns: columns, + }, + }, nil + + // client.Logger.Debug().Str("file", client.Specs.File).Msg("reading input from file") + + // client.Data = []map[string]any{} + // switch strings.ToLower(client.Specs.Format) { + // case "json": + // data, err := os.ReadFile(client.Specs.File) + // 
if err != nil { + // client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") + // return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) + // } + // client.Logger.Debug().Str("file", client.Specs.File).Msg("input file read") + // if err := json.Unmarshal(data, &client.Data); err != nil { + // client.Logger.Error().Err(err).Msg("error unmarshalling data from JSON") + // return nil, fmt.Errorf("error unmarshalling data from JSON: %w", err) + // } + // case "yaml", "yml": + // data, err := os.ReadFile(client.Specs.File) + // if err != nil { + // client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") + // return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) + // } + // client.Logger.Debug().Str("file", client.Specs.File).Msg("input file read") + // if err := yaml.Unmarshal(data, &client.Data); err != nil { + // client.Logger.Error().Err(err).Msg("error unmarshalling data from JSON") + // return nil, fmt.Errorf("error unmarshalling data from JSON: %w", err) + // } + // case "csv": + // data, err := os.ReadFile(client.Specs.File) + // if err != nil { + // client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") + // return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) + // } + // client.Logger.Debug().Str("file", client.Specs.File).Msg("input file read") + // if client.Specs.Separator == nil { + // client.Specs.Separator = pointer.To(",") + // } + // scanner := bufio.NewScanner(bytes.NewReader(data)) + // var keys []string + // client.Data = []map[string]any{} + // first := true + // for scanner.Scan() { + // line := scanner.Text() + // client.Logger.Debug().Str("line", line).Msg("read line from input file") + // if first { + // first = false + // keys = strings.Split(line, *client.Specs.Separator) + // } else { + // values := strings.Split(line, *client.Specs.Separator) + // 
entry := map[string]any{} + // for i := 0; i < len(keys); i++ { + // entry[keys[i]] = values[i] + // } + // client.Data = append(client.Data, entry) + // } + // } + // case "xsl", "xlsx", "excel": + // xls, err := excelize.OpenFile(client.Specs.File) + // if err != nil { + // client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") + // return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) + // } + // defer func() { + // if err := xls.Close(); err != nil { + // client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") + // } + // }() + // // Get all the rows in the Sheet1. + // if client.Specs.Sheet == nil { + // // get the currently active sheet in the file + // client.Specs.Sheet = pointer.To(xls.GetSheetName(xls.GetActiveSheetIndex())) + // } + // client.Logger.Debug().Str("sheet", *client.Specs.Sheet).Msg("getting data from sheet") + // rows, err := xls.GetRows(*client.Specs.Sheet) + // if err != nil { + // client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error getting rows") + // return nil, fmt.Errorf("error getting rows from input file %q: %w", client.Specs.File, err) + // } + + // var keys []string + // client.Data = []map[string]any{} + // first := true + // for _, row := range rows { + // if first { + // first = false + // keys = row + // } else { + // values := row + // entry := map[string]any{} + // for i := 0; i < len(keys); i++ { + // entry[keys[i]] = values[i] + // } + // client.Data = append(client.Data, entry) + // } + // } + // default: + // client.Logger.Error().Str("format", client.Specs.Format).Msg("unsupported format") + // return nil, fmt.Errorf("unsupported format: %q", client.Specs.Format) + // } + + // if len(client.Data) > 0 { + // columns := []schema.Column{} + // for name := range client.Data[0] { + // client.Logger.Debug().Str("name", name).Msg("adding column") + // column := schema.Column{ + // Name: name, + // 
Description: fmt.Sprintf("The column mapping the %q field from the input data", name), + // Resolver: fetchColumn, + // } + // for _, v := range client.Specs.Keys { + // if name == v { + // client.Logger.Debug().Str("name", name).Msg("column is primary key") + // column.CreationOptions.PrimaryKey = true + // break + // } + // } + // switch strings.ToLower(client.Specs.Types[name]) { + // case "string", "str", "s": + // client.Logger.Debug().Str("name", name).Msg("column is of type string") + // column.Type = schema.TypeString + // case "integer", "int", "i": + // client.Logger.Debug().Str("name", name).Msg("column is of type int") + // column.Type = schema.TypeInt + // case "boolean", "bool", "b": + // client.Logger.Debug().Str("name", name).Msg("column is of type bool") + // column.Type = schema.TypeBool + // default: + // client.Logger.Debug().Str("name", name).Msg("column is of unmapped type, assuming string") + // column.Type = schema.TypeString + // } + // columns = append(columns, column) + // } + // client.Logger.Debug().Msg("returning table") + // return []*schema.Table{ + // { + // Name: client.Specs.Table, + // Resolver: fetchData, + // Columns: columns, + // }, + // }, nil + // } + // return nil, errors.New("no data in file") +} + +func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error { + client := meta.(*client.Client) - client.Data = []map[string]any{} + rows := []map[string]any{} + client.Logger.Debug().Msg("fetching data...") switch strings.ToLower(client.Specs.Format) { case "json": data, err := os.ReadFile(client.Specs.File) if err != nil { client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") - return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) + return fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) } client.Logger.Debug().Str("file", client.Specs.File).Msg("input file read") - if err := 
json.Unmarshal(data, &client.Data); err != nil { + if err := json.Unmarshal(data, &rows); err != nil { client.Logger.Error().Err(err).Msg("error unmarshalling data from JSON") - return nil, fmt.Errorf("error unmarshalling data from JSON: %w", err) + return fmt.Errorf("error unmarshalling data from JSON: %w", err) } case "yaml", "yml": data, err := os.ReadFile(client.Specs.File) if err != nil { client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") - return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) + return fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) } client.Logger.Debug().Str("file", client.Specs.File).Msg("input file read") - if err := yaml.Unmarshal(data, &client.Data); err != nil { - client.Logger.Error().Err(err).Msg("error unmarshalling data from JSON") - return nil, fmt.Errorf("error unmarshalling data from JSON: %w", err) + if err := yaml.Unmarshal(data, &rows); err != nil { + client.Logger.Error().Err(err).Msg("error unmarshalling data from YAML") + return fmt.Errorf("error unmarshalling data from YAML: %w", err) } case "csv": data, err := os.ReadFile(client.Specs.File) if err != nil { client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") - return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) + return fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) } client.Logger.Debug().Str("file", client.Specs.File).Msg("input file read") if client.Specs.Separator == nil { client.Specs.Separator = pointer.To(",") } scanner := bufio.NewScanner(bytes.NewReader(data)) - var keys []string - client.Data = []map[string]any{} first := true + var keys []string for scanner.Scan() { line := scanner.Text() client.Logger.Debug().Str("line", line).Msg("read line from input file") @@ -70,106 +253,27 @@ func GetTables(ctx context.Context, meta schema.ClientMeta) (schema.Tables, erro keys = 
strings.Split(line, *client.Specs.Separator) } else { values := strings.Split(line, *client.Specs.Separator) - entry := map[string]any{} + row := map[string]any{} + //for i := 0; i < len(client.Specs.Columns); i++ { for i := 0; i < len(keys); i++ { - entry[keys[i]] = values[i] + for _, column := range client.Specs.Columns { + if keys[i] == column.Name { + row[client.Specs.Columns[i].Name] = values[i] + } + } } - client.Data = append(client.Data, entry) - } - } - case "xsl", "xlsx", "excel": - xls, err := excelize.OpenFile(client.Specs.File) - if err != nil { - client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") - return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) - } - defer func() { - if err := xls.Close(); err != nil { - client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") - } - }() - // Get all the rows in the Sheet1. - if client.Specs.Sheet == nil { - // get the currently active sheet in the file - client.Specs.Sheet = pointer.To(xls.GetSheetName(xls.GetActiveSheetIndex())) - } - client.Logger.Debug().Str("sheet", *client.Specs.Sheet).Msg("getting data from sheet") - rows, err := xls.GetRows(*client.Specs.Sheet) - if err != nil { - client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error getting rows") - return nil, fmt.Errorf("error getting rows from input file %q: %w", client.Specs.File, err) - } - - var keys []string - client.Data = []map[string]any{} - first := true - for _, row := range rows { - if first { - first = false - keys = row - } else { - values := row - entry := map[string]any{} - for i := 0; i < len(keys); i++ { - entry[keys[i]] = values[i] - } - client.Data = append(client.Data, entry) + rows = append(rows, row) } } + // TODO: add more formats default: client.Logger.Error().Str("format", client.Specs.Format).Msg("unsupported format") - return nil, fmt.Errorf("unsupported format: %q", client.Specs.Format) - } + return 
fmt.Errorf("unsupported format: %q", client.Specs.Format) - if len(client.Data) > 0 { - columns := []schema.Column{} - for name := range client.Data[0] { - client.Logger.Debug().Str("name", name).Msg("adding column") - column := schema.Column{ - Name: name, - Description: fmt.Sprintf("The column mapping the %q field from the input data", name), - Resolver: fetchColumn, - } - for _, v := range client.Specs.Keys { - if name == v { - client.Logger.Debug().Str("name", name).Msg("column is primary key") - column.CreationOptions.PrimaryKey = true - break - } - } - switch strings.ToLower(client.Specs.Types[name]) { - case "string", "str", "s": - client.Logger.Debug().Str("name", name).Msg("column is of type string") - column.Type = schema.TypeString - case "integer", "int", "i": - client.Logger.Debug().Str("name", name).Msg("column is of type int") - column.Type = schema.TypeInt - case "boolean", "bool", "b": - client.Logger.Debug().Str("name", name).Msg("column is of type bool") - column.Type = schema.TypeBool - default: - client.Logger.Debug().Str("name", name).Msg("column is of unmapped type, assuming string") - column.Type = schema.TypeString - } - columns = append(columns, column) - } - client.Logger.Debug().Msg("returning table") - return []*schema.Table{ - { - Name: client.Specs.Table, - Resolver: fetchData, - Columns: columns, - }, - }, nil } - return nil, errors.New("no data in file") -} -func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error { - client := meta.(*client.Client) - client.Logger.Debug().Msg("fetching data...") - for _, row := range client.Data { - client.Logger.Debug().Msg("returning single row") + for _, row := range rows { + client.Logger.Debug().Str("row", format.ToJSON(row)).Msg("returning single row") res <- row } return nil From c1eee4198b13b731c6821d1a2d03c8d8335f197a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrea=20Funt=C3=B2?= Date: Thu, 18 May 2023 09:56:45 +0200 Subject: [PATCH 
2/3] Add support for XLSX to spec-typed columns --- _test/import-xlsx.yaml | 16 +++- _test/test.xlsx | Bin 10492 -> 10622 bytes resources/data.go | 192 +++++++++++------------------------------ 3 files changed, 61 insertions(+), 147 deletions(-) diff --git a/_test/import-xlsx.yaml b/_test/import-xlsx.yaml index 4f80365..448b3c5 100644 --- a/_test/import-xlsx.yaml +++ b/_test/import-xlsx.yaml @@ -13,10 +13,18 @@ spec: file: ./test.xlsx format: xlsx table: my_xlsx_data - keys: [color] - types: - value: string - color: string + columns: + - name: color + type: string + key: true + unique: true + - name: value + type: string + unique: true + - name: optimized + type: boolean + - name: count + type: integer --- kind: destination spec: diff --git a/_test/test.xlsx b/_test/test.xlsx index 8b0856fc44c061bf73a649e4e4087057d64f9eb7..bca2b3b005d2519433e973cd974bc040506f3529 100644 GIT binary patch delta 2225 zcmV;i2u}C>QT|e}Itc~$lau0VlRpV6e%9gG#owgV3_euj`M)2qR;u%&Ab4 zCHRRMe38C?H$N!7-zv370FWlZrL}eJc_u4aMa@XnOoFFSx}p|Lz4J`18O=?}f0(U= z7x@0vt0sla>4CW z3&p6);_IDMnr;Q8eI%2f4BJ=eif39GRoD@rye`OB`4ZnFq)R!S7hJIWPIg4=`U|bx zEdnD!ja}v3vOK|4Fx7#*c%hqmf6;IOUJ{0ZkG=F)DQ+~%SwS0N*HDBW4sZE>ltIARb0ztRNVD6AE#$`36sDUAAgoxZ`v>zhVLisKUltR4xEVyRSW9YNt32^hil_F zU^R&|+d#Wtzt0Yk+5wH;v@w43`tj?-m)Y%0S)d)0ii;}6aS&m|Dk?HwtyBE-$HUD8 zBc(}|kwR1~#ji}^+xf>2v%QedYQvaD;83X)Z?vwHFjREIN}>W$vkH20A%9Dvp()p) zswE?teW@(MUKEYOlJE*Q4vD(0e}7CY_HxtBTZIMtYB2L4Au!FlmAMHXCr)spCYhSR$wd% z)qKYmtSD0aDTYN=f9xpc8HNZ?W4wJnj`v{K?3+Xx%gMIT-^KnjLaHwhaaJJlu8WImH(k6@R>nS_x*2&I_?5+{HInAGh=Bu)-?7J!6q_>^4w+hYy*H{s^0^B%wCefk@#;=BEI=K^OxUU!SsmTw zg=Lmz$$uqo)k$Q$*P8ta6YxHdNK2&8C4nr@GH4zPLdPdWNl_n+xzz~*smAAtq*aF- z)l3xLCVcnNox$^htSpTi`@&()% zlhGd)1!1d&XHK)R8YTgM-%f)t7{%{Rd+z4Y2qpyK;$%7>##L`)woALfTHY z*YXv7F$-*n&I|9_p3`5?`O;*um!9f#Q`Y)Fwqw`O=>w&se4eD=P&l zI)Fio?D8VvHRY9t2VK>mZ2`t}BxNYCQ?#|VrWhN(1<6d{9LkA*>q<*z-BxdK&G;MJ z0G<#MV+ocCW*KgQemxM;5h?h9tarNX0D@yEL4vXdhe3$Hy%w}Ie_rU6ky94-P`ggY 
zxMQ~9y*Rk{rWlM((*#Y_#c=sU{Ia@#?5g^OYG=g(Wr^UFYhbq0*|idG+SG*xZazU0LL6Ra4U*``S~ z?Yr*`WlPtWPTLFiVeaR<|J^Y^Khy#}GHtk$DJG{rMoi|a;BuSduXi8EF-FExSx})Q zOYsvk_&j_6ZoXG~w^3?`03c0@D{Gs?^Gsf`nwnG9FbOB6(lxbU>aAxQ&1hjNf5vPr zyukOTUQM~gJt5I=VW_g?Ia{i{t(mkvL^DAxgl{U|n1NR3Z^6}6@7iXZtGWTih6`?= zIw(eUo?LCE(sUyr?E{GhGHhR=Yo2RmRB2BE<@G_n%9r>aA$`i(yySx2_p&3}G+$`# zZV?y>YV4}umK7vNm-*P$= z9UizW)E-2fMKBvrg9-b6ck;jsTctQSivwtp{@o|Wx0MB-AdaK%{v&)h0B}vWBH0!G zmcW#`!MJjD#YiU{Hu|dQs^$$YP+cxr;hF^iN7ggOLaVrz$*H&yI;R!lVl6fExk!Fyu6Qne0ZJQJXIyxfi_arDUE`V5~zjB zWxY=6x3BkCb4rZmbBib7X3U5URNMrrff}C_ zN>_i};!v-dX*BTMzf_fM6oyk)aaq&WA=a08sG^Vp?v&V8P}|l-L&+`9-)v-Kj&7B> z#IEA{aob!8RW)d_lBKlI-jtF`#1HFQX}&6P-A~b&i=($6^z17sv@)u&0ot*4qWv{5 z*n*)`mgEvw;L1)k6e+!p;*X;-Wm)12^j&|#-u!hVt#tc0l`j;EctVNaV01>ctTi`stxx!za=Pp}5A z6mkB12S(r?AUYUL+{A?{(E@)-C0#RN4f#_$S()2SO6S2W3K!QiG+BYM_tH61BDR0V zs_I9pi4IP!$LQdJ|NHj&B$!VoXm_zk+i$SmRw5CTxdg69>h+z9Kp)7$Q$xWL?g6;&bk(!|f}shq>3U;X3g-ciiFj z6_a7EYrBi9$f$YklPeY>B!|v`&qWh*G5Cb z*YJt!wX6*(q424ORnTqhg#lh$)wrzl!J{lD)^W7KQw=aH!T>*wLMeM3AjvHGF2Qaa z((nfKQvSljWI>7260_4ETPbanwwgxbOOAUhqh*>3=op%xShWxY$9-~a#s z>H(7q8XJF+OHRZv3_$lt+#$-WnFJOigwo6ci4(v9OzO7LBu}VB!rqS{rvos z&G{<^amCC&Q77+@S_&kWdQW2~-(Fwt9;M&{)I&&wom`RS+1}n5<|1k>aVMuz<|n1t zO&EcXX-2A|0AhIv#xXM~<7#1eyg(E6kzc#>MerG-kJdy--y|FPinAFzhKHO0CV zQZ}ksqkh0Iyh2!T3p4gxbQrYWvN0o4BkG|M-*;vlv>vv#ZiT&mjTd1=DzLL-qplc2 z`fSa;CBa?VYyO|D>aXlK0F$8>6a~Y02k}X>E*mBRf89>QARNW-O?-!j`Hj=+CgW1$3?-CoOA@WrGbtJQho9dHgm&iRLQagb&34#pOh$`FZS6oBHD zDAYDXPwQoPg#zoC60EEgWatPME%M8YlxxZ>0}rOsU|az<@FZm^*BRP5rzysk??5se zd57}if2J~$IlnbqtQmh}Tfmbzo?!`&3Fa7XfT13Us7DGuAobpqEkJM#B}h=tViFPj z?R8+J{qsVrjGVG?N9{ZHamQ@IJ8^IyY%v(?x{m6kjp6eX{Ia@#Y^#QaYG%a&<*DG5 z8(^+7`Lz;$6)gA4eSV~Pq%>13nRP2a@J#{YL+1F~d~WdMwn&Eu5%_4dD{@kvliR!X z66KS4l7#UzB=a?y(RfP9?6rCS$NeU1cOd%||0m9AoQ#MkK%U~W?~#82lc5$Av#uT5 z2n8$%oI`7qjV4k7#FOAAIsv_t8z)c!+mmc3CL2hlgX^vW005~6000;O0000000000 z00000sFSTHD;#CLOPb&S008O%000yK000000000000000j2V*-8WxiZC=dn1cn9%G 
zlOHG*0-hR^HYhy;9vzds9VwHZC@KQjACt-`J^?k82`N1RB$G!eF$Odx00000B 0 { - // columns := []schema.Column{} - // for name := range client.Data[0] { - // client.Logger.Debug().Str("name", name).Msg("adding column") - // column := schema.Column{ - // Name: name, - // Description: fmt.Sprintf("The column mapping the %q field from the input data", name), - // Resolver: fetchColumn, - // } - // for _, v := range client.Specs.Keys { - // if name == v { - // client.Logger.Debug().Str("name", name).Msg("column is primary key") - // column.CreationOptions.PrimaryKey = true - // break - // } - // } - // switch strings.ToLower(client.Specs.Types[name]) { - // case "string", "str", "s": - // client.Logger.Debug().Str("name", name).Msg("column is of type string") - // column.Type = schema.TypeString - // case "integer", "int", "i": - // client.Logger.Debug().Str("name", name).Msg("column is of type int") - // column.Type = schema.TypeInt - // case "boolean", "bool", "b": - // client.Logger.Debug().Str("name", name).Msg("column is of type bool") - // column.Type = schema.TypeBool - // default: - // client.Logger.Debug().Str("name", name).Msg("column is of unmapped type, assuming string") - // column.Type = schema.TypeString - // } - // columns = append(columns, column) - // } - // client.Logger.Debug().Msg("returning table") - // return []*schema.Table{ - // { - // Name: client.Specs.Table, - // Resolver: fetchData, - // Columns: columns, - // }, - // }, nil - // } - // return nil, errors.New("no data in file") } +// fetchData reads the input file and unmarshals it into a set of rows using +// format-specific mechanisms, then encodes the information as a map[string]any +// per row and returns it; fetchColumn knows how to pick the data out of this +// map and set it into the resource being returned to CloudQuery.
func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error { client := meta.(*client.Client) @@ -265,6 +130,45 @@ func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resou rows = append(rows, row) } } + case "xsl", "xlsx", "excel": + xls, err := excelize.OpenFile(client.Specs.File) + if err != nil { + client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") + return fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) + } + defer func() { + if err := xls.Close(); err != nil { + client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") + } + }() + // get all the rows in the requested (or the active) sheet + if client.Specs.Sheet == nil { + // get the currently active sheet in the file + client.Specs.Sheet = pointer.To(xls.GetSheetName(xls.GetActiveSheetIndex())) + } + client.Logger.Debug().Str("sheet", *client.Specs.Sheet).Msg("getting data from sheet") + xlsrows, err := xls.GetRows(*client.Specs.Sheet) + if err != nil { + client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error getting rows") + return fmt.Errorf("error getting rows from input file %q: %w", client.Specs.File, err) + } + + var keys []string + first := true + for _, xlsrow := range xlsrows { + if first { + first = false + keys = xlsrow + } else { + values := xlsrow + row := map[string]any{} + for i := 0; i < len(keys); i++ { + row[keys[i]] = values[i] + } + rows = append(rows, row) + } + } + // TODO: add more formats default: client.Logger.Error().Str("format", client.Specs.Format).Msg("unsupported format") @@ -279,6 +183,8 @@ func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resou return nil } +// fetchColumn picks the value under the right key from the map[string]any +// and sets it into the resource being returned to CloudQuery. 
func fetchColumn(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error { client := meta.(*client.Client) client.Logger.Debug().Str("resource", format.ToJSON(resource)).Str("column", format.ToJSON(c)).Str("item type", fmt.Sprintf("%T", resource.Item)).Msg("fetching column...") From 33d876adeb523843624103fe6ca84c83333be2d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrea=20Funt=C3=B2?= Date: Thu, 18 May 2023 10:43:46 +0200 Subject: [PATCH 3/3] Fix panic on null cells --- _test/import-xlsx.yaml | 1 + _test/test.csv | 4 ++- _test/test.xlsx | Bin 10622 -> 10679 bytes resources/data.go | 62 ++++++++++++++++++++++++++++++++++------- 4 files changed, 56 insertions(+), 11 deletions(-) diff --git a/_test/import-xlsx.yaml b/_test/import-xlsx.yaml index 448b3c5..7e44ba2 100644 --- a/_test/import-xlsx.yaml +++ b/_test/import-xlsx.yaml @@ -23,6 +23,7 @@ spec: unique: true - name: optimized type: boolean + notnull: true # comment to get a null - name: count type: integer --- diff --git a/_test/test.csv b/_test/test.csv index 0d8e063..77d06f8 100644 --- a/_test/test.csv +++ b/_test/test.csv @@ -5,4 +5,6 @@ blue,#00f,false,-345 cyan,#0ff,true,678 magenta,#f0f,false,-678 yellow,#ff0,true,901 -black,#000,false,-901 \ No newline at end of file +black,#000,false,-901 +grey,#aaa,false, +white,#fff,,-222 \ No newline at end of file diff --git a/_test/test.xlsx b/_test/test.xlsx index bca2b3b005d2519433e973cd974bc040506f3529..12187e41a624809a6ac2e14ea8e085906cdb692a 100644 GIT binary patch delta 1583 zcmV+~2GIHbQnypEwH6AOAbC7K0{{SX3X{JUAb;C#;xG_~?^oJ)h>e;$uLGrfJON^i@g;-XGT*esi`{AD%r;mrtx@nPxfc6Y((qOCg?7 z_y|9RV5y7%Eor9tmd#mNreqO=T>Uk(;4@4S9On4=yr1vE?Adqe<*cBaQvVRUFO09& z8YUh<3@{W){<>f)gFS$${dj<`nJB>m{(2S1WP%vdmll@idYzK7KZ?RhZv-YwrhoJU zM@Qmi8>L0{r_~s~gGc=C0snXH_`n|z2L0jq|0Y4pW0tz0n$D(D>^#Ua3`avTXQQNl zZauIN)ZU@kd;+WrV!WM&(_jl>%ib0D$BM{S%t3MZIvU!83u|z47z_aRKn%;joEXJ6 zaqJKuD|!wwC_3ViLmZnhbc?i5dw^)NUgzR5&r0qoUYGb;5FgtmqQYr2QNe=c76>sNGgssKaYe 
zQEa0|?(sSyanBb>&c}6VV7sibU`N-$qS%3TPu2;Ed$d4uKC&YN+hvvoi+`o~;(H6l z4y=2&PDtFt1(NfT9UIs#$t~E)HLxglt}HhVPDtD|0LiJyT38V@I9FW$sCK~IEjNyi z#@X`)Xs~jeyWKxrL}7w{r%UX6!NE2GsbW*fO_^xxd0XTfx?-HYHSOsJ;8QH9YiZ460t}!>w8fq4zIP-O=}n=}qG`v|;jz|AV#&LM|kS zF2FQkDQSe%l5!36B^CbO|-m~@bD~9HTrTUy&al2Qd z!DMsiQ{RfW*O!||(Nu!eLC6VPF=G`+bA6?2rDmy3wG{)=_#|bu14gL(JYrgITrLr4 zIhJ15u^@DHK%^MtL8%8Bf59iw*gPd_#oaP?O8#@g=LQu(2bP&KN>IbLG~OU_ZDU zZaa_UqyOd^QQ=WK2$kjg_gEWg&TKpYb9o*-v5q_E{#lpHpE5rHlc5$A1s&QrPfxQh z8zupNO>V;=5QX=ZdIyWWjY*UrA=^=ZlvN{Ds;gApA($jq4Msrf-+5{25P=s918a}-#|lwf70AVo*8XpvoCC0tWp z8F)062IC5_fhQ?TxlYl}IZZLPd6tk`!7lOHG*0aKGTC_Vz&9h1Br hDU+TkDgl#|$tXVo@RJECJptd7M=3D|?36sATAb(qL+AtV~?x&5MjhBDl%TJ zQ~dMC!_5RErAd{MLR2ipuT0_F`Nt2ly^zmp!Pzr+JVkKVS?u`fB5LWx`lU>7!^-8oOg zX$Vg9S;k=oCUzuQp5nVW`O+KUa6YpE{eR+YuiiW(O;%5=U{tdV)(In%|4N8wBYcFP zBCu3eU@QsMe8(27C{p|>hDB9>>?r0Lh6qn%ynQ~7_h8rTn?xDQ$+pnn#r`wn>y3ti zhme7ZBFSEtOi@?^7&_<;jVl!eSioOYaul)oEId~j+!dIt~qzi;}7!DKibj3)m#2^$_e)g{qnK9gdPV3)y=swQU7#>wE) zd|)C*duPP<6<}46@opZ?!X3m$4~qM)B6bxEP@LY5M{eiR>71TALx4RI!}PBwHe#2! 
z=Mi@meUBIvE%DeR?%5dHNjj+g_kW-ov5PwJP`iquhYE@ob>gA+EowV|2Nl--`c#eB zMfDc)zN_$pTY%znssoGKE`);$C+2!oBX&`}Snj)ucH`M?zW~MMREHL|T`C84^d3|r zc2Q&hc%6~>=L;m4a~)aOcGozt#PqgCHL00= z;(wvmPcagbLus>L6)CQT)Dx0$4dW#oO09g%Y?TU6-$3(!0F$8>6o2A69T@ij002P( z000yK004MwFLP*Na%E&wbaH8KXLBxiZES3kOHRZv42Jhe+@Z>>nS_x*2&I_?5+{HI znAGh=Bu)-?7J!6q_>^4w+hYy*H{s^0^B%wCefk@#;=BEI=K^ zOxUU!SsmTwg=Lmz$$uqo)k$Q$*P8ta6YxHdNK2&8C4nr@GH4zPLdPdWNl_n+xzz~* zsmAAtq*aF-)l3xLCVcnNox$^ht zSpTi`@&()%0F$8>6a`_cg=bE)W*a5}f8S1nFc`(}O?(H^duSPsEe){vC%bZqi5QJ{ zP5W&l(n8u!w%76%d@&1bh|UY|+Md&2&-v11v6rRy4%!rzN>LaPxz6Hrl;2g?{f9pz1 zX5ChAaLxD|+W?*r5@QLL31%5?fqp#@(Ge;5fUI}A>;QseC_#d<28Th2zr7Z;G=E;` zl#x>w_E5V{$GBs*;JrAw_of((P16KT)WvZ5L;SM3f9$IIg=%NT0cDBclxtwN(%H2V zZsjd^%6@(%c%-ybESa$@H}G`36s?)JRIUW9T@ij002P(000yK000000000000000@)?sYC?*?WtA%Gy z0RRAN0ssIJ00000000000000002`BTC_Vw8ldULJ0a}v+DL((^b0Lq@!LjV8( diff --git a/resources/data.go b/resources/data.go index 992f9a5..129c5c2 100644 --- a/resources/data.go +++ b/resources/data.go @@ -7,6 +7,7 @@ import ( "encoding/json" "fmt" "os" + "reflect" "strings" "github.com/cloudquery/plugin-sdk/schema" @@ -118,16 +119,20 @@ func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resou keys = strings.Split(line, *client.Specs.Separator) } else { values := strings.Split(line, *client.Specs.Separator) - row := map[string]any{} - //for i := 0; i < len(client.Specs.Columns); i++ { - for i := 0; i < len(keys); i++ { - for _, column := range client.Specs.Columns { - if keys[i] == column.Name { - row[client.Specs.Columns[i].Name] = values[i] + if len(values) >= len(keys) { + row := map[string]any{} + for i := 0; i < len(keys); i++ { + for _, column := range client.Specs.Columns { + + if keys[i] == column.Name { + row[client.Specs.Columns[i].Name] = values[i] + } } } + rows = append(rows, row) + } else { + client.Logger.Warn().Str("file", client.Specs.File).Str("line", line).Int("expected", len(keys)).Int("actual", len(values)).Msg("invalid number of columns") } - 
rows = append(rows, row) } } case "xsl", "xlsx", "excel": @@ -163,7 +168,13 @@ func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resou values := xlsrow row := map[string]any{} for i := 0; i < len(keys); i++ { - row[keys[i]] = values[i] + if i < len(values) { + // XLSX rows can be sparse, in which case all TRAILING empty cells are removed + // from the returned slice; empty cells in the middle are still valid + row[keys[i]] = values[i] + } else { + row[keys[i]] = nil + } } rows = append(rows, row) } @@ -187,7 +198,38 @@ func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resou // and sets it into the resource being returned to CloudQuery. func fetchColumn(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error { client := meta.(*client.Client) - client.Logger.Debug().Str("resource", format.ToJSON(resource)).Str("column", format.ToJSON(c)).Str("item type", fmt.Sprintf("%T", resource.Item)).Msg("fetching column...") + // client.Logger.Debug().Str("resource", format.ToJSON(resource)).Str("column", format.ToJSON(c)).Str("item type", fmt.Sprintf("%T", resource.Item)).Msg("fetching column...") item := resource.Item.(map[string]any) - return resource.Set(c.Name, item[c.Name]) + value := item[c.Name] + client.Logger.Debug().Str("value", fmt.Sprintf("%v", value)).Str("type", fmt.Sprintf("%T", value)).Msg("checking value type") + if value == nil { + client.Logger.Warn().Msg("value is nil") + if c.CreationOptions.NotNull { + err := fmt.Errorf("invalid nil value for non-nullable column %s", c.Name) + client.Logger.Error().Err(err).Str("name", c.Name).Msg("error setting column") + return err + } + } else { + client.Logger.Warn().Msg("value is NOT nil") + if reflect.ValueOf(value).IsZero() { + if !c.CreationOptions.NotNull { + // column is nullable, let's null it + client.Logger.Warn().Str("name", c.Name).Msg("nulling column value") + value = nil + } else { + client.Logger.Warn().Msg("set 
default value for type") + switch c.Type { + case schema.TypeBool: + value = false + case schema.TypeInt: + value = 0 + case schema.TypeString: + value = "" + } + } + } + } + // in XLSX some values may be null, in which case we must + // be sure we're not asking CloudQuery to parse invalid values + return resource.Set(c.Name, value) }