diff --git a/_test/import-csv.yaml b/_test/import-csv.yaml
index 76c7209..a534253 100644
--- a/_test/import-csv.yaml
+++ b/_test/import-csv.yaml
@@ -14,10 +14,18 @@ spec:
     format: csv
     table: my_csv_data
     separator: ","
-    keys: [color]
-    types:
-      value: string
-      color: string
+    columns:
+      - name: color
+        type: string
+        key: true
+        unique: true
+      - name: value
+        type: string
+        unique: true
+      - name: optimized
+        type: boolean
+      - name: count
+        type: integer
 ---
 kind: destination
 spec:
diff --git a/_test/import-json.yaml b/_test/import-json.yaml
index b058c51..a866c2c 100644
--- a/_test/import-json.yaml
+++ b/_test/import-json.yaml
@@ -13,10 +13,18 @@ spec:
     file: ./test.json
     format: json
     table: my_json_data
-    keys: [color]
-    types:
-      value: string
-      color: string
+    columns:
+      - name: color
+        type: string
+        key: true
+        unique: true
+      - name: value
+        type: string
+        unique: true
+      - name: optimized
+        type: boolean
+      - name: count
+        type: integer
 ---
 kind: destination
 spec:
diff --git a/_test/import-xlsx.yaml b/_test/import-xlsx.yaml
index 4f80365..7e44ba2 100644
--- a/_test/import-xlsx.yaml
+++ b/_test/import-xlsx.yaml
@@ -13,10 +13,19 @@ spec:
     file: ./test.xlsx
     format: xlsx
     table: my_xlsx_data
-    keys: [color]
-    types:
-      value: string
-      color: string
+    columns:
+      - name: color
+        type: string
+        key: true
+        unique: true
+      - name: value
+        type: string
+        unique: true
+      - name: optimized
+        type: boolean
+        notnull: true # comment to get a null
+      - name: count
+        type: integer
 ---
 kind: destination
 spec:
diff --git a/_test/import-yaml.yaml b/_test/import-yaml.yaml
index 8ddfbd7..ea60bb1 100644
--- a/_test/import-yaml.yaml
+++ b/_test/import-yaml.yaml
@@ -13,11 +13,18 @@ spec:
     file: ./test.yaml
     format: yaml
     table: my_yaml_data
-    keys:
-      - color
-    types:
-      value: string
-      color: string
+    columns:
+      - name: color
+        type: string
+        key: true
+        unique: true
+      - name: value
+        type: string
+        unique: true
+      - name: optimized
+        type: boolean
+      - name: count
+        type: integer
 ---
 kind: destination
 spec:
diff --git a/_test/test.csv b/_test/test.csv
index f99b87b..77d06f8 100644
--- a/_test/test.csv
+++ b/_test/test.csv
@@ -1,8 +1,10 @@
-color,value
-red,#f00
-green,#0f0
-blue,#00f
-cyan,#0ff
-magenta,#f0f
-yellow,#ff0
-black,#000
\ No newline at end of file
+color,value,optimized,count
+red,#f00,false,-123
+green,#0f0,true,345
+blue,#00f,false,-345
+cyan,#0ff,true,678
+magenta,#f0f,false,-678
+yellow,#ff0,true,901
+black,#000,false,-901
+grey,#aaa,false,
+white,#fff,,-222
\ No newline at end of file
diff --git a/_test/test.json b/_test/test.json
index c522c1f..30f129b 100644
--- a/_test/test.json
+++ b/_test/test.json
@@ -1,30 +1,44 @@
 [
     {
         "color": "red",
-        "value": "#f00"
+        "value": "#f00",
+        "optimized": true,
+        "count": 123
     },
     {
         "color": "green",
-        "value": "#0f0"
+        "value": "#0f0",
+        "optimized": false,
+        "count": -123
     },
     {
         "color": "blue",
-        "value": "#00f"
+        "value": "#00f",
+        "optimized": true,
+        "count": -321
     },
     {
         "color": "cyan",
-        "value": "#0ff"
+        "value": "#0ff",
+        "optimized": false,
+        "count": 321
     },
     {
         "color": "magenta",
-        "value": "#f0f"
+        "value": "#f0f",
+        "optimized": true,
+        "count": 456
     },
     {
         "color": "yellow",
-        "value": "#ff0"
+        "value": "#ff0",
+        "optimized": false,
+        "count": -456
     },
     {
         "color": "black",
-        "value": "#000"
+        "value": "#000",
+        "optimized": true,
+        "count": 789
     }
 ]
\ No newline at end of file
diff --git a/_test/test.xlsx b/_test/test.xlsx
index 8b0856f..12187e4 100644
Binary files a/_test/test.xlsx and b/_test/test.xlsx differ
diff --git a/_test/test.yaml b/_test/test.yaml
index 9316a94..bff1616 100644
--- a/_test/test.yaml
+++ b/_test/test.yaml
@@ -1,15 +1,29 @@
 ---
 - color: red
   value: "#f00"
+  optimized: true
+  count: 123
 - color: green
   value: "#0f0"
+  optimized: false
+  count: -123
 - color: blue
   value: "#00f"
+  optimized: true
+  count: -321
 - color: cyan
   value: "#0ff"
+  optimized: false
+  count: 321
 - color: magenta
   value: "#f0f"
+  optimized: true
+  count: 456
 - color: yellow
   value: "#ff0"
+  optimized: false
+  count: -456
 - color: black
-  value: "#000"
\ No newline at end of file
+  value: "#000"
+  optimized: true
+  count: 789
\ No newline at end of file
diff --git a/client/client.go b/client/client.go
index 575a633..c097d78 100644
--- a/client/client.go
+++ b/client/client.go
@@ -14,7 +14,7 @@ import (
 type Client struct {
 	Logger zerolog.Logger
 	Specs  *Spec
-	Data   []map[string]any
+	//Data []map[string]any
 }
 
 func (c *Client) ID() string {
diff --git a/client/spec.go b/client/spec.go
index 15be9b6..a847a80 100644
--- a/client/spec.go
+++ b/client/spec.go
@@ -1,11 +1,18 @@
 package client
 
+type Column struct {
+	Name        string  `json:"name,omitempty" yaml:"name,omitempty"`
+	Description *string `json:"description,omitempty" yaml:"description,omitempty"`
+	Type        string  `json:"type,omitempty" yaml:"type,omitempty"`
+	Key         bool    `json:"key,omitempty" yaml:"key,omitempty"`
+	Unique      bool    `json:"unique,omitempty" yaml:"unique,omitempty"`
+	NotNull     bool    `json:"notnull,omitempty" yaml:"notnull,omitempty"`
+}
 type Spec struct {
-	File      string            `json:"file,omitempty" yaml:"file,omitempty"`
-	Format    string            `json:"format,omitempty" yaml:"format,omitempty"`
-	Table     string            `json:"table,omitempty" yaml:"table,omitempty"`
-	Keys      []string          `json:"keys,omitempty" yaml:"keys,omitempty"`
-	Types     map[string]string `json:"types,omitempty" yaml:"types,omitempty"`
-	Separator *string           `json:"separator,omitempty" yaml:"separator,omitempty"` // CSV only
-	Sheet     *string           `json:"sheet,omitempty" yaml:"sheet,omitempty"`         // XLSX only
+	File      string   `json:"file,omitempty" yaml:"file,omitempty"`
+	Format    string   `json:"format,omitempty" yaml:"format,omitempty"`
+	Table     string   `json:"table,omitempty" yaml:"table,omitempty"`
+	Columns   []Column `json:"columns,omitempty" yaml:"columns,omitempty"`
+	Separator *string  `json:"separator,omitempty" yaml:"separator,omitempty"` // CSV only
+	Sheet     *string  `json:"sheet,omitempty" yaml:"sheet,omitempty"`         // XLSX only
 }
diff --git a/resources/data.go b/resources/data.go
index 6d913c2..129c5c2 100644
--- a/resources/data.go
+++ b/resources/data.go
@@ -5,9 +5,9 @@ import (
 	"bufio"
 	"bytes"
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"os"
+	"reflect"
 	"strings"
 
 	"github.com/cloudquery/plugin-sdk/schema"
@@ -18,50 +18,99 @@ import (
 	"gopkg.in/yaml.v3"
 )
 
+// GetTables uses data in the spec section of the client configuration to
+// dynamically build the information about the columns being imported.
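+// Spec types ("string", "integer", "boolean" and their short forms) are
+// mapped onto the corresponding schema column types; any other value falls
+// back to string.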
 func GetTables(ctx context.Context, meta schema.ClientMeta) (schema.Tables, error) {
 	client := meta.(*client.Client)
-	client.Logger.Debug().Str("file", client.Specs.File).Msg("reading input from file")
+	columns := []schema.Column{}
+	for _, c := range client.Specs.Columns {
+		client.Logger.Debug().Str("name", c.Name).Msg("adding column")
+		if c.Description == nil {
+			c.Description = pointer.To(fmt.Sprintf("The column mapping the %q field from the input data", c.Name))
+		}
+		column := schema.Column{
+			Name:        c.Name,
+			Description: *c.Description,
+			Resolver:    fetchColumn,
+			CreationOptions: schema.ColumnCreationOptions{
+				PrimaryKey: c.Key,
+				Unique:     c.Unique,
+				NotNull:    c.NotNull,
+			},
+		}
+		switch strings.ToLower(c.Type) {
+		case "string", "str", "s":
+			client.Logger.Debug().Str("name", c.Name).Msg("column is of type string")
+			column.Type = schema.TypeString
+		case "integer", "int", "i":
+			client.Logger.Debug().Str("name", c.Name).Msg("column is of type int")
+			column.Type = schema.TypeInt
+		case "boolean", "bool", "b":
+			client.Logger.Debug().Str("name", c.Name).Msg("column is of type bool")
+			column.Type = schema.TypeBool
+		default:
+			client.Logger.Debug().Str("name", c.Name).Msg("column is of unmapped type, assuming string")
+			column.Type = schema.TypeString
+		}
+		columns = append(columns, column)
+	}
+
+	client.Logger.Debug().Msg("returning table")
+	return []*schema.Table{
+		{
+			Name:     client.Specs.Table,
+			Resolver: fetchData,
+			Columns:  columns,
+		},
+	}, nil
+}
 
-	client.Data = []map[string]any{}
+// fetchData reads the input file and unmarshals it into a set of rows using
+// format-specific mechanisms, then encodes the information as a map[string]any
+// per row and returns it; fetchColumn knows how to pick the data out of this
+// map and set it into the resource being returned to CloudQuery.
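+// Unlike the previous revision, which cached the parsed rows on the Client,
+// the rows built here only live for the duration of a single fetch.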
+func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error {
+	client := meta.(*client.Client)
+
+	rows := []map[string]any{}
+	client.Logger.Debug().Msg("fetching data...")
 	switch strings.ToLower(client.Specs.Format) {
 	case "json":
 		data, err := os.ReadFile(client.Specs.File)
 		if err != nil {
 			client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file")
-			return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err)
+			return fmt.Errorf("error reading input file %q: %w", client.Specs.File, err)
 		}
 		client.Logger.Debug().Str("file", client.Specs.File).Msg("input file read")
-		if err := json.Unmarshal(data, &client.Data); err != nil {
+		if err := json.Unmarshal(data, &rows); err != nil {
 			client.Logger.Error().Err(err).Msg("error unmarshalling data from JSON")
-			return nil, fmt.Errorf("error unmarshalling data from JSON: %w", err)
+			return fmt.Errorf("error unmarshalling data from JSON: %w", err)
 		}
 	case "yaml", "yml":
 		data, err := os.ReadFile(client.Specs.File)
 		if err != nil {
 			client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file")
-			return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err)
+			return fmt.Errorf("error reading input file %q: %w", client.Specs.File, err)
 		}
 		client.Logger.Debug().Str("file", client.Specs.File).Msg("input file read")
-		if err := yaml.Unmarshal(data, &client.Data); err != nil {
-			client.Logger.Error().Err(err).Msg("error unmarshalling data from JSON")
-			return nil, fmt.Errorf("error unmarshalling data from JSON: %w", err)
+		if err := yaml.Unmarshal(data, &rows); err != nil {
+			client.Logger.Error().Err(err).Msg("error unmarshalling data from YAML")
+			return fmt.Errorf("error unmarshalling data from YAML: %w", err)
 		}
 	case "csv":
 		data, err := os.ReadFile(client.Specs.File)
 		if err != nil {
 			client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file")
-			return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err)
+			return fmt.Errorf("error reading input file %q: %w", client.Specs.File, err)
 		}
 		client.Logger.Debug().Str("file", client.Specs.File).Msg("input file read")
 		if client.Specs.Separator == nil {
 			client.Specs.Separator = pointer.To(",")
 		}
 		scanner := bufio.NewScanner(bytes.NewReader(data))
-		var keys []string
-		client.Data = []map[string]any{}
 		first := true
+		var keys []string
 		for scanner.Scan() {
 			line := scanner.Text()
 			client.Logger.Debug().Str("line", line).Msg("read line from input file")
@@ -70,114 +119,117 @@ func GetTables(ctx context.Context, meta schema.ClientMeta) (schema.Tables, erro
 			if first {
 				first = false
 				keys = strings.Split(line, *client.Specs.Separator)
 			} else {
 				values := strings.Split(line, *client.Specs.Separator)
-				entry := map[string]any{}
-				for i := 0; i < len(keys); i++ {
-					entry[keys[i]] = values[i]
+				if len(values) >= len(keys) {
+					row := map[string]any{}
+					for i := 0; i < len(keys); i++ {
+						for _, column := range client.Specs.Columns {
+							if keys[i] == column.Name {
+								row[column.Name] = values[i]
+							}
+						}
+					}
+					rows = append(rows, row)
+				} else {
+					client.Logger.Warn().Str("file", client.Specs.File).Str("line", line).Int("expected", len(keys)).Int("actual", len(values)).Msg("invalid number of columns")
 				}
-				client.Data = append(client.Data, entry)
 			}
 		}
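+
+	// XLSX files are zip containers rather than line-oriented text, so they
+	// are read through the excelize library instead of being scanned line by line.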
file") - return nil, fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) + return fmt.Errorf("error reading input file %q: %w", client.Specs.File, err) } defer func() { if err := xls.Close(); err != nil { client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error reading input file") } }() - // Get all the rows in the Sheet1. + // get all the rows in the requested (or the active) sheet if client.Specs.Sheet == nil { // get the currently active sheet in the file client.Specs.Sheet = pointer.To(xls.GetSheetName(xls.GetActiveSheetIndex())) } client.Logger.Debug().Str("sheet", *client.Specs.Sheet).Msg("getting data from sheet") - rows, err := xls.GetRows(*client.Specs.Sheet) + xlsrows, err := xls.GetRows(*client.Specs.Sheet) if err != nil { client.Logger.Error().Err(err).Str("file", client.Specs.File).Msg("error getting rows") - return nil, fmt.Errorf("error getting rows from input file %q: %w", client.Specs.File, err) + return fmt.Errorf("error getting rows from input file %q: %w", client.Specs.File, err) } var keys []string - client.Data = []map[string]any{} first := true - for _, row := range rows { + for _, xlsrow := range xlsrows { if first { first = false - keys = row + keys = xlsrow } else { - values := row - entry := map[string]any{} + values := xlsrow + row := map[string]any{} for i := 0; i < len(keys); i++ { - entry[keys[i]] = values[i] + if i < len(values) { + // XLSX rows can be sparse, in which case all TRAILING empty cells are removed + // from the returned slice; empty cells in the middle are still valid + row[keys[i]] = values[i] + } else { + row[keys[i]] = nil + } } - client.Data = append(client.Data, entry) + rows = append(rows, row) } } + + // TODO: add more formats default: client.Logger.Error().Str("format", client.Specs.Format).Msg("unsupported format") - return nil, fmt.Errorf("unsupported format: %q", client.Specs.Format) - } + return fmt.Errorf("unsupported format: %q", client.Specs.Format) - if len(client.Data) > 0 { - columns := []schema.Column{} - for name := range client.Data[0] { - client.Logger.Debug().Str("name", name).Msg("adding column") - column := schema.Column{ - Name: name, - Description: fmt.Sprintf("The column mapping the %q field from the input data", name), - Resolver: fetchColumn, - } - for _, v := range client.Specs.Keys { - if name == v { - client.Logger.Debug().Str("name", name).Msg("column is primary key") - column.CreationOptions.PrimaryKey = true - break - } - } - switch strings.ToLower(client.Specs.Types[name]) { - case "string", "str", "s": - client.Logger.Debug().Str("name", name).Msg("column is of type string") - column.Type = schema.TypeString - case "integer", "int", "i": - client.Logger.Debug().Str("name", name).Msg("column is of type int") - column.Type = schema.TypeInt - case "boolean", "bool", "b": - client.Logger.Debug().Str("name", name).Msg("column is of type bool") - column.Type = schema.TypeBool - default: - client.Logger.Debug().Str("name", name).Msg("column is of unmapped type, assuming string") - column.Type = schema.TypeString - } - columns = append(columns, column) - } - client.Logger.Debug().Msg("returning table") - return []*schema.Table{ - { - Name: client.Specs.Table, - Resolver: fetchData, - Columns: columns, - }, - }, nil } - return nil, errors.New("no data in file") -} -func fetchData(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error { - client := meta.(*client.Client) - client.Logger.Debug().Msg("fetching data...") - for _, row := range 
+	for _, row := range rows {
+		client.Logger.Debug().Str("row", format.ToJSON(row)).Msg("returning single row")
 		res <- row
 	}
 	return nil
 }
 
+// fetchColumn picks the value under the right key from the map[string]any
+// and sets it into the resource being returned to CloudQuery.
 func fetchColumn(ctx context.Context, meta schema.ClientMeta, resource *schema.Resource, c schema.Column) error {
 	client := meta.(*client.Client)
-	client.Logger.Debug().Str("resource", format.ToJSON(resource)).Str("column", format.ToJSON(c)).Str("item type", fmt.Sprintf("%T", resource.Item)).Msg("fetching column...")
+	// client.Logger.Debug().Str("resource", format.ToJSON(resource)).Str("column", format.ToJSON(c)).Str("item type", fmt.Sprintf("%T", resource.Item)).Msg("fetching column...")
 	item := resource.Item.(map[string]any)
-	return resource.Set(c.Name, item[c.Name])
+	value := item[c.Name]
+	client.Logger.Debug().Str("value", fmt.Sprintf("%v", value)).Str("type", fmt.Sprintf("%T", value)).Msg("checking value type")
+	if value == nil {
+		client.Logger.Warn().Msg("value is nil")
+		if c.CreationOptions.NotNull {
+			err := fmt.Errorf("invalid nil value for non-nullable column %s", c.Name)
+			client.Logger.Error().Err(err).Str("name", c.Name).Msg("error setting column")
+			return err
+		}
+	} else {
+		client.Logger.Warn().Msg("value is NOT nil")
+		if reflect.ValueOf(value).IsZero() {
+			if !c.CreationOptions.NotNull {
+				// column is nullable, let's null it
+				client.Logger.Warn().Str("name", c.Name).Msg("nulling column value")
+				value = nil
+			} else {
+				client.Logger.Warn().Msg("set default value for type")
+				switch c.Type {
+				case schema.TypeBool:
+					value = false
+				case schema.TypeInt:
+					value = 0
+				case schema.TypeString:
+					value = ""
+				}
+			}
+		}
+	}
+	// in XLSX some values may be null, in which case we must
+	// be sure we're not asking CloudQuery to parse invalid values
+	return resource.Set(c.Name, value)
 }