Skip to content

Commit

Permalink
fix(parser.xpath): Handle floating-point times correctly (influxdata#…
Browse files Browse the repository at this point in the history
  • Loading branch information
srebhan authored and dba-leshop committed Oct 30, 2022
1 parent 9d7387b commit ebce146
Show file tree
Hide file tree
Showing 6 changed files with 278 additions and 171 deletions.
235 changes: 119 additions & 116 deletions internal/internal.go
Expand Up @@ -7,17 +7,18 @@ import (
"errors"
"fmt"
"io"
"math"
"math/big"
"math/rand"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"time"
"unicode"

"github.com/influxdata/telegraf/internal/choice"
)

const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
Expand Down Expand Up @@ -257,145 +258,147 @@ func CompressWithGzip(data io.Reader) (io.ReadCloser, error) {
// The location is a location string suitable for time.LoadLocation. Unix
// times do not use the location string, a unix time is always return in the
// UTC location.
func ParseTimestamp(format string, timestamp interface{}, location string) (time.Time, error) {
func ParseTimestamp(format string, timestamp interface{}, location string, separator ...string) (time.Time, error) {
switch format {
case "unix", "unix_ms", "unix_us", "unix_ns":
return parseUnix(format, timestamp)
sep := []string{",", "."}
if len(separator) > 0 {
sep = separator
}
return parseUnix(format, timestamp, sep)
default:
if location == "" {
location = "UTC"
v, ok := timestamp.(string)
if !ok {
return time.Unix(0, 0), errors.New("unsupported type")
}
return parseTime(format, timestamp, location)
return parseTime(format, v, location)
}
}

func parseUnix(format string, timestamp interface{}) (time.Time, error) {
integer, fractional, err := parseComponents(timestamp)
if err != nil {
return time.Unix(0, 0), err
}

switch strings.ToLower(format) {
// parseTime parses a timestamp in unix format with different resolutions
func parseUnix(format string, timestamp interface{}, separator []string) (time.Time, error) {
// Extract the scaling factor to nanoseconds from "format"
var factor int64
switch format {
case "unix":
return time.Unix(integer, fractional).UTC(), nil
factor = int64(time.Second)
case "unix_ms":
return time.Unix(0, integer*1e6).UTC(), nil
factor = int64(time.Millisecond)
case "unix_us":
return time.Unix(0, integer*1e3).UTC(), nil
factor = int64(time.Microsecond)
case "unix_ns":
return time.Unix(0, integer).UTC(), nil
default:
return time.Unix(0, 0), errors.New("unsupported type")
factor = int64(time.Nanosecond)
}
}

// Returns the integers before and after an optional decimal point. Both '.'
// and ',' are supported for the decimal point. The timestamp can be an int64,
// float64, or string.
//
// ex: "42.5" -> (42, 5, nil)
func parseComponents(timestamp interface{}) (int64, int64, error) {
switch ts := timestamp.(type) {
case string:
parts := strings.SplitN(ts, ".", 2)
if len(parts) == 2 {
return parseUnixTimeComponents(parts[0], parts[1])
}
zero := time.Unix(0, 0)

parts = strings.SplitN(ts, ",", 2)
if len(parts) == 2 {
return parseUnixTimeComponents(parts[0], parts[1])
// Convert the representation to time
switch v := timestamp.(type) {
case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:
t, err := ToInt64(v)
if err != nil {
return zero, err
}

integer, err := strconv.ParseInt(ts, 10, 64)
return time.Unix(0, t*factor).UTC(), nil
case float32, float64:
ts, err := ToFloat64(v)
if err != nil {
return 0, 0, err
return zero, err
}
return integer, 0, nil
case int8:
return int64(ts), 0, nil
case int16:
return int64(ts), 0, nil
case int32:
return int64(ts), 0, nil
case int64:
return ts, 0, nil
case uint8:
return int64(ts), 0, nil
case uint16:
return int64(ts), 0, nil
case uint32:
return int64(ts), 0, nil
case uint64:
return int64(ts), 0, nil
case float32:
integer, fractional := math.Modf(float64(ts))
return int64(integer), int64(fractional * 1e9), nil
case float64:
integer, fractional := math.Modf(ts)
return int64(integer), int64(fractional * 1e9), nil
default:
return 0, 0, errors.New("unsupported type")

// Parse the float as a precise fraction to avoid precision loss
f := big.Rat{}
if f.SetFloat64(ts) == nil {
return zero, errors.New("invalid number")
}
return timeFromFraction(&f, factor), nil
case string:
// Sanitize the string to have no thousand separators and dot
// as decimal separator to ease later parsing
v = sanitizeTimestamp(v, separator)

// Parse the string as a precise fraction to avoid precision loss
f := big.Rat{}
if _, ok := f.SetString(v); !ok {
return zero, errors.New("invalid number")
}
return timeFromFraction(&f, factor), nil
}

return zero, errors.New("unsupported type")
}

func parseUnixTimeComponents(first, second string) (int64, int64, error) {
integer, err := strconv.ParseInt(first, 10, 64)
if err != nil {
return 0, 0, err
}
func timeFromFraction(f *big.Rat, factor int64) time.Time {
// Extract the numerator and denominator and scale to nanoseconds
num := f.Num()
denom := f.Denom()
num.Mul(num, big.NewInt(factor))

// Convert to nanoseconds, dropping any greater precision.
buf := []byte("000000000")
copy(buf, second)
// Get the integer (non-fractional part) of the timestamp and convert
// it into time
t := big.Int{}
t.Div(num, denom)

fractional, err := strconv.ParseInt(string(buf), 10, 64)
if err != nil {
return 0, 0, err
}
return integer, fractional, nil
return time.Unix(0, t.Int64()).UTC()
}

// ParseTime parses a string timestamp according to the format string.
func parseTime(format string, timestamp interface{}, location string) (time.Time, error) {
switch ts := timestamp.(type) {
case string:
loc, err := time.LoadLocation(location)
if err != nil {
return time.Unix(0, 0), err
// sanitizeTimestamp removes thousand separators and uses dot as
// decimal separator. Returns also a boolean indicating success.
func sanitizeTimestamp(timestamp string, decimalSeparartor []string) string {
// Remove thousand-separators that are not used for decimal separation
sanitized := timestamp
for _, s := range []string{" ", ",", "."} {
if !choice.Contains(s, decimalSeparartor) {
sanitized = strings.ReplaceAll(sanitized, s, "")
}
switch strings.ToLower(format) {
case "ansic":
format = time.ANSIC
case "unixdate":
format = time.UnixDate
case "rubydate":
format = time.RubyDate
case "rfc822":
format = time.RFC822
case "rfc822z":
format = time.RFC822Z
case "rfc850":
format = time.RFC850
case "rfc1123":
format = time.RFC1123
case "rfc1123z":
format = time.RFC1123Z
case "rfc3339":
format = time.RFC3339
case "rfc3339nano":
format = time.RFC3339Nano
case "stamp":
format = time.Stamp
case "stampmilli":
format = time.StampMilli
case "stampmicro":
format = time.StampMicro
case "stampnano":
format = time.StampNano
}

// Replace decimal separators by dot to have a standard, parsable format
for _, s := range decimalSeparartor {
// Make sure we replace only the first occurrence of any separator.
if strings.Contains(sanitized, s) {
return strings.Replace(sanitized, s, ".", 1)
}
return time.ParseInLocation(format, ts, loc)
default:
return time.Unix(0, 0), errors.New("unsupported type")
}
return sanitized
}

// parseTime parses a string timestamp according to the format string.
func parseTime(format string, timestamp string, location string) (time.Time, error) {
loc, err := time.LoadLocation(location)
if err != nil {
return time.Unix(0, 0), err
}

switch strings.ToLower(format) {
case "ansic":
format = time.ANSIC
case "unixdate":
format = time.UnixDate
case "rubydate":
format = time.RubyDate
case "rfc822":
format = time.RFC822
case "rfc822z":
format = time.RFC822Z
case "rfc850":
format = time.RFC850
case "rfc1123":
format = time.RFC1123
case "rfc1123z":
format = time.RFC1123Z
case "rfc3339":
format = time.RFC3339
case "rfc3339nano":
format = time.RFC3339Nano
case "stamp":
format = time.Stamp
case "stampmilli":
format = time.StampMilli
case "stampmicro":
format = time.StampMicro
case "stampnano":
format = time.StampNano
}
return time.ParseInLocation(format, timestamp, loc)
}

0 comments on commit ebce146

Please sign in to comment.