Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: check datetime type only for small input data #6

Merged
merged 3 commits into from
Nov 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions .idea/.gitignore

This file was deleted.

9 changes: 0 additions & 9 deletions .idea/gota.iml

This file was deleted.

8 changes: 0 additions & 8 deletions .idea/modules.xml

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/vcs.xml

This file was deleted.

23 changes: 18 additions & 5 deletions dataframe/dataframe.go
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,8 @@ type loadOptions struct {

// Define a func for trimming the raw data
customTrimer CustomTrimer

parseDatetimeCount int
}

// CustomTrimFn custom raw data trimmer
Expand Down Expand Up @@ -755,6 +757,12 @@ func WithTypes(coltypes map[string]series.Type) LoadOption {
}
}

// WithDatetimeParsingConstraint returns a LoadOption that stores maxCount in
// the parseDatetimeCount field of the load options.
//
// During type detection this value is handed to findType, which attempts
// datetime parsing only when the value is 0 or when the column holds fewer
// than maxCount entries.
func WithDatetimeParsingConstraint(maxCount int) LoadOption {
	setLimit := func(opts *loadOptions) {
		opts.parseDatetimeCount = maxCount
	}
	return setLimit
}

// LoadRecords creates a new DataFrame based on the given records.
func LoadRecords(records [][]string, options ...LoadOption) DataFrame {
// Load the options
Expand Down Expand Up @@ -813,7 +821,7 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame {
if !ok {
t = cfg.defaultType
if cfg.detectTypes {
t = findType(rawcol, cfg.detectTypeThreshold)
t = findType(rawcol, cfg.detectTypeThreshold, cfg.parseDatetimeCount)
}
}
types[i] = t
Expand Down Expand Up @@ -1276,7 +1284,8 @@ func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int,
return idx, nil
}

func findType(arr []string, threshold float64) series.Type {
func findType(arr []string, threshold float64, maxCountForDatetimeParse int) series.Type {
checkDatetime := maxCountForDatetimeParse == 0 || len(arr) < maxCountForDatetimeParse

counter := map[series.Type]int{
series.Float: 0,
Expand All @@ -1303,10 +1312,14 @@ func findType(arr []string, threshold float64) series.Type {
counter[series.Bool]++
continue
}
if _, e := series.ParseDateTime(str); e == nil {
counter[series.Time]++
continue

if checkDatetime {
if _, e := series.ParseDateTime(str); e == nil {
counter[series.Time]++
continue
}
}

counter[series.String]++
}

Expand Down