https://github.com/kniren/gota

## 필요 패키지

In [4]:
import (
    "fmt"
    "log"
    "os"
    "github.com/kniren/gota/dataframe"
    "github.com/kniren/gota/series"   
)

## 자료 생성

### 열별로(series) 타입을 지정하고 값을 넣어 생성한다.

In [7]:
df := dataframe.New(
	series.New([]string{"ㄱ", "ㄴ"}, series.String, "열1"),
	series.New([]int{1, 2}, series.Int, "열2"),
	series.New([]float64{3.0, 4.0}, series.Float, "열3"),
)

fmt.Println(df)

[2x3] DataFrame

    열1       열2    열3      
 0: ㄱ        1     3.000000
 1: ㄴ        2     4.000000
    <string> <int> <float> 

140
<nil>


### 배열을 사용해 행방향으로 자료 입력

In [10]:
df := dataframe.LoadRecords(
    [][]string{
        []string{"가", "나", "다", "라"},
        []string{"a", "4", "5.1", "true"},
        []string{"k", "5", "7.0", "true"},
        []string{"k", "4", "6.0", "true"},
        []string{"a", "2", "7.1", "false"},
    },
)
fmt.Println(df)

[4x4] DataFrame

    가        나     다        라     
 0: a        4     5.100000 true  
 1: k        5     7.000000 true  
 2: k        4     6.000000 true  
 3: a        2     7.100000 false 
    <string> <int> <float>  <bool>

236
<nil>


### 구조체에 담아서 행방향으로 자료 입력

In [16]:
// 사용자 describes one row handed to dataframe.LoadStructs; in the recorded
// output each field shows up as one column.
type 사용자 struct {
	Name     string
	Age      int
	Accuracy float64
	// Lowercase initial letter: unexported in standard Go.
	// NOTE(review): the recorded lgo output still lists this field as the
	// column "LgoExport_ignored", so the kernel appears to export it; confirm
	// before relying on the field being skipped.
	ignored bool
}

// The element type must be the struct declared above (사용자); there is no
// type named User in this notebook.
users := []사용자{
	{"아담", 17, 0.2, true},
	{"후안", 18, 0.8, true},
	{"아나", 22, 0.5, true},
}
df := dataframe.LoadStructs(users)
fmt.Println(df)

[3x4] DataFrame

    Name     Age   Accuracy LgoExport_ignored
 0: 아담       17    0.200000 true             
 1: 후안       18    0.800000 true             
 2: 아나       22    0.500000 true             
    <string> <int> <float>  <bool>           

260
<nil>


### 행(배열과 구조체)으로 타입지정하며 입력

In [18]:
df := dataframe.LoadRecords(
    [][]string{
        []string{"가", "나", "다", "라"},
        []string{"a", "4", "5.1", "true"},
        []string{"k", "5", "7.0", "true"},
        []string{"k", "4", "6.0", "true"},
        []string{"a", "2", "7.1", "false"},
    },
    dataframe.DetectTypes(false),
    dataframe.DefaultType(series.Float),
    dataframe.WithTypes(map[string]series.Type{
        "가": series.String,
        "라": series.Bool,
    }),
)
fmt.Println(df)

[4x4] DataFrame

    가        나        다        라     
 0: a        4.000000 5.100000 true  
 1: k        5.000000 7.000000 true  
 2: k        4.000000 6.000000 true  
 3: a        2.000000 7.100000 false 
    <string> <float>  <float>  <bool>

254
<nil>


### 맵으로 여러 묶음을 합치기

In [20]:
df := dataframe.LoadMaps(
    []map[string]interface{}{
        map[string]interface{}{
            "가": "a",
            "B": 1,
            "C": true,
            "D": 0,
        },
        map[string]interface{}{
            "A": "b",
            "B": 2,
            "C": true,
            "D": 0.5,
            "라": 0.2,
        },
    },
)

fmt.Println(df)

[2x6] DataFrame

    A        B     C      D        가        라       
 0:          1     true   0.000000 a        NaN     
 1: b        2     true   0.500000          0.200000
    <string> <int> <bool> <float>  <string> <float> 

234
<nil>


### io.Reader - csv 형식 문자열 스트링버퍼

In [22]:
import "strings"

// CSV text held in memory; the first non-empty line is the header.
// Per the recorded output: Id is typed <int>, so "01234" prints as 1234 and
// "00241" as 241, and the "NA" in Age prints as NaN.
csvStr := `
Country,Date,Age,Amount,Id
"United States",2012-02-01,50,112.1,01234
"United States",2012-02-01,32,321.31,54320
"United Kingdom",2012-02-01,17,18.2,12345
"United States",2012-02-01,32,321.31,54320
"United Kingdom",2012-02-01,NA,18.2,12345
"United States",2012-02-01,32,321.31,54320
"United States",2012-02-01,32,321.31,54320
Spain,2012-02-01,66,555.42,00241
`

// ReadCSV takes an io.Reader, so wrap the string in a strings.Reader.
csvReader := strings.NewReader(csvStr)
df := dataframe.ReadCSV(csvReader)

fmt.Println(df)

[8x5] DataFrame

    Country        Date       Age   Amount     Id   
 0: United States  2012-02-01 50    112.100000 1234 
 1: United States  2012-02-01 32    321.310000 54320
 2: United Kingdom 2012-02-01 17    18.200000  12345
 3: United States  2012-02-01 32    321.310000 54320
 4: United Kingdom 2012-02-01 NaN   18.200000  12345
 5: United States  2012-02-01 32    321.310000 54320
 6: United States  2012-02-01 32    321.310000 54320
 7: Spain          2012-02-01 66    555.420000 241  
    <string>       <string>   <int> <float>    <int>

548
<nil>


### io.Reader - json 형식 문자열 스트링버퍼

In [24]:
import "strings"
// JSON array of objects, one object per row. The first object has no "COL.1"
// key; the recorded output shows NaN in that cell.
jsonStr := `[{"COL.2":1,"COL.3":3},{"COL.1":5,"COL.2":2,"COL.3":2},{"COL.1":6,"COL.2":3,"COL.3":1}]`

// ReadJSON takes an io.Reader, so wrap the string in a strings.Reader.
jsonReader := strings.NewReader(jsonStr)
df := dataframe.ReadJSON(jsonReader)
fmt.Println(df)

[3x3] DataFrame

    COL.1 COL.2 COL.3
 0: NaN   1     3    
 1: 5     2     2    
 2: 6     3     1    
    <int> <int> <int>

128
<nil>


## 자료 선택 

### 행 - 서브셋 하위묶음 

In [27]:
df := dataframe.LoadRecords(
    [][]string{
        []string{"가", "나", "다", "라"},
        []string{"a", "4", "5.1", "true"},
        []string{"k", "5", "7.0", "true"},
        []string{"k", "4", "6.0", "true"},
        []string{"a", "2", "7.1", "false"},
    },
)

sub := df.Subset([]int{0,3})

fmt.Print(sub)

[2x4] DataFrame

    가        나     다        라     
 0: a        4     5.100000 true  
 1: a        2     7.100000 false 
    <string> <int> <float>  <bool>
165
<nil>


In [1]:
// Change column C with a new one
mut := df.Mutate(
    series.New([]string{"a", "b", "c", "d"}, series.String, "C"),
)
// Add a new column E
mut2 := df.Mutate(
    series.New([]string{"a", "b", "c", "d"}, series.String, "E"),
)
fmt.Println(mut)
fmt.Println(mut2)

2:8: undeclared name: df
3:5: undeclared name: series
3:46: undeclared name: series
6:9: undeclared name: df
7:5: undeclared name: series (and 3 more errors)
