-
Notifications
You must be signed in to change notification settings - Fork 1
/
word_count_in_go.go
69 lines (52 loc) · 1.67 KB
/
word_count_in_go.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package main
import (
	"flag"
	"fmt"
	"strings"

	"github.com/chrislusf/gleamold/distributed"
	"github.com/chrislusf/gleamold/flow"
	"github.com/chrislusf/gleamold/gio"
)
// Handles obtained by registering this file's mapper and reducer functions
// with gio. The declaration order (tokenize, addOne, sum) is kept as-is.
var (
	// MapperTokenizer is the value returned by registering tokenize as a mapper.
	MapperTokenizer = gio.RegisterMapper(tokenize)
	// MapperAddOne is the value returned by registering addOne as a mapper.
	MapperAddOne = gio.RegisterMapper(addOne)
	// ReducerSum is the value returned by registering sum as a reducer.
	ReducerSum = gio.RegisterReducer(sum)
)

// Command-line switches that main consults to decide how the flow is run.
var (
	// isDistributed is bound to the -distributed flag (default false).
	isDistributed = flag.Bool("distributed", false, "run in distributed or not")
	// isDockerCluster is bound to the -onDocker flag (default false).
	isDockerCluster = flag.Bool("onDocker", false, "run in docker cluster")
)
// main assembles the word-count flow over /etc/passwd and runs it in the mode
// chosen on the command line: -distributed takes precedence, then -onDocker,
// and with neither flag set the flow runs standalone.
func main() {
	flag.Parse() // optional, since gio.Init() will call this also.
	gio.Init()   // If the command line invokes the mapper or reducer, execute it and exit.

	// Read the file and feed it through the registered "tokenize" mapper.
	words := flow.New().TextFile("/etc/passwd").Mapper(MapperTokenizer)
	// Apply the registered "addOne" mapper, then the registered "sum" reducer.
	counts := words.Mapper(MapperAddOne).ReducerBy(ReducerSum)
	// Sort with flow.OrderBy(2, true) and print each row as "%s\t%d".
	f := counts.Sort(flow.OrderBy(2, true)).Printlnf("%s\t%d")

	switch {
	case *isDistributed:
		println("Running in distributed mode.")
		f.Run(distributed.Option())
	case *isDockerCluster:
		println("Running in docker cluster.")
		f.Run(distributed.Option().SetMaster("master:45326"))
	default:
		println("Running in standalone mode.")
		f.Run()
	}
}
// tokenize is the mapper behind MapperTokenizer. It reads field 0 of row as
// a line of text, splits it on every rune that is not an ASCII letter or
// digit, and emits each resulting non-empty token through gio.Emit.
//
// It returns an error (instead of panicking) when row is empty or when
// field 0 is not a []byte; otherwise it returns nil.
func tokenize(row []interface{}) error {
	if len(row) == 0 {
		return fmt.Errorf("tokenize: row has %d fields, want at least 1", len(row))
	}
	raw, ok := row[0].([]byte)
	if !ok {
		return fmt.Errorf("tokenize: field 0 has type %T, want []byte", row[0])
	}

	// A rune separates tokens unless it is in [A-Za-z0-9]; non-ASCII letters
	// therefore act as separators too.
	isSeparator := func(r rune) bool {
		return !('A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9')
	}

	for _, word := range strings.FieldsFunc(string(raw), isSeparator) {
		gio.Emit(word)
	}
	return nil
}
// addOne is the mapper behind MapperAddOne. It reads field 0 of row as a
// word and emits the pair (word, 1) through gio.Emit.
//
// It returns an error (instead of panicking) when row is empty or when
// field 0 is not a []byte; otherwise it returns nil.
func addOne(row []interface{}) error {
	if len(row) == 0 {
		return fmt.Errorf("addOne: row has %d fields, want at least 1", len(row))
	}
	raw, ok := row[0].([]byte)
	if !ok {
		return fmt.Errorf("addOne: field 0 has type %T, want []byte", row[0])
	}

	gio.Emit(string(raw), 1)
	return nil
}
// sum is the reducer behind ReducerSum. It adds two uint64 values and returns
// the result as a uint64 boxed in interface{}.
//
// Both x and y must have dynamic type uint64. Any other type (including nil)
// yields a nil result and a descriptive error rather than a panic.
// NOTE(review): presumably the framework hands counts to the reducer as
// uint64 — confirm against the gio row decoding.
func sum(x, y interface{}) (interface{}, error) {
	a, ok := x.(uint64)
	if !ok {
		return nil, fmt.Errorf("sum: x has type %T, want uint64", x)
	}
	b, ok := y.(uint64)
	if !ok {
		return nil, fmt.Errorf("sum: y has type %T, want uint64", y)
	}
	return a + b, nil
}