forked from pachyderm/pachyderm
/
map.go
85 lines (72 loc) · 1.46 KB
/
map.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
package main
import (
"bufio"
"flag"
"io/ioutil"
"log"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
)
var (
	// reg matches every run of characters that are not ASCII letters.
	// It is compiled at package scope so that sanitize can be called before
	// main has run (for example from a test) without hitting a nil regexp.
	reg = regexp.MustCompile(`[^A-Za-z]+`)

	// inputDir is the directory tree that main walks; main sets it from the
	// first positional argument.
	inputDir string

	// outputDir is the directory main writes one count file per word into;
	// main sets it from the second positional argument.
	outputDir string
)

// sanitize breaks word into its lowercase ASCII-letter components.
//
// Every run of characters outside A-Z/a-z acts as a separator, so "Don't"
// yields ["don", "t"]. Separators at the start or end of word do not produce
// empty strings, and a word containing no letters yields an empty slice.
func sanitize(word string) []string {
	lettersOnly := reg.ReplaceAllString(word, " ")
	// After the replacement the only non-letter left is a single space, so
	// Fields splits exactly where Split(" ") would, minus the empty tokens.
	return strings.Fields(strings.ToLower(lettersOnly))
}
// main counts word occurrences across every file under an input directory and
// writes one file per distinct word into an output directory.
//
// It expects exactly two positional arguments: the input directory and the
// output directory. Each output file is named after the word and contains the
// word's total count followed by a newline. Any error terminates the program
// through log.Fatal.
func main() {
	flag.Parse()
	args := flag.Args()
	if len(args) != 2 {
		log.Fatalf("expect two arguments; got %v", len(args))
	}
	// The pattern is a constant, so MustCompile cannot fail at run time.
	reg = regexp.MustCompile(`[^A-Za-z]+`)
	inputDir = args[0]
	outputDir = args[1]

	wordMap := make(map[string]int)
	walkErr := filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Walk may pass a nil info together with a non-nil err (for
			// example when the root cannot be read); stop before using info.
			return err
		}
		if info.IsDir() {
			return nil
		}
		log.Printf("scanning %v", path)
		count, err := countWords(path, wordMap)
		if err != nil {
			return err
		}
		log.Printf("found %d words in %s", count, path)
		return nil
	})
	if walkErr != nil {
		log.Fatal(walkErr)
	}

	for word, count := range wordMap {
		target := filepath.Join(outputDir, word)
		if err := ioutil.WriteFile(target, []byte(strconv.Itoa(count)+"\n"), 0644); err != nil {
			log.Fatal(err)
		}
	}
}

// countWords scans the file at path one whitespace-delimited word at a time,
// increments counts for every non-empty token sanitize returns for that word,
// and reports how many whitespace-delimited words were scanned.
//
// The file is closed on every return path; an error from Close is reported
// only when no earlier error occurred.
func countWords(path string, counts map[string]int) (scanned int, err error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer func() {
		if cerr := f.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	scanner := bufio.NewScanner(f)
	scanner.Split(bufio.ScanWords)
	for scanner.Scan() {
		scanned++
		for _, word := range sanitize(scanner.Text()) {
			// Skip empty tokens so the count map never gains an "" key,
			// whatever sanitize does with leading/trailing separators.
			if word != "" {
				counts[word]++
			}
		}
	}
	return scanned, scanner.Err()
}