/
name.go
104 lines (89 loc) · 1.9 KB
/
name.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// SPDX-FileCopyrightText: 2023 Sascha Brawer <sascha@brawer.ch>
// SPDX-License-Identifier: MIT
package main
import (
"bytes"
"context"
"encoding/csv"
"golang.org/x/sync/errgroup"
"io"
"sync"
"github.com/lanrat/extsort"
)
// Name is a single (name, identifier) record. NameWriter emits Name in the
// "Name" CSV column and ID in the "WikidataID" column.
type Name struct {
	// Name is the text of the name.
	Name string
	// ID is the identifier associated with the name.
	ID string
}

// ToBytes serializes n as the bytes of Name, one NUL byte, and the bytes
// of ID, in that order. NameFromBytes reverses this by splitting at the
// first NUL byte.
func (n Name) ToBytes() []byte {
	fields := [][]byte{[]byte(n.Name), []byte(n.ID)}
	return bytes.Join(fields, []byte{0})
}
// NameFromBytes decodes a record produced by Name.ToBytes. Everything
// before the first NUL byte becomes Name and everything after it becomes
// ID. Input that contains no NUL byte yields the zero Name.
func NameFromBytes(b []byte) extsort.SortType {
	sep := bytes.IndexByte(b, 0)
	if sep < 0 {
		return Name{}
	}
	return Name{Name: string(b[:sep]), ID: string(b[sep+1:])}
}
// NameIsLess reports whether a sorts before b. Records are ordered by
// Name; records with equal names are ordered by ID, so the comparison is
// a total order over distinct (Name, ID) pairs and the sorted output does
// not depend on the order in which records were submitted.
//
// Both arguments must hold a Name value; any other dynamic type makes the
// type assertion panic.
func NameIsLess(a, b extsort.SortType) bool {
	x, y := a.(Name), b.(Name)
	if x.Name != y.Name {
		return x.Name < y.Name
	}
	return x.ID < y.ID
}
// NameWriter collects Name records via WriteName and writes them as CSV
// rows in sorted order. Create one with NewNameWriter and call Close once
// all records have been submitted.
type NameWriter struct {
	// mutex is held for the whole of Close and guards closed.
	mutex sync.Mutex
	// closed is set by the first call to Close; later calls return early.
	closed bool
	// writer is the CSV writer that receives the header and the sorted rows.
	writer *csv.Writer
	// sortChan feeds records to the sorter; WriteName sends on it and
	// Close closes it.
	sortChan chan extsort.SortType
	// sortTask tracks the goroutines started by NewNameWriter; Close waits
	// on it.
	sortTask *errgroup.Group
}
// NewNameWriter returns a NameWriter that emits CSV to w.
//
// The header row ("Name", "WikidataID") is written immediately; if that
// fails, the error is returned and no goroutines are started. Otherwise
// two goroutines are launched in an errgroup: one runs the extsort sorter
// over the records submitted through WriteName, the other writes the
// sorter's output as CSV rows and then reports any error the sorter
// delivered on its error channel. Close waits for both.
func NewNameWriter(w io.Writer) (*NameWriter, error) {
	out := csv.NewWriter(w)
	header := []string{"Name", "WikidataID"}
	if err := out.Write(header); err != nil {
		return nil, err
	}

	// Buffered so that callers of WriteName are decoupled from the sorter.
	unsorted := make(chan extsort.SortType, 50000)
	sorter, sorted, errs := extsort.New(unsorted, NameFromBytes, NameIsLess, nil)

	group, ctx := errgroup.WithContext(context.Background())

	// runSort drives the sorter until its input channel is closed or ctx
	// is cancelled.
	runSort := func() error {
		sorter.Sort(ctx)
		return nil
	}

	// emitRows turns each sorted record into one CSV row, then surfaces
	// the sorter's own error, if any.
	emitRows := func() error {
		for item := range sorted {
			rec := item.(Name)
			if err := out.Write([]string{rec.Name, rec.ID}); err != nil {
				return err
			}
		}
		return <-errs
	}

	group.Go(runSort)
	group.Go(emitRows)

	nw := &NameWriter{
		writer:   out,
		sortChan: unsorted,
		sortTask: group,
	}
	return nw, nil
}
// WriteName submits a copy of *n for sorting. The record is sent on the
// sorter's input channel, so the call blocks while that channel's buffer
// is full. The returned error is always nil.
//
// n must not be nil, and WriteName must not be called after Close: Close
// closes the channel, and sending on a closed channel panics.
func (w *NameWriter) WriteName(n *Name) error {
	record := *n
	w.sortChan <- record
	return nil
}
// Close signals that no more records will be submitted, waits for the
// sorted rows to be written, and flushes the CSV writer.
//
// It returns the first error reported by the sorting or row-writing
// goroutines, or else any error the CSV writer hit while writing or
// flushing. Close may be called more than once; every call after the
// first returns nil without doing any work, even if the first call
// returned an error.
func (w *NameWriter) Close() error {
	w.mutex.Lock()
	defer w.mutex.Unlock()

	if w.closed {
		return nil // already closed before, no work left to do
	}
	w.closed = true

	// Closing the input channel tells the sorter that the input is
	// complete.
	close(w.sortChan)
	if err := w.sortTask.Wait(); err != nil {
		return err
	}

	// csv.Writer.Flush has no return value; a failure while flushing the
	// buffered rows is only visible through Error, so report it instead
	// of silently dropping it.
	w.writer.Flush()
	return w.writer.Error()
}