-
Notifications
You must be signed in to change notification settings - Fork 8
/
threads.go
198 lines (183 loc) · 5.19 KB
/
threads.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
// Copyright (c) 2022, The Emergent Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package axon
import (
"fmt"
"math"
"runtime"
"sort"
"sync"
"cogentcore.org/core/base/atomiccounter"
"cogentcore.org/core/base/timer"
)
// ParallelChunkRun maps fun across the [0, total) range of items using
// nThreads goroutines that each repeatedly claim a small fixed-size chunk
// from a shared atomic counter, for better load balancing.
// This can win for larger numbers of threads but is not better for small N.
func ParallelChunkRun(fun func(st, ed int), total int, nThreads int) {
	chunk := total / (nThreads * 2)
	if chunk <= 1 {
		// too little work to be worth chunking -- run inline
		fun(0, total)
		return
	}
	back := chunk - 1 // offset from a chunk's last index back to its first
	var next atomiccounter.Counter
	next.Set(-1) // first Add yields the last index of the first chunk
	var wg sync.WaitGroup
	wg.Add(nThreads)
	for ti := 0; ti < nThreads; ti++ {
		go func() {
			defer wg.Done()
			for {
				hi := int(next.Add(int64(chunk))) // last index of the claimed chunk
				lo := hi - back
				if lo >= total {
					return // no chunks left
				}
				end := hi + 1
				if end > total {
					end = total // final, partial chunk
				}
				fun(lo, end) // end is exclusive
			}
		}()
	}
	wg.Wait()
}
// Maps the given function across the [0, total) range of items, using
// nThreads goroutines.
func ParallelRun(fun func(st, ed uint32), total uint32, nThreads int) {
itemsPerThr := uint32(math.Ceil(float64(total) / float64(nThreads)))
wait := sync.WaitGroup{}
for start := uint32(0); start < total; start += itemsPerThr {
start := start // capture into loop-local variable for closure
end := start + itemsPerThr
if end > total {
end = total
}
wait.Add(1) // todo: move out of loop
go func() {
fun(start, end)
wait.Done()
}()
}
wait.Wait()
}
// SetNThreads sets number of threads to use for CPU parallel processing.
// pass 0 to use a default heuristic number based on current GOMAXPROCS
// processors and the number of neurons in the network (call after building)
func (nt *NetworkBase) SetNThreads(nthr int) {
	maxProcs := runtime.GOMAXPROCS(0) // query GOMAXPROCS
	if nthr <= 0 {
		nneur := len(nt.Neurons)
		// heuristic: roughly one thread per 10,000 neurons, scaled up by
		// MaxData (number of parallel data streams)
		nthr = int(math.Ceil(float64(nneur) / (float64(10000) / float64(nt.MaxData))))
		if nthr < 1 { // shouldn't happen, but just in case
			nthr = 1
		}
	}
	nt.NThreads = min(maxProcs, nthr) // never exceed available processors
}
// PathMapSeq applies fun to each pathway in the network, one at a time,
// recording the elapsed time under funame.
func (nt *NetworkBase) PathMapSeq(fun func(pj *Path), funame string) {
	nt.FunTimerStart(funame)
	for i := range nt.Paths {
		fun(nt.Paths[i])
	}
	nt.FunTimerStop(funame)
}
// LayerMapSeq applies fun to each layer in the network, one at a time,
// recording the elapsed time under funame.
func (nt *NetworkBase) LayerMapSeq(fun func(ly *Layer), funame string) {
	nt.FunTimerStart(funame)
	for i := range nt.Layers {
		fun(nt.Layers[i])
	}
	nt.FunTimerStop(funame)
}
// LayerMapPar applies fun to all layers in parallel, using up to
// nt.NThreads goroutines; falls back to sequential LayerMapSeq when
// only one thread is configured. Elapsed time is recorded under funame.
func (nt *NetworkBase) LayerMapPar(fun func(ly *Layer), funame string) {
	if nt.NThreads <= 1 {
		nt.LayerMapSeq(fun, funame)
		return
	}
	nt.FunTimerStart(funame)
	ParallelRun(func(st, ed uint32) {
		for li := st; li < ed; li++ {
			fun(nt.Layers[li])
		}
	}, uint32(len(nt.Layers)), nt.NThreads)
	nt.FunTimerStop(funame)
}
// NeuronMapSeq applies fun to every neuron in the network sequentially,
// layer by layer, passing the layer and the global neuron index.
// Elapsed time is recorded under funame.
func (nt *NetworkBase) NeuronMapSeq(ctx *Context, fun func(ly *Layer, ni uint32), funame string) {
	nt.FunTimerStart(funame)
	for _, ly := range nt.Layers {
		st := ly.NeurStIndex // layer's first global neuron index
		for ni := st; ni < st+ly.NNeurons; ni++ {
			fun(ly, ni)
		}
	}
	nt.FunTimerStop(funame)
}
// NeuronMapPar applies fun to all neurons in parallel, using up to
// nt.NThreads goroutines; falls back to sequential NeuronMapSeq when
// only one thread is configured. Elapsed time is recorded under funame.
func (nt *NetworkBase) NeuronMapPar(ctx *Context, fun func(ly *Layer, ni uint32), funame string) {
	if nt.NThreads <= 1 {
		nt.NeuronMapSeq(ctx, fun, funame)
		return
	}
	nt.FunTimerStart(funame)
	ParallelRun(func(st, ed uint32) {
		for ni := st; ni < ed; ni++ {
			// look up the owning layer from the neuron's layer index
			fun(nt.Layers[NrnI(ctx, ni, NrnLayIndex)], ni)
		}
	}, nt.NNeurons, nt.NThreads)
	nt.FunTimerStop(funame)
}
//////////////////////////////////////////////////////////////
// Timing reports
// TimerReport reports the amount of time spent in each recorded function,
// sorted by name, along with each function's percentage of the total.
func (nt *NetworkBase) TimerReport() {
	fmt.Printf("TimerReport: %v %d threads\n", nt.Nm, nt.NThreads)
	fmt.Printf("\t%13s \t%7s\t%7s\n", "Function Name", "Secs", "Pct")
	fnms := make([]string, 0, len(nt.FunTimes))
	for k := range nt.FunTimes {
		fnms = append(fnms, k)
	}
	sort.Strings(fnms) // deterministic output despite random map iteration order
	pcts := make([]float64, len(fnms))
	tot := 0.0
	for i, fn := range fnms {
		pcts[i] = float64(nt.FunTimes[fn].Total)
		tot += pcts[i]
	}
	for i, fn := range fnms {
		pct := 0.0
		if tot > 0 { // avoid NaN when no time has been recorded
			pct = 100 * (pcts[i] / tot)
		}
		fmt.Printf("\t%13s \t%7.3f\t%7.1f\n", fn, pcts[i], pct)
	}
	fmt.Printf("\t%13s \t%7.3f\n", "Total", tot)
}
// FunTimerStart starts the timer for the given function name, creating
// it on first use. No-op unless RecFunTimes is enabled.
func (nt *NetworkBase) FunTimerStart(fun string) {
	if !nt.RecFunTimes {
		return
	}
	if ft, ok := nt.FunTimes[fun]; ok {
		ft.Start()
		return
	}
	// first call for this name: create and start a fresh timer
	ft := &timer.Time{}
	nt.FunTimes[fun] = ft
	ft.Start()
}
// FunTimerStop stops the timer for the given function name, which must
// already exist (FunTimerStart creates it). No-op unless RecFunTimes is
// enabled.
func (nt *NetworkBase) FunTimerStop(fun string) {
	if nt.RecFunTimes {
		nt.FunTimes[fun].Stop()
	}
}