forked from rai-project/dlframework
-
Notifications
You must be signed in to change notification settings - Fork 0
/
agent.go
117 lines (94 loc) · 2.85 KB
/
agent.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package registryquery
import (
"encoding/json"
"path"
"runtime"
"strings"
"sync"
"github.com/rai-project/parallel/tunny"
"github.com/pkg/errors"
"github.com/rai-project/config"
webmodels "github.com/rai-project/dlframework/httpapi/models"
store "github.com/rai-project/libkv/store"
kv "github.com/rai-project/registry"
)
// Agents returns the agents found in the registry for the models that match
// the given framework name/version and model name/version.
//
// All four arguments are lowercased before use. The manifests for the
// framework are looked up through Models.Manifests and narrowed with
// Models.FilterManifests; for every remaining manifest the registry is listed
// under
//
//	<config.App.Name>/predictor/<framework>/<frameworkVersion>/<model>/<modelVersion>
//
// Every listed entry whose key base starts with "agent-" has its value decoded
// as JSON into a DlframeworkAgent. Entries are de-duplicated by key base, so
// an agent serving several of the matched models is returned once.
//
// An error is returned when the manifests cannot be retrieved or filtered,
// when no manifest exists for the framework, or when the registry or worker
// pool cannot be created. Entries that fail to decode are logged and skipped.
// The returned slice is non-nil (possibly empty) on success; its order is not
// deterministic because entries are processed concurrently.
func (m modelsTy) Agents(frameworkName, frameworkVersion, modelName, modelVersion string) ([]*webmodels.DlframeworkAgent, error) {
	frameworkName = strings.ToLower(frameworkName)
	frameworkVersion = strings.ToLower(frameworkVersion)
	modelName = strings.ToLower(modelName)
	modelVersion = strings.ToLower(modelVersion)

	manifests, err := Models.Manifests(frameworkName, frameworkVersion)
	if err != nil {
		return nil, err
	}
	if len(manifests) == 0 {
		return nil, errors.Errorf("no models found for the framework %s:%s", frameworkName, frameworkVersion)
	}
	manifests, err = Models.FilterManifests(manifests, modelName, modelVersion)
	if err != nil {
		return nil, err
	}

	rgs, err := kv.New()
	if err != nil {
		return nil, err
	}
	defer rgs.Close()

	// agentsLock guards both seen and agents, which are mutated by the pool
	// workers below.
	var agentsLock sync.Mutex
	var wg sync.WaitGroup
	seen := make(map[string]struct{})
	agents := []*webmodels.DlframeworkAgent{}

	pool, err := tunny.CreatePool(runtime.NumCPU(), func(object interface{}) interface{} {
		entry, ok := object.(*store.KVPair)
		if !ok {
			return errors.New("invalid kv type. expecting a KVPair type")
		}

		keyBase := path.Base(entry.Key)
		if !strings.HasPrefix(keyBase, "agent-") {
			return errors.Errorf("skipping non agent %s", keyBase)
		}

		// The key base is expected to look like "agent-<host>:<port>". The
		// port is only used for logging, so a key without a ":" must not
		// abort the lookup (indexing element 1 unconditionally would panic).
		hostPort := strings.Split(strings.TrimPrefix(keyBase, "agent-"), ":")
		host, port := hostPort[0], ""
		if len(hostPort) > 1 {
			port = hostPort[1]
		}

		agentsLock.Lock()
		defer agentsLock.Unlock()

		if _, dup := seen[keyBase]; dup {
			return nil
		}

		agent := &webmodels.DlframeworkAgent{}
		if err := json.Unmarshal(entry.Value, agent); err != nil {
			log.WithError(err).WithField("host", host).WithField("port", port).Error("failed to unmarshal agent")
			return nil
		}
		agents = append(agents, agent)
		seen[keyBase] = struct{}{}
		return nil
	}).Open()
	if err != nil {
		return nil, err
	}
	defer pool.Close()

	prefixKey := path.Join(config.App.Name, "predictor")
	for _, model := range manifests {
		// TODO: using the exact framework version in the key is not correct,
		// since it does not support a constraint such as frameworkVersion=1.x.x.
		key := path.Join(
			prefixKey,
			strings.ToLower(model.Framework.Name),
			strings.ToLower(model.Framework.Version),
			strings.ToLower(model.Name),
			strings.ToLower(model.Version),
		)
		pairs, err := rgs.List(key)
		if err != nil {
			// Best effort: a model whose key cannot be listed simply
			// contributes no agents (presumably the common case is that no
			// agent has registered for it — confirm against the store).
			continue
		}
		for _, pair := range pairs {
			wg.Add(1)
			// The worker's result is intentionally ignored; the callback only
			// signals completion so that wg.Wait below can return.
			pool.SendWorkAsync(pair, func(interface{}, error) {
				wg.Done()
			})
		}
	}
	wg.Wait()

	return agents, nil
}