-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.go
85 lines (78 loc) · 2.66 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
package main
import (
"context"
"encoding/json"
"os"
"path"
"strconv"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/hotpxl/nvml"
log "github.com/sirupsen/logrus"
"gopkg.in/alecthomas/kingpin.v2"
)
func main() {
app := kingpin.New("nvml-monitor", "Monitor NVML status and upload to etcd.")
duration := app.Flag("duration", "Duration before statistics report.").Default("5s").Duration()
endpoints := app.Flag("endpoints", "Etcd cluster endpoints to connect to.").Required().Strings()
base := app.Flag("base", "Base path of etcd.").Default("/").String()
kingpin.MustParse(app.Parse(os.Args[1:]))
cli, err := clientv3.New(clientv3.Config{
Endpoints: *endpoints,
DialTimeout: 5 * time.Second,
})
if err != nil {
log.WithError(err).Fatal("Failed to connect to etcd.")
}
defer cli.Close()
session, err := nvml.NewSession()
if err != nil {
log.WithError(err).Fatal("Failed to create NVML session.")
}
defer session.Close()
hostname, err := os.Hostname()
if err != nil {
log.WithError(err).Fatal("Failed to retrieve hostname.")
}
for {
devices, err := session.GetAllDevices()
if err != nil {
log.WithError(err).Fatal("Failed to get devices.")
}
for idx, d := range devices {
mem, err := d.MemoryInfo()
if err != nil {
log.WithError(err).Fatal("Failed to get memory information.")
}
_, err = cli.Put(context.Background(), path.Join(*base, hostname, strconv.Itoa(idx), "mem", "free"), strconv.FormatUint(mem.Free, 10))
if err != nil {
log.WithError(err).Fatal("Failed to upload memory information.")
}
_, err = cli.Put(context.Background(), path.Join(*base, hostname, strconv.Itoa(idx), "mem", "used"), strconv.FormatUint(mem.Used, 10))
if err != nil {
log.WithError(err).Fatal("Failed to upload memory information.")
}
_, err = cli.Put(context.Background(), path.Join(*base, hostname, strconv.Itoa(idx), "mem", "total"), strconv.FormatUint(mem.Total, 10))
if err != nil {
log.WithError(err).Fatal("Failed to upload memory information.")
}
processes, err := d.Processes()
if err != nil {
log.WithError(err).Fatal("Failed to get processes.")
}
b, err := json.Marshal(processes)
if err != nil {
log.WithError(err).Fatal("Failed to marshal data.")
}
_, err = cli.Put(context.Background(), path.Join(*base, hostname, strconv.Itoa(idx), "proc"), string(b))
if err != nil {
log.WithError(err).Fatal("Failed to upload process information.")
}
_, err = cli.Put(context.Background(), path.Join(*base, hostname, "timestamp"), strconv.FormatInt(time.Now().Unix(), 10))
if err != nil {
log.WithError(err).Fatal("Failed to update timestamp.")
}
}
time.Sleep(*duration)
}
}