-
Notifications
You must be signed in to change notification settings - Fork 3.3k
/
promtail.go
298 lines (269 loc) · 7.97 KB
/
promtail.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
package promtail
import (
"crypto/md5"
"errors"
"fmt"
"os"
"os/signal"
"sync"
"syscall"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/grafana/loki/v3/clients/pkg/logentry/stages"
"github.com/grafana/loki/v3/clients/pkg/promtail/api"
"github.com/grafana/loki/v3/clients/pkg/promtail/client"
"github.com/grafana/loki/v3/clients/pkg/promtail/config"
"github.com/grafana/loki/v3/clients/pkg/promtail/server"
"github.com/grafana/loki/v3/clients/pkg/promtail/targets"
"github.com/grafana/loki/v3/clients/pkg/promtail/targets/target"
"github.com/grafana/loki/v3/clients/pkg/promtail/utils"
"github.com/grafana/loki/v3/clients/pkg/promtail/wal"
util_log "github.com/grafana/loki/v3/pkg/util/log"
)
// timeoutUntilFanoutHardStop is the timeout handed to
// utils.NewFanoutEntryHandler when the entry fanout is built.
const timeoutUntilFanoutHardStop = 30 * time.Second
// Counters tracking the outcome of configuration reloads. New registers both
// with the Promtail registerer; reload increments them.
var (
	// reloadSuccessTotal counts reloads that completed without error.
	reloadSuccessTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "promtail",
		Name:      "config_reload_success_total",
		Help:      "Number of reload success times.",
	})

	// reloadFailTotal counts reloads that returned an error.
	reloadFailTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "promtail",
		Name:      "config_reload_fail_total",
		Help:      "Number of reload fail times.",
	})
)
// errConfigNotChange is returned by reloadConfig when the string form of the
// supplied configuration equals the one that is already loaded.
var errConfigNotChange = errors.New("config has not changed")
// Option is a function that can be passed to the New method of Promtail and
// customize the Promtail that is created. New applies the options after the
// defaults have been set and before anything is built from them; nil options
// are skipped.
type Option func(p *Promtail)
// WithLogger returns an Option that makes Promtail use the given logger
// instead of the default one set by New.
func WithLogger(log log.Logger) Option {
	setLogger := func(p *Promtail) {
		p.logger = log
	}
	return setLogger
}
// WithRegisterer returns an Option that makes Promtail use the given
// Prometheus registerer instead of the default one set by New.
func WithRegisterer(reg prometheus.Registerer) Option {
	setRegisterer := func(p *Promtail) {
		p.reg = reg
	}
	return setRegisterer
}
// Promtail is the root struct for Promtail. It owns the components built by
// reloadConfig (client, optional WAL writer, entry fanout, target managers)
// together with the server created in New.
type Promtail struct {
// client is built in reloadConfig: a logger client in dry-run mode, a client
// manager otherwise. It is exposed through Client().
client client.Client
// walWriter is created in reloadConfig only when cfg.WAL.Enabled is set.
walWriter *wal.Writer
// entriesFanout forwards entries to every entry handler collected in
// reloadConfig (the WAL writer when enabled, and the client).
entriesFanout api.EntryHandler
// targetManagers is created in reloadConfig and handed to the server.
targetManagers *targets.TargetManagers
// server is created in New after the first configuration load.
server server.Server
// logger defaults to util_log.Logger; WithLogger overrides it.
logger log.Logger
// reg defaults to prometheus.DefaultRegisterer; WithRegisterer overrides it.
reg prometheus.Registerer
// stopped is set by Shutdown and checked by Run.
stopped bool
// mtx is held by reloadConfig and Shutdown for their whole duration, and by
// Run while it reads stopped.
mtx sync.Mutex
// configLoaded is the string form of the last configuration that
// reloadConfig applied successfully; it is used to detect unchanged configs.
configLoaded string
// newConfig is called by reload to obtain the configuration to apply. When it
// is nil, watchConfig logs a warning and returns without watching.
newConfig func() (*config.Config, error)
// metrics is passed to the client constructors in reloadConfig.
metrics *client.Metrics
// dryRun selects the logger client and forces read-only positions in
// reloadConfig.
dryRun bool
}
// New builds a Promtail from cfg.
//
// Defaults for the logger and registerer are set first and may be replaced by
// opts. The reload counters are then registered, cfg is loaded through
// reloadConfig, and the server is created from the resulting target managers.
// newConfig is stored for later reloads; metrics and dryRun are kept on the
// returned value. Any failure along the way is returned with a nil Promtail.
func New(cfg config.Config, newConfig func() (*config.Config, error), metrics *client.Metrics, dryRun bool, opts ...Option) (*Promtail, error) {
	p := &Promtail{
		logger:  util_log.Logger,
		reg:     prometheus.DefaultRegisterer,
		metrics: metrics,
		dryRun:  dryRun,
	}

	// todo (callum) I don't understand why I needed to add this check
	for _, apply := range opts {
		if apply != nil {
			apply(p)
		}
	}

	if err := p.reg.Register(reloadSuccessTotal); err != nil {
		return nil, fmt.Errorf("error register prometheus collector reloadSuccessTotal :%w", err)
	}
	if err := p.reg.Register(reloadFailTotal); err != nil {
		return nil, fmt.Errorf("error register prometheus collector reloadFailTotal :%w", err)
	}

	// The first load goes through the same path as every later reload.
	if err := p.reloadConfig(&cfg); err != nil {
		return nil, err
	}

	srv, err := server.New(cfg.ServerConfig, p.logger, p.targetManagers, cfg.String())
	if err != nil {
		return nil, fmt.Errorf("error creating loki server: %w", err)
	}

	p.server = srv
	p.newConfig = newConfig
	return p, nil
}
// reloadConfig replaces the running pipeline with one built from cfg.
//
// It returns errConfigNotChange when cfg serialises to the same string as the
// configuration that is already loaded. Otherwise it stops the components of
// the previous pipeline, builds a new client (and WAL writer when enabled),
// entry fanout and target managers from cfg, hands the new target managers to
// the server when one exists, and records cfg as the loaded configuration.
// Errors from building any component are returned; in that case configLoaded
// is left unchanged. p.mtx is held for the whole call.
func (p *Promtail) reloadConfig(cfg *config.Config) error {
	level.Debug(p.logger).Log("msg", "Reloading configuration file")
	p.mtx.Lock()
	defer p.mtx.Unlock()

	// Serialise once: the string is the change-detection key, the input of the
	// logged checksum and the value stored in configLoaded.
	newConf := cfg.String()
	if newConf == p.configLoaded {
		return errConfigNotChange
	}
	level.Info(p.logger).Log("msg", "Reloading configuration file", "md5sum", fmt.Sprintf("%x", md5.Sum([]byte(newConf))))

	// Tear the previous pipeline down in the same order Shutdown uses. The
	// fanout and the WAL writer are cleared after being stopped so that a
	// failed rebuild, or a new config without WAL, does not leave an already
	// stopped component behind for Shutdown to stop a second time.
	if p.targetManagers != nil {
		p.targetManagers.Stop()
	}
	if p.entriesFanout != nil {
		p.entriesFanout.Stop()
		p.entriesFanout = nil
	}
	if p.walWriter != nil {
		p.walWriter.Stop()
		p.walWriter = nil
	}
	if p.client != nil {
		p.client.Stop()
	}

	cfg.Setup(p.logger)
	if cfg.LimitsConfig.ReadlineRateEnabled {
		stages.SetReadLineRateLimiter(cfg.LimitsConfig.ReadlineRate, cfg.LimitsConfig.ReadlineBurst, cfg.LimitsConfig.ReadlineRateDrop)
	}

	var err error
	// entryHandlers contains all sinks where scraped log entries should get to
	var entryHandlers []api.EntryHandler

	// TODO: Refactor all client instantiation inside client.Manager
	// Dry-run always forces read-only positions.
	cfg.PositionsConfig.ReadOnly = cfg.PositionsConfig.ReadOnly || p.dryRun
	if p.dryRun {
		p.client, err = client.NewLogger(p.metrics, p.logger, cfg.ClientConfigs...)
		if err != nil {
			return err
		}
	} else {
		var notifier client.WriterEventsNotifier = client.NilNotifier
		if cfg.WAL.Enabled {
			p.walWriter, err = wal.NewWriter(cfg.WAL, p.logger, p.reg)
			if err != nil {
				return fmt.Errorf("failed to create wal writer: %w", err)
			}
			// If WAL is enabled, the walWriter should notify the manager of new WAL writes, and it should as well
			// be an entry handler where the processing pipeline writes to
			notifier = p.walWriter
			entryHandlers = append(entryHandlers, p.walWriter)
		}
		p.client, err = client.NewManager(
			p.metrics,
			p.logger,
			cfg.LimitsConfig,
			p.reg,
			cfg.WAL,
			notifier,
			cfg.ClientConfigs...,
		)
		if err != nil {
			return fmt.Errorf("failed to create client manager: %w", err)
		}
	}

	entryHandlers = append(entryHandlers, p.client)
	p.entriesFanout = utils.NewFanoutEntryHandler(timeoutUntilFanoutHardStop, entryHandlers...)

	tms, err := targets.NewTargetManagers(p, p.reg, p.logger, cfg.PositionsConfig, p.entriesFanout, cfg.ScrapeConfig, &cfg.TargetConfig, cfg.Global.FileWatch)
	if err != nil {
		return err
	}
	p.targetManagers = tms

	// p.server is nil during the first load performed by New; the server is
	// created afterwards with the target managers built here.
	if p.server != nil {
		promtailServer, ok := p.server.(*server.PromtailServer)
		if !ok {
			return errors.New("promtailServer cast fail")
		}
		// Serialised again on purpose: cfg has been modified since newConf was
		// taken (Setup call, ReadOnly assignment).
		promtailServer.ReloadServer(p.targetManagers, cfg.String())
	}

	p.configLoaded = newConf
	return nil
}
// Run starts the configuration watcher in its own goroutine and then runs the
// server, returning whatever the server's Run returns. If Shutdown was called
// before Run, it returns nil immediately without starting anything.
func (p *Promtail) Run() error {
	// Only the flag read happens under the lock; it is released before the
	// blocking server call.
	p.mtx.Lock()
	alreadyStopped := p.stopped
	p.mtx.Unlock()

	if alreadyStopped {
		return nil
	}

	go p.watchConfig()
	return p.server.Run()
}
// Client returns the underlying client Promtail uses to write to Loki.
// The field is read without taking p.mtx, whereas reloadConfig replaces it
// while holding p.mtx; the value returned is the client in place at the time
// of the call.
func (p *Promtail) Client() client.Client {
return p.client
}
// Shutdown stops Promtail. The first call marks it as stopped and then shuts
// down the server, target managers, entry fanout, WAL writer and client, in
// that order, skipping any component that was never created. Later calls
// return immediately. p.mtx is held for the whole call.
func (p *Promtail) Shutdown() {
	p.mtx.Lock()
	defer p.mtx.Unlock()
	if p.stopped {
		return
	}
	p.stopped = true
	if p.server != nil {
		p.server.Shutdown()
	}
	if p.targetManagers != nil {
		p.targetManagers.Stop()
	}
	if p.entriesFanout != nil {
		p.entriesFanout.Stop()
	}
	if p.walWriter != nil {
		p.walWriter.Stop()
	}
	// todo work out the stop.
	// Guarded like every other component so that a Promtail without a client
	// does not panic here while the mutex is held.
	if p.client != nil {
		p.client.Stop()
	}
}
// ActiveTargets returns active targets per jobs from the target manager.
// It delegates directly to p.targetManagers, which is assigned by
// reloadConfig; no nil check or locking is done here.
func (p *Promtail) ActiveTargets() map[string][]target.Target {
return p.targetManagers.ActiveTargets()
}
// watchConfig reloads the configuration whenever the process receives SIGHUP
// or the server delivers a reload request. It returns immediately, after
// logging a warning, when no newConfig function is set or when the server is
// not a *server.PromtailServer; otherwise it loops forever.
func (p *Promtail) watchConfig() {
	// Reload handler.
	if p.newConfig == nil {
		level.Warn(p.logger).Log("msg", "disable watchConfig", "reason", "Promtail newConfig func is Empty")
		return
	}

	promtailServer, isPromtailServer := p.server.(*server.PromtailServer)
	if !isPromtailServer {
		reason := "Unknown Promtail server type"
		if _, isNoop := p.server.(*server.NoopServer); isNoop {
			reason = "Promtail server is disabled"
		}
		level.Warn(p.logger).Log("msg", "disable watchConfig", "reason", reason)
		return
	}

	level.Warn(p.logger).Log("msg", "enable watchConfig")
	sighup := make(chan os.Signal, 1)
	signal.Notify(sighup, syscall.SIGHUP)
	for {
		select {
		case <-sighup:
			// There is nobody to report the outcome to on the signal path.
			_ = p.reload()
		case reply := <-promtailServer.Reload():
			// The requester receives the reload result, nil on success.
			reply <- p.reload()
		}
	}
}
// reload obtains a fresh configuration from p.newConfig and applies it with
// reloadConfig. Every failure is logged and counted in reloadFailTotal before
// being returned; a successful reload increments reloadSuccessTotal and
// returns nil.
func (p *Promtail) reload() error {
	cfg, err := p.newConfig()
	if err != nil {
		reloadFailTotal.Inc()
		// Logged here because the SIGHUP path in watchConfig discards the
		// returned error; without this a config that fails to load would
		// leave no trace in the logs.
		level.Error(p.logger).Log("msg", "Error loading new config", "err", err)
		return fmt.Errorf("Error new Config: %w", err)
	}
	err = p.reloadConfig(cfg)
	if err != nil {
		reloadFailTotal.Inc()
		level.Error(p.logger).Log("msg", "Error reloading config", "err", err)
		return err
	}
	reloadSuccessTotal.Inc()
	return nil
}