Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 384 lines (346 sloc) 11.201 kb
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
1 // Copyright (c) 2012 VMware, Inc.
2
3 package gonit
4
5 import (
6 "fmt"
7 "github.com/cloudfoundry/gosigar"
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
8 "math"
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
9 "strconv"
10 "time"
11 )
12
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
// BUG(lisbakke): Figure out how to change maxDataToStore when two rules use
// the same resource, because they will both share the same ResourceHolder.
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
14
// SigarInterface is the small set of sigar lookups the resource manager
// needs. It exists so that unit tests can substitute a mock for sigar.
type SigarInterface interface {
	// getMemResident returns the resident memory reported for pid.
	getMemResident(pid int) (uint64, error)
	// getProcTime returns the total process time reported for pid.
	getProcTime(pid int) (uint64, error)
}
20
// SigarGetter is the SigarInterface implementation that queries the real
// sigar library. It carries no state.
type SigarGetter struct{}
22
23 // Gets the Resident memory of a process.
24 func (s *SigarGetter) getMemResident(pid int) (uint64, error) {
25 mem := sigar.ProcMem{}
26 if err := mem.Get(pid); err != nil {
27 return 0, fmt.Errorf("Couldnt get mem for pid '%v'.", pid)
28 }
29 return mem.Resident, nil
30 }
31
744416a @lisbakke Some cleanup.
lisbakke authored
32 // Gets the proc time and a timestamp and returns a DataTimestamp.
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
33 func (s *SigarGetter) getProcTime(pid int) (uint64, error) {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
34 procTime := sigar.ProcTime{}
35 if err := procTime.Get(pid); err != nil {
744416a @lisbakke Some cleanup.
lisbakke authored
36 return 0, fmt.Errorf("Couldnt get proctime for pid '%v'.", pid)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
37 }
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
38 return procTime.Total, nil
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
39 }
40
744416a @lisbakke Some cleanup.
lisbakke authored
41 // Don't create more.
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
42 type ResourceManager struct {
43 resourceHolders []*ResourceHolder
44 sigarInterface SigarInterface
cb0d39f @lisbakke Start/stop/restart actions and main integration.
lisbakke authored
45 // Used by eventmonitor to cache resources so they don't get pulled multiple
46 // times when multiple rules are being checked for the same resource.
47 cachedResources map[string]uint64
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
48 }
49
50 type ResourceHolder struct {
51 processName string
52 resourceName string
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
53 dataTimestamps []*DataTimestamp
54 firstEntryIndex int64
55 maxDataToStore int64
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
56 }
57
744416a @lisbakke Some cleanup.
lisbakke authored
58 var resourceManager ResourceManager = ResourceManager{
cb0d39f @lisbakke Start/stop/restart actions and main integration.
lisbakke authored
59 sigarInterface: &SigarGetter{},
60 cachedResources: map[string]uint64{},
744416a @lisbakke Some cleanup.
lisbakke authored
61 }
62
// DataTimestamp is one sampled value together with the time it was stored.
type DataTimestamp struct {
	// The sampled value.
	data uint64
	// When the sample was stored, as a Unix time in nanoseconds.
	nanoTimestamp int64
}
67
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
const (
	// INTERVAL_MARGIN_ERR is the fraction of a rule's duration that the stored
	// samples may fall short of while still counting as covering it.
	INTERVAL_MARGIN_ERR = 0.05
	// NANO_TO_MILLI is the number of nanoseconds in a millisecond; dividing a
	// nanosecond value by it yields milliseconds.
	NANO_TO_MILLI = float64(time.Millisecond)
)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
72
744416a @lisbakke Some cleanup.
lisbakke authored
// Names of the resources that rules can refer to.
const (
	// CPU_PERCENT_NAME selects the CPU usage percentage of a process.
	CPU_PERCENT_NAME = "cpu_percent"
	// MEMORY_USED_NAME selects the resident memory of a process.
	MEMORY_USED_NAME = "memory_used"
)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
77
78 var validResourceNames = map[string]bool{
79 MEMORY_USED_NAME: true,
80 CPU_PERCENT_NAME: true,
81 }
82
744416a @lisbakke Some cleanup.
lisbakke authored
83 // Cleans data from ResourceManager.
84 func (r *ResourceManager) CleanData() {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
85 r.resourceHolders = []*ResourceHolder{}
cb0d39f @lisbakke Start/stop/restart actions and main integration.
lisbakke authored
86 r.ClearCachedResources()
87 }
88
89 // Cleans up the resource data used for a process's event monitors.
90 func (r *ResourceManager) CleanDataForProcess(p *Process) {
91 for _, resourceHolder := range r.resourceHolders {
92 if resourceHolder.processName == p.Name {
93 resourceHolder.dataTimestamps = []*DataTimestamp{}
94 resourceHolder.firstEntryIndex = 0
95 }
96 }
97 r.ClearCachedResources()
744416a @lisbakke Some cleanup.
lisbakke authored
98 }
99
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
100 // Get the nth entry in the data. Accepts a negaitve number, as well, so that
101 // the last etc. can be referenced.
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
102 func (r *ResourceHolder) getNthData(index int64) (*DataTimestamp, error) {
744416a @lisbakke Some cleanup.
lisbakke authored
103 data := r.dataTimestamps
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
104 dataLen := int64(len(data))
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
105 if index < 0 {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
106 if index+dataLen < int64(0) {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
107 return nil, fmt.Errorf("Cannot have a negative index '%v' larger than "+
108 "the data length '%v'.", index, dataLen)
109 }
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
110 if dataLen == r.maxDataToStore {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
111 lastIndex := r.firstEntryIndex + index
112 if lastIndex < 0 {
113 lastIndex = dataLen + lastIndex
114 }
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
115 return data[lastIndex], nil
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
116 } else {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
117 return data[dataLen+index], nil
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
118 }
119 } else {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
120 return data[index], nil
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
121 }
122 panic("Can't get here.")
123 }
124
125 // Given an array of data which is of type ProcUsedTimestamp, will return the
126 // percent of proc time that was used.
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
127 func (r *ResourceHolder) calculateProcPercent(point1 *DataTimestamp,
128 point2 *DataTimestamp) (uint64, error) {
129 lastProc := float64(point1.data)
130 secondLastProc := float64(point2.data)
131 lastMilli := float64(point1.nanoTimestamp) / NANO_TO_MILLI
132 secondLastMilli := float64(point2.nanoTimestamp) / NANO_TO_MILLI
133 return uint64(100 * (lastProc - secondLastProc) / (lastMilli - secondLastMilli)), nil
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
134 }
135
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
136 // Gets a ResourceHolder, given a ParsedEvent. Creates a new one if one doesn't
137 // exist.
138 func (r *ResourceManager) getResourceHolder(
139 parsedEvent *ParsedEvent) *ResourceHolder {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
140 processName := parsedEvent.processName
141 resourceName := parsedEvent.resourceName
142 for _, resourceHolder := range r.resourceHolders {
143 if resourceHolder.processName == processName &&
144 resourceHolder.resourceName == resourceName {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
145 return resourceHolder
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
146 }
147 }
148 resourceHolder := &ResourceHolder{
149 processName: processName,
150 resourceName: resourceName,
151 }
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
152 interval := float64(parsedEvent.interval.Seconds())
153 duration := float64(parsedEvent.duration.Seconds())
154 if duration == 0.0 {
155 duration = interval
156 }
157
158 resourceHolder.maxDataToStore = int64(math.Ceil(duration / interval))
cb0d39f @lisbakke Start/stop/restart actions and main integration.
lisbakke authored
159 r.resourceHolders = append(r.resourceHolders, resourceHolder)
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
160 return resourceHolder
161 }
162
cb0d39f @lisbakke Start/stop/restart actions and main integration.
lisbakke authored
163 // Sets an entry in the resources cache.
164 func (r *ResourceManager) setCachedResource(resourceName string, value uint64) {
165 r.cachedResources[resourceName] = value
166 }
167
168 // Gets an entry in the resources cache.
169 func (r *ResourceManager) getCachedResource(
170 resourceName string) (uint64, bool) {
171 value, has_key := r.cachedResources[resourceName]
172 return value, has_key
173 }
174
175 // Clears the resources cache.
176 func (r *ResourceManager) ClearCachedResources() {
177 r.cachedResources = map[string]uint64{}
178 }
179
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
180 // Given a ParsedEvent, will populate the correct resourceHolder with the
181 // resource value.
182 func (r *ResourceManager) gatherResource(parsedEvent *ParsedEvent,
183 pid int) error {
184 resourceHolder := r.getResourceHolder(parsedEvent)
744416a @lisbakke Some cleanup.
lisbakke authored
185 if err := r.gather(pid, resourceHolder); err != nil {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
186 return err
187 }
188 return nil
189 }
190
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
191 // Returns the average of an array of DataTimestamps.
192 func averageDataTimestampArray(array []*DataTimestamp) uint64 {
193 sum := uint64(0)
194 for _, val := range array {
195 sum += val.data
196 }
197 return sum / uint64(len(array))
198 }
199
200 // Gets all entries in a resource holder since a nanosecond unix timestamp.
201 func (r *ResourceHolder) getEntriesSince(
202 nanosecondStart int64) []*DataTimestamp {
203 entries := []*DataTimestamp{}
204 index := int64(-1)
205 for {
206 dataTimestamp, err := r.getNthData(index)
207 index--
208 if err != nil {
209 break
210 }
211 entries = append(entries, dataTimestamp)
212 if nanosecondStart > dataTimestamp.nanoTimestamp {
213 break
214 }
215 }
216 return entries
217 }
218
219 // Given an array of DataTimestamp entries, and a few other parameters, this
220 // function will return the average over the duration.
221 func (r *ResourceHolder) getDurationData(entries []*DataTimestamp,
222 duration time.Duration, interval time.Duration,
223 resourceName string) (uint64, error) {
224 first := entries[0]
225 last := entries[len(entries)-1]
226 marginErr := 1 - INTERVAL_MARGIN_ERR
227 errDuration := int64(marginErr * float64(duration.Nanoseconds()))
228 // Add interval because if we have 3 entries that are 1s interval then we have
229 // 2s as time covered, but really it covers 3s.
230 timeDataCovers := first.nanoTimestamp - last.nanoTimestamp +
231 (interval.Nanoseconds())
232 if timeDataCovers > errDuration {
233 if resourceName == MEMORY_USED_NAME {
234 return averageDataTimestampArray(entries), nil
235 } else if resourceName == CPU_PERCENT_NAME {
236 return r.calculateProcPercent(first, last)
237 }
238 }
239 return 0, nil
240 }
241
242 // Gets the current data for an event.
243 func (r *ResourceHolder) getData(parsedEvent *ParsedEvent) (uint64, error) {
244 resourceName := parsedEvent.resourceName
245 duration := parsedEvent.duration
246 interval := parsedEvent.interval
247 if (duration.Seconds() / interval.Seconds()) > 1 {
248 timeNow := time.Now().UnixNano()
249 entries := r.getEntriesSince(timeNow - duration.Nanoseconds())
250 if len(entries) <= 1 {
251 return 0, nil
252 }
253 return r.getDurationData(entries, duration, interval, resourceName)
254 } else {
255 // If we're not dealing with a duration.
256 data, err := r.getNthData(-1)
257 return data.data, err
258 }
259 panic("Can't get here.")
260 }
261
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
262 // Takes a ParsedEvent and pid and returns the value for the resource used in
263 // the rule.
264 func (r *ResourceManager) GetResource(parsedEvent *ParsedEvent,
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
265 pid int) (uint64, error) {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
266 resourceName := parsedEvent.resourceName
267 processName := parsedEvent.processName
cb0d39f @lisbakke Start/stop/restart actions and main integration.
lisbakke authored
268
269 data, has_key := r.getCachedResource(resourceName)
270 if has_key {
271 return data, nil
272 }
273
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
274 if err := r.gatherResource(parsedEvent, pid); err != nil {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
275 return 0, err
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
276 }
cb0d39f @lisbakke Start/stop/restart actions and main integration.
lisbakke authored
277
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
278 for _, resourceHolder := range r.resourceHolders {
744416a @lisbakke Some cleanup.
lisbakke authored
279 lenResourceData := len(resourceHolder.dataTimestamps)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
280 if resourceHolder.processName == processName &&
281 resourceHolder.resourceName == resourceName && lenResourceData > 0 {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
282 data, err := resourceHolder.getData(parsedEvent)
283 if err != nil {
284 return 0, err
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
285 }
cb0d39f @lisbakke Start/stop/restart actions and main integration.
lisbakke authored
286 r.setCachedResource(resourceName, data)
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
287 return data, nil
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
288 }
289 }
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
290 return 0,
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
291 fmt.Errorf("Could not find resource value for resource %v on process %v.",
292 resourceName, processName)
293 }
294
295 // A utility function that parses a rule's amount string and returns the value
296 // as the correct type. For instance, in a rule such as 'memory_used > 14mb'
297 // the amount is '14mb' which this function would turn into a uint64 of
298 // 14*1024*1024.
299 func (r *ResourceManager) ParseAmount(resourceName string,
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
300 amount string) (uint64, error) {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
301 if resourceName == MEMORY_USED_NAME {
302 lenAmount := len(amount)
303 if lenAmount < 3 {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
304 return 0, fmt.Errorf("%v '%v' is not the correct format.",
744416a @lisbakke Some cleanup.
lisbakke authored
305 resourceName, amount)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
306 }
307 units := amount[lenAmount-2:]
308 amount = amount[0 : lenAmount-2]
309 amountUi, err := strconv.ParseUint(amount, 10, 64)
310 if err != nil {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
311 return 0, err
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
312 }
313 switch units {
314 case "kb":
315 amountUi *= 1024
316 case "mb":
317 amountUi *= 1024 * 1024
318 case "gb":
319 amountUi *= 1024 * 1024 * 1024
320 default:
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
321 return 0, fmt.Errorf("Invalid units '%v' on '%v'.",
744416a @lisbakke Some cleanup.
lisbakke authored
322 units, resourceName)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
323 }
324 return amountUi, nil
325 } else if resourceName == CPU_PERCENT_NAME {
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
326 amountUi, err := strconv.ParseUint(amount, 10, 64)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
327 return amountUi, err
328 }
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
329 return 0, fmt.Errorf("Unknown resource name %v.", resourceName)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
330 }
331
332 // Saves a data point into the data array of the ResourceHolder.
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
333 func (r *ResourceHolder) saveData(dataToSave uint64) {
334 dataTimestamp := &DataTimestamp{
744416a @lisbakke Some cleanup.
lisbakke authored
335 data: dataToSave,
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
336 nanoTimestamp: time.Now().UnixNano(),
744416a @lisbakke Some cleanup.
lisbakke authored
337 }
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
338 timestampLen := int64(len(r.dataTimestamps))
339 if timestampLen > r.maxDataToStore {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
340 panic("This shouldn't happen.")
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
341 } else if timestampLen == r.maxDataToStore {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
342 // Clear out old data.
744416a @lisbakke Some cleanup.
lisbakke authored
343 r.dataTimestamps[r.firstEntryIndex] = dataTimestamp
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
344 r.firstEntryIndex++
2dff394 @lisbakke Adding duration support to eventmanager.
lisbakke authored
345 if r.firstEntryIndex == r.maxDataToStore {
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
346 r.firstEntryIndex = 0
347 }
348 } else {
744416a @lisbakke Some cleanup.
lisbakke authored
349 r.dataTimestamps = append(r.dataTimestamps, dataTimestamp)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
350 }
351 }
352
744416a @lisbakke Some cleanup.
lisbakke authored
353 // Gets the data for a resource and saves it to the ResourceHolder.
354 func (r *ResourceManager) gather(pid int,
355 resourceHolder *ResourceHolder) error {
356 if resourceHolder.resourceName == MEMORY_USED_NAME {
357 mem, err := r.sigarInterface.getMemResident(pid)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
358 if err != nil {
359 return err
360 }
744416a @lisbakke Some cleanup.
lisbakke authored
361 resourceHolder.saveData(mem)
362 } else if resourceHolder.resourceName == CPU_PERCENT_NAME {
363 procTime, err := r.sigarInterface.getProcTime(pid)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
364 if err != nil {
365 return err
366 }
744416a @lisbakke Some cleanup.
lisbakke authored
367 resourceHolder.saveData(procTime)
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
368 }
369 return nil
370 }
371
372 // Checks to see if a resource is a valid resource.
373 func (r *ResourceManager) IsValidResourceName(resourceName string) bool {
374 if _, hasKey := validResourceNames[resourceName]; hasKey {
375 return true
376 }
377 return false
378 }
379
380 // Allows the sigar interface to be set so that tests can set it.
381 func (r *ResourceManager) SetSigarInterface(sigar SigarInterface) {
744416a @lisbakke Some cleanup.
lisbakke authored
382 r.sigarInterface = sigar
ffeaf56 @lisbakke Configmanager, resourcemanager and eventmonitor.
lisbakke authored
383 }
Something went wrong with that request. Please try again.