/
TimePlot.hs
262 lines (248 loc) · 14 KB
/
TimePlot.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
{-# LANGUAGE ScopedTypeVariables, TypeFamilies, ParallelListComp, CPP, BangPatterns, TemplateHaskell #-}
module Main where
import Paths_timeplot (version)
import Data.Version (showVersion)
import Distribution.VcsRevision.Git
import Language.Haskell.TH.Syntax
import Control.Lens
import Control.Monad
import Data.Default
import Data.List
import Data.Ord
import qualified Data.Map as M
import qualified Data.ByteString.Char8 as S
import System.Environment
import System.Exit
import Data.Time hiding (parseTime)
import Graphics.Rendering.Chart
import Graphics.Rendering.Chart.Backend.Cairo
import Tools.TimePlot.Types
import Tools.TimePlot.Conf
import Tools.TimePlot.Source
import Tools.TimePlot.Plots
import Tools.TimePlot.Render
import qualified Tools.TimePlot.Incremental as I
-- | Build the stacked timing chart for a stream of parsed events.
--
-- Events are assumed to be sorted by time. The input is traversed twice,
-- running @parseEvents@ once for each pass:
--
-- * Pass 1 computes the min/max event times and the set of output track
--   names, each mapped to the chart kind it is drawn with (the kinds for an
--   input track come from @chartKindF@).
--
-- * Pass 2 generates the plot data, multiplexing every event onto the
--   output tracks derived from its input track.
--
-- When @maxT@ is given, events at or after it are dropped in both passes.
-- @minT@ and @maxT@, when given, replace the computed bounds of the common
-- time axis. @transformLabel@ rewrites every axis label; it receives the
-- label's axis value and its current text.
--
-- At most 10 unparseable lines from the first pass are echoed to stdout.
-- When the first pass yields no events, an empty renderable is returned.
makeChart :: (S.ByteString -> [(ChartKind LocalTime, S.ByteString)]) ->
             IO (ParseResult LocalTime) ->
             Maybe LocalTime -> Maybe LocalTime ->
             (LocalTime -> String -> String) ->
             IO (Renderable ())
makeChart chartKindF parseEvents minT maxT transformLabel = do
  ParseResult events unparseable <- parseEvents
  unless (null unparseable) $ do
    let suffix
          | null (drop 10 unparseable) = ":"
          | otherwise                  = " (showing first 10):"
    putStrLn ("Unparseable lines found" ++ suffix)
    mapM_ (putStrLn . S.unpack) (take 10 unparseable)
  case events of
    [] -> return emptyRenderable
    (firstEvent : _) -> do
      let -- Keep only the events strictly before the requested end time.
          dropLateEvents es = maybe es (\limit -> takeWhile ((< limit) . fst) es) maxT
          -- Output (track name, kind) pairs that one chart-kind entry
          -- yields for input track t.
          i2o t (kind, suf) = case kind of
            KindNone              -> []
            KindWithin mapName sk -> [(S.append (mapName t) suf, sk)]
            _                     -> [(S.append t suf, kind)]
          i2oTracks t = concatMap (i2o t) (chartKindF t)

      -- Pass 1: find out min/max time and final track names.
      let t0 = fst firstEvent
          step (!lo, !hi, !tracks) (t, e) =
            ( min t lo
            , max t hi
            , foldr (uncurry M.insert) tracks (i2oTracks (evt_track e))
            )
          (minTime, maxTime, outTracks) =
            foldl' step (t0, t0, M.empty) (dropLateEvents events)

      -- Explicit bounds, when supplied, win over the observed ones.
      let minOutTime = maybe minTime id minT
          maxOutTime = maybe maxTime id maxT
          relabel (t, s) = (t, transformLabel t s)
          transformLabels axis = axis & axis_labels %~ map (map relabel)
          commonTimeAxis = transformLabels (autoAxis [minOutTime, maxOutTime])

      -- Pass 2: re-run the parser and route each event to its output tracks.
      events' <- fmap (dropLateEvents . parsedData) parseEvents
      let eventsToTracks =
            [ (outTrack, (t, e))
            | (t, e) <- events'
            , (outTrack, _) <- i2oTracks (evt_track e)
            ]
          initPlot track = initGen (outTracks M.! track) (S.unpack track) minTime maxTime
          plots = I.runStreamSummary (I.byKey initPlot) eventsToTracks
          layouts = map (dataToPlot commonTimeAxis (minOutTime, maxOutTime)) (M.elems plots)

      -- Render
      return (renderStackedLayouts (def & slayouts_layouts .~ layouts))
-- | Print the command-line usage text to stdout, one line per list entry.
-- The text covers the options, the input line format and the supported
-- diagram kinds.
showHelp :: IO ()
showHelp = mapM_ putStrLn [ "",
  "tplot - a tool for drawing timing diagrams.",
  " See http://www.haskell.org/haskellwiki/Timeplot",
  "Usage: tplot [-o OFILE] [-of {png|pdf|ps|svg}] [-or 640x480]",
  " -if IFILE [-tf TF] ",
  " [{+|-}k Pat1 '[+Suf1] Kind1' {+|-}k Pat2 '[+Suf2] Kind2' ...] [{+|-}dk '[+Suf] KindN']",
  " [-fromTime TIME] [-toTime TIME] [-baseTime TIME]",
  " --version - show version information",
  " --help - show this help",
  " -o OFILE - output file",
  " -of - output format (default: extension of -o)",
  " -or - output resolution (default 640x480)",
  " -if IFILE - input file; '-' means 'read from stdin'",
  " NOTE: for large datasets, use actual files, not stdin,",
  " as tplot can operate on them in streaming mode, which",
  " allows it to use a lot less memory and work on virtually",
  " unbounded datasets",
  " -tf TF - time format: -tf 'date PATTERN' means that times are dates in the format",
  " specified by PATTERN - see http://linux.die.net/man/3/strptime, ",
  " for example, -tf 'date [%Y-%m-%d %H:%M:%S]' parses dates like ",
  " '[2009-10-20 16:52:43]'.",
  " We also support %OS for fractional seconds (i.e. %OS will",
  " parse 12.4039 or 12,4039) and %^[+-][N]s for ten-powers ",
  " of seconds since epoch, for example %^-3s is ms since epoch.",
  " -tf elapsed means times are fractional seconds since an unknown moment.",
  " Default: 'date %Y-%m-%d %H:%M:%OS'",
  " {+|-}dk - set default diagram kind",
  " {+|-}k P K - set diagram kind for tracks matching regex P (in the format",
  " of regex-tdfa, which is at least POSIX-compliant and",
  " supports some GNU extensions) to K",
  " EXPLANATION:",
  " -k clauses are matched till first success, +k are all",
  " matched: a track is drawn acc. to all matching +k, to +dk",
  " AND ALSO to the first matching -k, or -dk if none of -k",
  " match",
  " EXPLANATION OF SUF:",
  " If '+Suf' is present (e.g. +k request '+frequency freq 60'),",
  " then '.Suf' is appended to the input track name while mapping it",
  " to the output track. This is so that a single input track can",
  " participate in many output tracks - tplot can have only one output",
  " track with a particular name, so you can't have one track named",
  " 'request' which draws 'freq 60' and another one for 'hist 60' - ",
  " you should use +k request '+frequency freq 60' +k request '+histogram hist 60'",
  " and you'll get output tracks 'request.frequency' and 'request.histogram'",
  " -fromTime - filter records whose time is >= this time",
  " (formatted according to -tf)",
  " -toTime - filter records whose time is < this time",
  " (formatted according to -tf)",
  " -baseTime - display time difference with this value instead of absolute time",
  " (formatted according to -tf)",
  "",
  "Input format: lines of the following form:",
  "1234 >A - at time 1234, activity A has begun",
  "1234 <A - at time 1234, activity A has ended",
  "1234 !B - at time 1234, pulse event B has occurred",
  "1234 !B TEXT - at time 1234, pulse event B has occurred with label TEXT",
  "1234 @B COLOR - at time 1234, the status of B became such that it is",
  " appropriate to draw it with color COLOR :)",
  "1234 =C VAL - at time 1234, parameter C had numeric value VAL (for example,",
  " HTTP response time)",
  "1234 =D `EVENT - at time 1234, event EVENT occurred in process D (for",
  " example, HTTP response code)",
  "It is assumed that many events of the same kind may occur at once.",
  "Diagram kinds:",
  " 'none' - do not plot this track",
  " 'event' is for event diagrams: activities are drawn like --[===]--- ,",
  " pulse events like --|-- with a label over '|'",
  " 'duration [drop] XXXX' - plot any kind of diagram over the *durations* of events",
  " on a track (delimited by > ... <), for example 'duration quantile",
  " 300 0.25,0.5,0.75' will plot these quantiles of durations of the",
  " events. This is useful where your log looks like 'Started processing'",
  " ... 'Finished processing': you can plot processing durations without",
  " computing them yourself. Very useful inside 'within'!",
  " If you use 'drop', then names of the original input tracks will be dropped",
  " before putting the events onto the output track, e.g. an event rtime.14e3ac1",
  " when used by 'within[.] duration drop dots', will be put onto the output track",
  " 'rtime', with input track 'rtime'. When used by 'within[.] duration dots',",
  " its input track will still be rtime.14e3ac1. The difference is whether",
  " the output of 'duration' appears to 'XXXX' as a single or multiple input tracks.",
  " E.g. if you're measuring durations of processing unique requests with rtime.REQID,",
  " then use 'drop'; if it's durations of processing at certain stages with rtime.STAGE",
  " then don't.",
  " 'within[C] XXXX' - draw plot XXXX over events grouped by their track's name ",
  " before separator C. For example, if you have processes",
  " named 'MACHINE-PID' (i.e. UNIT027-8532) say 'begin something' / ",
  " 'end something' and you're interested in the properties of per-machine",
  " durations, use within[-] duration dots; or if you've got jobs starting",
  " and finishing tasks on different machines, and you want to plot a diagram",
  " showing the number of utilized machines and how this number is composed of",
  " utilization by different jobs, make your trace say '>job-JOBID'...'<job-JOBID'",
  " and use -k job 'within[-] count 1'.",
  " Explanation: if you specify -k REGEX 'within[.] SOMETHING', timeplot will",
  " take all tracks matching REGEX, split each track around the first '.', giving",
  " a 'supertrack' and 'subtrack' (e.g. customer.John -> customer, John), ",
  " group the events by supertrack and for each supertrack draw a graphical track",
  " using the plot type SOMETHING. It's up to SOMETHING to do something with these",
  " events, e.g. 'lines' will simply draw several line plots, one per subtrack.",
  " 'acount N' is for activity counts: a histogram is drawn with granularity",
  " of N time units, where the bin corresponding to [t..t+N) has value",
  " 'what was the average number of active events or impulses in that",
  " interval'. When used inside 'within', the histogram is a stacked one,",
  " with one vertical bar per subtrack in each bin.",
  " 'count N' is same as 'acount N' but scaled by the bin size, i.e. rather than ",
  " activity rate per time unit, it's activity count per time bin. ",
  " 'apercent N B' is for activity percentages of a basis: like 'acount N',",
  " but instead of X you get 100*X/B",
  " 'afreq N' is for activity frequencies: it's like acount, but relative",
  " rather than absolute - it only makes sense inside 'within', because",
  " otherwise it would just always show a filled one-coloured bar in every bin.",
  " 'freq N [TYPE]' is for event frequency histograms: a histogram of type",
  " TYPE (stacked or clustered, default clustered) is drawn for each time",
  " bin of size N, about the *frequency* of various ` events",
  " 'hist N [TYPE]' is for event count histograms: a histogram of type TYPE",
  " (stacked or clustered, default clustered) is drawn for each time bin",
  " of size N, about the *counts* of various ` events",
  " 'quantile N q1,q2,..' (example: quantile 100 0.25,0.5,0.75) - a bar chart",
  " of corresponding quantiles in time bins of size N",
  " 'binf N v1,v2,..' (example: binf 100 1,2,5,10) - a histogram of frequency",
  " of values falling into bins min..v1, v1..v2, .., v2..max in time bins",
  " of size N",
  " 'binh N v1,v2,..' (example: binh 100 1,2,5,10) - a histogram of counts of",
  " values falling into bins min..v1, v1..v2, .., v2..max in time bins of",
  " size N",
  " 'lines' - a simple line plot of numeric values. When used in 'within', ",
  " gives one plot per subtrack.",
  " 'dots' - a simple dot plot of numeric values. When used in 'within', ",
  " gives one plot per subtrack.",
  " 'dots ALPHA' - a simple dot plot of numeric values. When used in 'within', ",
  " gives one plot per subtrack. All dots are drawn with opacity ALPHA,",
  " where 0 means transparent and 1 means opaque. Useful when you're suffering",
  " from overplotting (dots overlapping each other too much)",
  " 'cumsum N [TYPE]' - a simple line plot of the running sum of the numeric values,",
  " over bins of size N - as if you plotted 'sum N', but sums are accumulated over time",
  " When used in 'within', produce 1 subplot per subtrack. TYPE can be: ",
  " 'overlayed' -> just lay the subplots over one another.",
  " 'stacked' -> add them up at each point to see how subtracks contribute",
  " to the total cumulative sum (default; only makes sense inside 'within')",
  " 'sum N [TYPE]' - a simple line plot of the sum of the numeric values in time",
  " bins of size N. N is measured in units or in seconds.",
  " When used in 'within', produce 1 subplot per subtrack. TYPE used in same ",
  " way as in cumsum."
  ]
-- | Description of the git revision, computed while compiling: the splice
-- runs 'getRevision' through 'qRunIO' and embeds the resulting string.
-- It is @"<none>"@ when no revision is reported; otherwise it is the hash,
-- followed by a note when the second component of the result is 'True'.
showGitVersion :: String
showGitVersion = $(do
    revision <- qRunIO getRevision
    -- The helper must live inside the splice so it is available at compile time.
    let describe (hash, modified)
          | modified  = hash ++ " (with local modifications)"
          | otherwise = hash
    lift (maybe "<none>" describe revision))
-- | Program entry point: passes the command-line arguments to 'mainWithArgs'.
main = getArgs >>= mainWithArgs
-- | Run the tool for the given argument list.
--
-- With no arguments, or exactly @--help@, the usage text is printed and the
-- program exits successfully. With exactly @--version@, the version line is
-- printed and the program exits successfully. Otherwise the arguments are
-- turned into a configuration by 'readConf', the chart is built by
-- 'makeChart' and written to the configured output file.
mainWithArgs args = do
  when (null args || args == ["--help"]) (showHelp >> exitSuccess)
  -- An empty argument list has already exited above, so only the flag is checked.
  when (args == ["--version"]) $ do
    putStrLn (concat ["This is timeplot-", showVersion version, " (git ", showGitVersion, ")"])
    exitSuccess
  let !conf = readConf args
  let format = case outFormat conf of
        OutPNG -> PNG
        OutPDF -> PDF
        OutPS  -> PS
        OutSVG -> SVG
  let render r w h f =
        renderableToFile (def & fo_format .~ format & fo_size .~ (w, h)) f r
  case conf of
    ConcreteConf { parseTime = timeParser
                 , inFile = inputPath
                 , chartKindF = kindsFor
                 , outFile = outputPath
                 , outResolution = resolution
                 , fromTime = startTime
                 , toTime = endTime
                 , transformLabel = relabel
                 } -> do
      -- makeChart runs its source action twice. For stdin the input is read
      -- once here and the parsed result is handed back on both runs.
      source <-
        if inputPath == "-"
          then do
            putStrLn "Warning: working in non-streaming mode (for very large datasets, supply input from a file)"
            events <- readSource timeParser inputPath
            return (return events)
          else return (readSource timeParser inputPath)
      chart <- makeChart kindsFor source startTime endTime relabel
      let (width, height) = resolution
      render chart width height outputPath