-
Notifications
You must be signed in to change notification settings - Fork 0
/
Stats.hs
116 lines (98 loc) · 4.18 KB
/
Stats.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
-- hisg - IRC stats generator.
--
-- Copyright (c) 2009, 2010 Antoine Kalmbach <antoine dot kalmbach at jyu dot fi>
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
-- * Redistributions of source code must retain the above copyright
-- notice, this list of conditions and the following disclaimer.
-- * Redistributions in binary form must reproduce the above copyright
-- notice, this list of conditions and the following disclaimer in the
-- documentation and/or other materials provided with the distribution.
-- * Neither the name of the author nor the
-- names of its contributors may be used to endorse or promote products
-- derived from this software without specific prior written permission.
--
-- For further details, see LICENSE.
module Hisg.Stats (
isMessage,
-- getDates,
-- getKicks,
calcMessageStats,
calcMessageStats'',
processMessages,
updateMap
-- getNicks,
) where
import Data.List
import Data.Maybe
import Control.Monad
import Control.Monad.State
import qualified Data.Map as M
import qualified Data.Set as Set
import qualified Data.ByteString.Char8 as S
import qualified Data.ByteString.Lazy.Char8 as L
import Text.Regex.PCRE.Light (compile, match)
import Hisg.Types
import Hisg.MapReduce
import Hisg.Formats.Irssi
--instance Ord User where
-- (User n1 w1 l1) <= (User n2 w2 l2) = l1 <= l2
-- (User n1 w1 l1) > (User n2 w2 l2) = l1 > l2
--
--instance Eq User where
--(User n1 w1 l1) == (User n2 w2 l2) = n1 == n2 && w1 == w2 && l1 == l2
-- | The StatsM monad allows us to work on a changing set of log events.
-- As a result, we can prune what we want from the log while calculating stats,
-- thus reducing the size of the log file by not having to travel through
-- every bit of (un-lazy) data.
type StatsM = State Log
-- | Gets the messages from the current log and updates the state with the remaining non-messages.
takeMessages :: StatsM Log
takeMessages = do
evts <- get
put $ map snd $ (map (partition isMessage) evts)
return $ map fst $ (map (partition isMessage) evts)
isMessage (Message _ _ _) = True
isMessage _ = False
workMessageStats :: StatsM [(S.ByteString, (Int, Int))]
workMessageStats = do
msgs <- takeMessages
return $ processMessages msgs
--workKickStats :: StatsM [(S.ByteString, Int)]
--workKickStats = diio
-- msgs <- takeKicks
--
calcMessageStats :: Log -> [(S.ByteString, (Int, Int))]
calcMessageStats log = processMessages log
calcMessageStats'' :: [L.ByteString] -> M.Map S.ByteString (Int, Int)
calcMessageStats'' = mapReduce rwhnf (foldl' updateWLC M.empty . L.lines)
rwhnf (M.unionsWith (sumTuples))
where
updateWLC map line =
case match (compile normalMessage []) (conv line) [] of
Just (_:ts:nick:contents)
-> M.insertWith' (sumTuples) nick (1, length . S.words . S.concat $ contents) map
_ -> map
conv = S.concat . L.toChunks
--calcKickStats :: Log -> [(S.ByteString, Int)]
--calcKickStats = evalState (workKickStats)
--toUser (n, l, w) = User n l w
--toUser' (n, l) = User n l 0
processMessages :: Log -> [(S.ByteString, (Int, Int))]
processMessages log = M.toList $ mapReduce rwhnf (foldl' updateWLC M.empty)
rwhnf (M.unionsWith (sumTuples)) log
processMessages' :: Log -> [(S.ByteString, Int)]
processMessages' log = M.toList $ mapReduce rwhnf (foldl' updateWLC' M.empty)
rwhnf (M.unionsWith (+)) log
-- | Alias for insertWith (it's shorter!)
updateMap :: (Ord k) => (a -> a -> a) -> k -> a -> M.Map k a -> M.Map k a
updateMap fn key value = M.insertWith fn key value
updateWLC :: M.Map S.ByteString (Int, Int) -> LogEvent -> M.Map S.ByteString (Int, Int)
updateWLC map (Message ts nick line) = updateMap sumTuples nick (1, length $ S.words line) map
updateWLC map _ = map
updateWLC' :: M.Map S.ByteString Int -> LogEvent -> M.Map S.ByteString Int
updateWLC' map (Message ts nick line) = updateMap (+) nick 1 map
updateWLC' map _ = map
sumTuples (a,b) (c,d) = (a+c, b+d)