Skip to content

Commit

Permalink
Optimization, optimization, optimization, optimization... still using…
Browse files Browse the repository at this point in the history
… way too much memory
  • Loading branch information
ane committed Sep 18, 2010
1 parent 49b00df commit 43ba253
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 33 deletions.
3 changes: 2 additions & 1 deletion src/Hisg/Core.hs
Expand Up @@ -77,8 +77,9 @@ processFiles = do

formatLog :: String -> IRCLog -> FormatterM String
formatLog chan logf = do
let popular (_, (_, a)) (_, (_, b)) = compare b a
insertHeaders chan
insertScoreboard (take 15 (reverse . sort $ calcMessageStats (contents logf)))
insertScoreboard (take 15 . sortBy popular $ calcMessageStats (contents logf))
insertFooter "0.1.0"
getFinalOutput

Expand Down
9 changes: 7 additions & 2 deletions src/Hisg/Formats/Base.hs
@@ -1,9 +1,14 @@
module Hisg.Formats.Base (
validNickChars
validNickChars,
validChanChars
) where

import qualified Data.ByteString.Char8 as S

-- | Valid nickname characters according to IRC RFC 2812 are A-Z, a-z, {}[]`^\|.
validNickChars :: String
validNickChars = "[A-Za-z\\[\\]\\\\`_\\^\\{\\|\\}"
validNickChars = "A-Za-z\\[\\]\\\\`_\\^\\{\\|\\}"

-- | Valid channel characters are "any octet except NUL, BELL, CR, LF, " ", "," and ":""
validChanChars :: String
validChanChars = "^\\r\\n\\0\\s,:"
11 changes: 10 additions & 1 deletion src/Hisg/Formats/Irssi.hs
Expand Up @@ -11,18 +11,27 @@ module Hisg.Formats.Irssi (
) where

import Hisg.Types
import Hisg.Formats.Base (validNickChars)
import Hisg.Formats.Base
import qualified Data.ByteString.Char8 as S


-- | The timestamp is the time at which any event occurred.
timeStamp :: String
timeStamp = "(\\d{2}:\\d{2})"

nickName :: String
nickName = "<.([" ++ validNickChars ++ "]+)>"

channelName :: String
channelName = "[#&!+]([" ++ validChanChars ++ "]+)"

-- | The normalMessage represents a normal IRC channel-directed PRIVMSG, e.g.:
-- 11:00 < Kalroth> DOCTOR IS THAT MY BONE STICKING OUT AHHH
-- The capture group consists of three parts: timestamp, nickname and message.
normalMessage :: S.ByteString
normalMessage = S.intercalate (S.pack "\\s") $ map S.pack [timeStamp, nickName, "(.*)"]

-- | The kick is a kick, containing four elements: kick target, kicker, channel and the reason, e.g.:
-- 07:00 -!- rzz_ was kicked from #elitistjerks by CrazyDazed [Rule 1: Don't be stupid]
kick :: S.ByteString
kick = S.intercalate (S.pack "\\s") $ map S.pack [timeStamp, "-!-", nickName, "was kicked from", channelName, "\\[(.*)\\]"]
8 changes: 6 additions & 2 deletions src/Hisg/Formatter.hs
Expand Up @@ -70,7 +70,11 @@ insertFooter :: String -> FormatterM ()
insertFooter ver = do
addOutput $ "</div><div id=\"footer\"><p>Generated with <a href=\"http://ane.github.com/hisg\">hisg</a> v" ++ ver ++ "</p></div></body></html>"

insertScoreboard :: [User] -> FormatterM ()
insertScoreboard :: [(S.ByteString, (Int, Int))] -> FormatterM ()
insertScoreboard users = do
addOutput "<h2>Top 15 users</h2>"
addOutput $ "<table>\n<tr><th>Nickname</th><th>Lines</th><th>Words</th></tr>" ++ concatMap (\(rank, u) -> "<tr><td><b>" ++ show rank ++ ".</b> " ++ S.unpack (userNick u) ++ "</td><td>" ++ show (userLines u) ++ "</td><td>" ++ show (userWords u) ++ "</td></tr>") (zip [1..] users) ++ "</table>"
addOutput $ "<table>\n<tr><th>Nickname</th><th>Lines</th><th>Words</th></tr>"
++ concatMap (\(rank, u) -> "<tr><td><b>" ++ show rank ++ ".</b> "
++ S.unpack (fst u) ++ "</td><td>" ++ show ((snd . snd) u)
++ "</td><td>" ++ show ((fst . snd) u)
++ "</td></tr>") (zip [1..] users) ++ "</table>"
53 changes: 33 additions & 20 deletions src/Hisg/Stats.hs
Expand Up @@ -37,44 +37,53 @@ import qualified Data.ByteString.Lazy.Char8 as S
import Hisg.Types
import Hisg.MapReduce

instance Ord User where
(User n1 w1 l1) <= (User n2 w2 l2) = l1 <= l2
(User n1 w1 l1) > (User n2 w2 l2) = l1 > l2

instance Eq User where
(User n1 w1 l1) == (User n2 w2 l2) = n1 == n2 && w1 == w2 && l1 == l2
--instance Ord User where
-- (User n1 w1 l1) <= (User n2 w2 l2) = l1 <= l2
-- (User n1 w1 l1) > (User n2 w2 l2) = l1 > l2
--
--instance Eq User where
--(User n1 w1 l1) == (User n2 w2 l2) = n1 == n2 && w1 == w2 && l1 == l2

-- | The StatsM monad allows us to work on a changing set of log events.
-- As a result, we can prune what we want from the log while calculating stats,
-- thus reducing the size of the log file by not having to travel through
-- every bit of (un-lazy) data.
type StatsM = State Log

-- | Gets the messages from the current log.
getMessages :: StatsM Log
getMessages = do
-- | Gets the messages from the current log and updates the state with the remaining non-messages.
takeMessages :: StatsM Log
takeMessages = do
evts <- get
put $ map (filter (not . isMessage)) evts
return $ map (filter isMessage) evts
put $ map snd $ (map (partition isMessage) evts)
return $ map fst $ (map (partition isMessage) evts)

isMessage (Message _ _ _) = True
isMessage _ = False

workMessageStats :: StatsM [User]
workMessageStats :: StatsM [(S.ByteString, (Int, Int))]
workMessageStats = do
msgs <- getMessages
return $ map toUser (processMessages msgs)
msgs <- takeMessages
return $ processMessages msgs

calcMessageStats :: Log -> [User]
--workKickStats :: StatsM [(S.ByteString, Int)]
--workKickStats = diio
-- msgs <- takeKicks
--
calcMessageStats :: Log -> [(S.ByteString, (Int, Int))]
calcMessageStats log = evalState (workMessageStats) log

toUser (n, l, w) = User n l w
--calcKickStats :: Log -> [(S.ByteString, Int)]
--calcKickStats = evalState (workKickStats)

processMessages :: Log -> [(S.ByteString, Int, Int)]
processMessages log = map fmt $ M.toList $ mapReduce rwhnf (foldl' updateWLC M.empty)
--toUser (n, l, w) = User n l w
--toUser' (n, l) = User n l 0

processMessages :: Log -> [(S.ByteString, (Int, Int))]
processMessages log = M.toList $ mapReduce rwhnf (foldl' updateWLC M.empty)
rwhnf (M.unionsWith (sumTuples)) log
where
fmt (a, (b, c)) = (a, c, b)
processMessages' :: Log -> [(S.ByteString, Int)]
processMessages' log = M.toList $ mapReduce rwhnf (foldl' updateWLC' M.empty)
rwhnf (M.unionsWith (+)) log

-- | Alias for insertWith (it's shorter!)
updateMap :: (Ord k) => (a -> a -> a) -> k -> a -> M.Map k a -> M.Map k a
Expand All @@ -84,5 +93,9 @@ updateWLC :: M.Map S.ByteString (Int, Int) -> LogEvent -> M.Map S.ByteString (In
updateWLC map (Message ts nick line) = updateMap sumTuples nick (1, length $ S.words line) map
updateWLC map _ = map

updateWLC' :: M.Map S.ByteString Int -> LogEvent -> M.Map S.ByteString Int
updateWLC' map (Message ts nick line) = updateMap (+) nick 1 map
updateWLC' map _ = map

sumTuples (a,b) (c,d) = (a+c, b+d)

15 changes: 9 additions & 6 deletions src/Hisg/Types.hs
Expand Up @@ -22,27 +22,30 @@ import Data.List
import Data.Char
import Text.Printf

import qualified Data.ByteString.Lazy.Char8 as S
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Data.ByteString.Char8 as S

newtype User = User (L.ByteString, Int, Int)

-- Basic types
data Timestamp = Timestamp { ts_hour :: Int, ts_minute :: Int }
data Date = Date { day :: Int, month :: Int, year :: Int }
data User = User { userNick :: S.ByteString, userWords :: Int, userLines :: Int }
--data User = User { (userNick :: L.ByteString, (userWords :: Int, userLines :: Int)) }
data Event = Event { eventTimestamp :: Timestamp, eventType :: EventType, eventUser :: String, eventHost :: String, eventParam :: String }

data Mode = Mode { modeTS :: Timestamp, modeChars :: String, authorHost :: String, targets :: String }
data Topic = Topic { topicTs :: Timestamp, topicAuthor :: String, topicContent :: String }
data Kick = Kick { kickTs :: Timestamp, kickAuthor :: String, kickTarget :: String, kickReason :: String }

data LogEvent =
Message { timestamp :: S.ByteString, nickname :: S.ByteString, content :: S.ByteString }
Message { timestamp :: L.ByteString, nickname :: L.ByteString, content :: L.ByteString }
| Notification String
| DateChange Date
| CustomEvent Event
| ModeChange Mode
| TopicChange Topic
| KickEvent Kick
| Simple S.ByteString
| Simple L.ByteString

-- Type aliases

Expand All @@ -67,14 +70,14 @@ instance Show Timestamp where
show (Timestamp h m) = printf "%02d:%02d" h m

instance Show LogEvent where
show (Message ts nick content) = S.unpack $ S.concat [ts, S.pack " <", nick, S.pack "> ", content]
show (Message ts nick content) = L.unpack $ L.concat [ts, L.pack " <", nick, L.pack "> ", content]
show (Notification cont) = cont
show (DateChange (Date d m y)) = intercalate " " (map show [d, m, y])
show (CustomEvent ev) = show ev
show (KickEvent (Kick ts author target reason)) = show ts ++ " " ++ author ++ " kicked " ++ target ++ ", reason: " ++ reason

instance Show User where
show (User nick words lines) = (S.unpack nick) ++ " :: " ++ show words ++ " words, " ++ show lines ++ " lines"
show (User (n, w, l)) = (L.unpack n) ++ " :: " ++ show w ++ " words, " ++ show l ++ " lines"

instance Show Event where
show (Event ts evtype evuser host param) = map (toUpper) (show ts) ++ " " ++ show evtype ++ ": " ++ evuser ++ " (" ++ host ++ ")" ++ " -> " ++ param
2 changes: 1 addition & 1 deletion src/Makefile
Expand Up @@ -3,7 +3,7 @@ GHC = ghc
MAIN = hisg

all:
$(GHC) -O2 --make Hisg -o $(MAIN)
$(GHC) -O2 -threaded --make Hisg -o $(MAIN)

prof:
$(GHC) -O2 -threaded -prof -auto-all -caf-all -fforce-recomp --make Hisg -o $(MAIN)
Expand Down

0 comments on commit 43ba253

Please sign in to comment.