Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Add some messing around with graphs

  • Loading branch information...
commit a5401909a29178d8282de43a503260c41d8976de 1 parent 2d99c07
Max Bolingbroke authored

Showing 1 changed file with 370 additions and 0 deletions. Show diff stats Hide diff stats

  1. 370  Graphs.hs
370  Graphs.hs
... ...
@@ -0,0 +1,370 @@
  1
+{-# LANGUAGE ScopedTypeVariables #-}
  2
+module Graphs where
  3
+
  4
+import qualified Data.Map as M
  5
+import qualified Data.IntMap as IM
  6
+import qualified Data.Set as S
  7
+
  8
+import Control.Applicative
  9
+import Control.Arrow
  10
+import Control.Monad
  11
+import Data.List
  12
+import Data.Maybe
  13
+
  14
+
  15
+type LGraph node edge = [(node, [(edge, node)])]
  16
+
  17
+shortcutEdges :: forall node edge.
  18
+                 Ord node
  19
+              => (node -> Bool)
  20
+              -> (edge -> node -> edge -> edge)
  21
+              -> LGraph node edge
  22
+              -> LGraph node edge
  23
+shortcutEdges should_shortcut combine g = evalState visit_graph M.empty
  24
+  where
  25
+    g_map = M.fromList g
  26
+
  27
+    --visit_graph :: State (M.Map node [(edge, node)]) (LGraph node edge)
  28
+    visit_graph = sequence $ flip mapMaybe g $ \(n, ens) -> do
  29
+        guard (not (should_shortcut n))
  30
+        return $ liftM ((,) n) $ visit S.empty ens
  31
+
  32
+    --visit :: [node] -> [(edge, node)] -> State (M.Map node [(edge, node)]) [(edge, node)]
  33
+    -- Given the outgoing edges for some node, returns all the outgoing edges for that node
  34
+    -- after shortcutting
  35
+    visit path ens = concatMapM (uncurry (visit' path)) ens
  36
+
  37
+    --visit' :: S.Set node -> edge -> node -> State (M.Map node [(edge, node)]) [(edge, node)]
  38
+    -- Given an edge label and the node reached via that label, returns all the nodes reached
  39
+    -- after shortcutting
  40
+    visit' path e n' | n' `S.member` path       = return []        -- Doesn't contribute any extra paths: all paths will considered by a caller
  41
+                     | not (should_shortcut n') = return [(e, n')] -- Won't be shortcutted away, no need to look further
  42
+                     | otherwise                = do
  43
+                        -- Since n' is not in the path, we can try to memoise
  44
+                        mb_res <- liftM (M.lookup n') get
  45
+                        res <- case mb_res of
  46
+                          Just res -> return res
  47
+                          Nothing  -> do
  48
+                            res <- visit (S.insert n' path) (M.findWithDefault (error "shortcutEdges") n' g_map)
  49
+                            modify (M.insert n' res)
  50
+                            return res
  51
+                        return $ map (first (combine e n')) res
  52
+
  53
+sccs :: Ord node
  54
+     => LGraph node edge
  55
+     -- -> LGraph (LGraph node edge) [(edge, node)]
  56
+     -> [[node]]
  57
+sccs g = case execState strongconnect_graph (0, M.empty, [], []) of (_, _, _, sccs) -> sccs
  58
+  where
  59
+    g_map = M.fromList g
  60
+
  61
+    -- Observations about Tarjan's algorithm:
  62
+    --  1. strongconnect(v) is only called if v.index is undefined
  63
+    --  2. Vertex v's lowlink is only mutated by strongconnect(v)
  64
+    --  3. Once index is set it is never changed
  65
+    --
  66
+    -- We can use these facts to build an implementation that makes minimal use of the state monad
  67
+
  68
+    strongconnect_graph = forM_ g $ \(n, ens) -> do
  69
+      ix_defined <- liftM (\(_, ixs, _, _) -> n `M.member` ixs) get
  70
+      unless ix_defined $ void $ strongconnect n ens
  71
+
  72
+    -- (strongconnect n ens) returns index of a node n' reachable from n such that that index[n'] < index[n],
  73
+    -- if possible. Otherwise returns index[n].
  74
+    strongconnect n ens = do
  75
+      ix <- state $ \(next_ix, ixs, s, sccs) -> (next_ix, (next_ix + 1, M.insert n next_ix ixs, n:s, sccs))
  76
+      lowlink <- (\f -> foldM f ix ens) $ \lowlink (e, n') -> do
  77
+        (mb_ix', in_s') <- liftM (\(_, ixs, s, _) -> (M.lookup n' ixs, n' `elem` s)) get
  78
+        case mb_ix' of
  79
+          Nothing              -> liftM (lowlink `min`) $ strongconnect n' (M.findWithDefault (error "sccs") n' g_map) -- Successor not yet visited: recurse on it
  80
+          Just ix' | in_s'     -> return $ lowlink `min` ix'                                                           -- Successor is in the stack and hence the current SCC
  81
+                   | otherwise -> return lowlink
  82
+      -- Since lowlink is at most ix, this condition can only be true if we failed to find a node reachable
  83
+      -- from n with a lower index. We use this as our cue to form a new SCC.
  84
+      when (lowlink == ix) $ do
  85
+        modify $ \(next_ix, ixs, s, sccs) -> let (scc, _n:s') = span (/= n) s in (next_ix, ixs, s', (n:scc) : sccs)
  86
+      -- Return this nodes final lowlink for use when computing the predecessors lowlink
  87
+      return lowlink
  88
+
  89
+sccs' :: forall node edge.
  90
+         (Ord node, Ord edge) -- FIXME: can relax (Ord edge) requirement really..
  91
+      => LGraph node edge
  92
+      -> LGraph (LGraph node edge) [(edge, node)]
  93
+sccs' g = case execState strongconnect_graph (0, M.empty, [], [], M.empty, M.empty) of (_, _, _, sccs, _, _) -> sccs
  94
+  where
  95
+    g_map = M.fromList g
  96
+
  97
+    -- Observations about Tarjan's algorithm:
  98
+    --  1. strongconnect(v) is only called if v.index is undefined
  99
+    --  2. Vertex v's lowlink is only mutated by strongconnect(v)
  100
+    --  3. Once index is set it is never changed
  101
+    --  4. Nodes occur in the stack in decreasing order of index
  102
+    --
  103
+    -- We can use these facts to build an implementation that makes minimal use of the state monad
  104
+
  105
+    strongconnect_graph = forM_ g $ \(n, ens) -> do
  106
+      ix_defined <- liftM (\(_, ixs, _, _, _, _) -> n `M.member` ixs) get
  107
+      unless ix_defined $ void $ strongconnect n ens
  108
+
  109
+    strongconnect :: node -> [(edge, node)]
  110
+                  -> State (Int,
  111
+                            M.Map node Int,
  112
+                            [node],
  113
+                            LGraph (LGraph node edge) [(edge, node)],
  114
+                            M.Map node               [(edge, node)],
  115
+                            M.Map (LGraph node edge) [(edge, node)])
  116
+                           (Int,
  117
+                            Maybe (LGraph node edge))
  118
+    -- (strongconnect n ens) returns:
  119
+    --  1. Index of a node n' reachable from n such that that index[n'] < index[n],
  120
+    --     if possible. Otherwise returns index[n].
  121
+    --  2. The SCC graph node of the newly-created SCC. If no new SCC was created then n is guaranteed
  122
+    --     to still be on the stack (which occurs iff we managed to find a suitable index[n'])
  123
+    strongconnect n ens = do
  124
+      ix <- state $ \(next_ix, ixs, s, sccs, all_internal_ens, all_external_ens) -> (next_ix, (next_ix + 1, M.insert n next_ix ixs, n:s, sccs, all_internal_ens, all_external_ens))
  125
+      (lowlink, internal_ens, external_ens) <- (\f -> foldM f (ix, [], M.empty) ens) $ \(lowlink, internal_ens, external_ens) (e, n') -> do
  126
+        (mb_ix', in_s') <- liftM (\(_, ixs, s, _, _, _) -> (M.lookup n' ixs, n' `elem` s)) get
  127
+        (lowlink, mb_scc) <- case mb_ix' of
  128
+                                  -- Successor not yet visited: recurse on it
  129
+                                  -- Since the index assigned to n' > ix, it is guaranteed that n will still be on the
  130
+                                  -- stack when we return.
  131
+                                  -- TODO: include an edge depending on in_s'
  132
+          Nothing              -> liftM (first (lowlink `min`)) $ strongconnect n' (M.findWithDefault (error "sccs") n' g_map)
  133
+                                  -- Successor is in the stack and hence the current SCC
  134
+                                  -- TODO: include this edge as an internal edge in the SCC
  135
+          Just ix' | in_s'     -> return (lowlink `min` ix', Nothing)
  136
+                                  -- Successor visited but not in stack: it is already part of another SCC
  137
+                                  -- TODO: prepare to emit an edge from this node to that other SCC
  138
+                   | otherwise -> do scc <- liftM (\(_, _, _, sccs, _, _) -> head [scc | (scc, _) <- sccs, any (\(n'', _) -> n'' == n') scc]) get
  139
+                                     return (lowlink, Just scc)
  140
+        (internal_ens, external_ens) <- return $ case mb_scc of
  141
+                                          Nothing  -> ((e, n'):internal_ens, external_ens)
  142
+                                          Just scc -> (internal_ens, M.insertWith (++) scc [(e, n')] external_ens)
  143
+        return (lowlink, internal_ens, external_ens)
  144
+      -- Record discovered internal/external edges
  145
+      modify $ \(next_ix, ixs, s, sccs, all_internal_ens, all_external_ens) -> (next_ix, ixs, s, sccs, M.insert n internal_ens all_internal_ens, M.unionWith (++) external_ens all_external_ens)
  146
+      -- Since lowlink is at most ix, this condition can only be true if we failed to find a node reachable
  147
+      -- from n with a lower index. We use this as our cue to form a new SCC.
  148
+      mb_scc <- if (lowlink == ix)
  149
+                 -- NB: because nodes on the stack are in decreasing order of index, this operation never pops a node with index < ix
  150
+                 then do scc <- state $ \(next_ix, ixs, s, sccs, all_internal_ens, all_external_ens) -> let (s_scc, _n:s') = span (/= n) s
  151
+                                                                                                            scc = [(n, M.findWithDefault (error "sccs") n all_internal_ens) | n <- n:s_scc]
  152
+                                                                                                            all_external_ens' = [(ens, scc) | (scc, ens) <- M.toList all_external_ens]
  153
+                                                                                                        in (scc, (next_ix, ixs, s', (scc, all_external_ens') : sccs, all_internal_ens, M.empty))
  154
+                         return (Just scc)
  155
+                 else return Nothing
  156
+      -- Return this nodes final lowlink for use when computing the predecessors lowlink
  157
+      return (lowlink, mb_scc)
  158
+
  159
+-- Given a graph, returns:
  160
+--  1. An acyclic graph of the strongly connected components of the input graph.
  161
+--     Each SCC is identified by a unique Int.
  162
+--  2. A mapping from Ints to the "sub-graph" corresponding to each SCC. Each sub-graph
  163
+--     contains all the nodes in the SCC as well as any edges between those nodes.
  164
+--     Note that in particular the sub-graph for an acyclic SCC will contain exactly one node and no edges.
  165
+--
  166
+-- Uses an adaptation of Tarjan's algorithm <http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm>
  167
+sccs'' :: forall node edge.
  168
+          Ord node
  169
+       => LGraph node edge
  170
+       -> (LGraph Int [(edge, node)],
  171
+           IM.IntMap (LGraph node edge))
  172
+sccs'' g = case execState strongconnect_graph (0, M.empty, [], [], IM.empty, M.empty, IM.empty) of (_, _, _, sccs, scc_datas, _, _) -> (sccs, scc_datas)
  173
+  where
  174
+    g_map = M.fromList g
  175
+
  176
+    -- Observations about Tarjan's algorithm:
  177
+    --  1. strongconnect(v) is only called if v.index is undefined
  178
+    --  2. Vertex v's lowlink is only mutated by strongconnect(v)
  179
+    --  3. Once index is set it is never changed
  180
+    --  4. Nodes occur in the stack in decreasing order of index
  181
+    --
  182
+    -- We can use these facts to build an implementation that makes minimal use of the state monad
  183
+
  184
+    strongconnect_graph = forM_ g $ \(n, ens) -> do
  185
+      ix_defined <- liftM (\(_, ixs, _, _, _, _, _) -> n `M.member` ixs) get
  186
+      unless ix_defined $ void $ strongconnect n ens
  187
+
  188
+    -- (strongconnect n ens) returns:
  189
+    --  1. Index of a node n' reachable from n such that that index[n'] < index[n],
  190
+    --     if possible. Otherwise returns index[n].
  191
+    --  2. Whether we didn't just create a new SCC containing n. If no new SCC was created then n is guaranteed
  192
+    --     to still be on the stack (which occurs iff we managed to find a suitable index[n'])
  193
+    --
  194
+    -- Precondition: there is no assigned index for n
  195
+    strongconnect :: node -> [(edge, node)]
  196
+                  -> State (-- Next node index to assign
  197
+                            Int,
  198
+                            -- Mapping from nodes to their assigned index (if any)
  199
+                            -- NB: after the node has been removed from the stack, we update the Int in the mapping
  200
+                            -- to instead be the lowlink of the SCC it was assigned to. This is OK because we don't
  201
+                            -- need the raw index of the node after that point: we only need record the fact that
  202
+                            -- it had some index at a point in the past
  203
+                            M.Map node Int,
  204
+                            -- Stack containing expanded nodes that are not presently in a SCC
  205
+                            [node],
  206
+                            -- Work-in-progress graph of SCC
  207
+                            LGraph Int [(edge, node)],
  208
+                            -- Work-in-progress SCC sub-graph mapping
  209
+                            IM.IntMap (LGraph node edge),
  210
+                            -- Records all discovered "internal" edges from expanded nodes to somewhere *within* their SCC
  211
+                            M.Map node [(edge, node)],
  212
+                            -- Records all discovered "external" edges from the current SCC-in-progress to some other (already existant) SCC
  213
+                            -- It might seem more obvious to use a [([(edge, node)], Int)] here, but that makes it awkward to common up multiple
  214
+                            -- edges from this SCC going to the same external SCC
  215
+                            IM.IntMap  [(edge, node)])
  216
+                           (Int, Bool)
  217
+    strongconnect n ens = do
  218
+      ix <- state $ \(next_ix, ixs, s, sccs, scc_datas, all_internal_ens, all_external_ens) -> (next_ix, (next_ix + 1, M.insert n next_ix ixs, n:s, sccs, scc_datas, all_internal_ens, all_external_ens))
  219
+      (lowlink, internal_ens, external_ens) <- (\f -> foldM f (ix, [], IM.empty) ens) $ \(lowlink, internal_ens, external_ens) (e, n') -> do
  220
+        (mb_ix', in_s') <- liftM (\(_, ixs, s, _, _, _, _) -> (M.lookup n' ixs, n' `elem` s)) get
  221
+        (lowlink, mb_scc) <- case mb_ix' of
  222
+                                  -- Successor not yet visited: recurse on it
  223
+                                  -- Whether we add an internal or external edge depends on whether the recursive call created an SCC or not.
  224
+                                  -- If it did create an SCC, that SCC will be identified by lowlink'
  225
+          Nothing              -> do (lowlink', in_s') <- strongconnect n' (M.findWithDefault (error "sccs") n' g_map)
  226
+                                     return (lowlink `min` lowlink', if in_s' then Nothing else Just lowlink')
  227
+                                  -- Successor is in the stack and hence the current SCC, so record an internal edge
  228
+          Just ix' | in_s'     -> return (lowlink `min` ix', Nothing)
  229
+                                  -- Successor visited but not in stack: it is already part of another SCC, so record an external edge
  230
+                                  -- NB: this makes use of my hack whereby ix' will actually be a SCC lowlink for such successors
  231
+                   | otherwise -> return (lowlink, Just ix')
  232
+        (internal_ens, external_ens) <- return $ case mb_scc of
  233
+                                          Nothing  -> ((e, n'):internal_ens, external_ens)
  234
+                                          Just scc -> (internal_ens, IM.insertWith (++) scc [(e, n')] external_ens)
  235
+        return (lowlink, internal_ens, external_ens)
  236
+      -- Record accumulated internal/external edges. We don't need to record them as we go along because they can only possibly be used by one of our callers, not our callees
  237
+      modify $ \(next_ix, ixs, s, sccs, scc_datas, all_internal_ens, all_external_ens) -> (next_ix, ixs, s, sccs, scc_datas, M.insert n internal_ens all_internal_ens, IM.unionWith (++) external_ens all_external_ens)
  238
+      -- Since lowlink is at most ix, this condition can only be true if we failed to find a node reachable
  239
+      -- from n with a lower index. We use this as our cue to form a new SCC.
  240
+      in_s <- if (lowlink == ix)
  241
+               -- NB: because nodes on the stack are in decreasing order of index, this operation never pops a node with index < ix
  242
+              then do modify $ \(next_ix, ixs, s, sccs, scc_datas, all_internal_ens, all_external_ens) -> let (s_scc, _n:s') = span (/= n) s
  243
+                                                                                                              scc = [(n, M.findWithDefault (error "sccs") n all_internal_ens) | n <- n:s_scc]
  244
+                                                                                                              all_external_ens' = [(ens, scc) | (scc, ens) <- IM.toList all_external_ens]
  245
+                                                                                                              -- Replace node indexes with the lowlink of the SCC they were assigned to (a small hack to save one map lookup):
  246
+                                                                                                              ixs' = foldr (\n -> M.insert n lowlink) ixs (n:s_scc)
  247
+                                                                                                          in (next_ix, ixs', s', (lowlink, all_external_ens') : sccs, IM.insert lowlink scc scc_datas, all_internal_ens, IM.empty)
  248
+                      return False
  249
+              else return True
  250
+      -- Return this nodes final lowlink for use when computing the predecessors lowlink
  251
+      return (lowlink, in_s)
  252
+
  253
+obscure :: Ord node => LGraph node edge -> LGraph Int edge
  254
+obscure g = [(to_key n, [(e, to_key n') | (e, n') <- ens]) | (n, ens) <- g]
  255
+  where key_map = M.fromList [(n, i) | ((n, _), i) <- g `zip` [0..]]
  256
+        to_key n = M.findWithDefault (error "obscure") n key_map
  257
+
  258
+
  259
+{-
  260
+sccs g = search_graph -- FIXME
  261
+  where
  262
+    g_map = M.fromList g
  263
+
  264
+    allocatePreorderNumber n = state $ \(next_pon, pons, s, p, assigned, sccs) -> case M.lookup n pons of
  265
+      Just pon -> ((False, pon),      (next_pon,     pons,                     s, p, assigned, sccs))
  266
+      Nothing  -> ((True,  next_pon), (next_pon + 1, M.insert n next_pon pons, s, p, assigned, sccs))
  267
+
  268
+    search_graph = case execState (mapM_ (uncurry search) g) (0, M.empty, [], [], S.empty, []) of (_, _, _, _, _, sccs) -> sccs
  269
+
  270
+    search n ens = do
  271
+      (fresh, pon) <- allocatePreorderNumber n
  272
+      if not fresh
  273
+       then return (Just pon)
  274
+       else do
  275
+        -- Push onto both stacks
  276
+        modify $ \(next_pon, pons, s, p, assigned, sccs) -> (next_pon, pons, n:s, (n, pon):p, assigned, sccs)
  277
+        -- Consider paths
  278
+        forM ens $ \(e, n') -> do
  279
+          mb_n'_pon <- search n' (M.findWithDefault (error "sccs") n' g_map)
  280
+          case mb_n'_pon of
  281
+            -- PON was not yet assigned, the recursive search was enough
  282
+            Nothing -> return ()
  283
+            -- PON was already assigned, need to mess with p
  284
+            Just n'_pon -> modify $ \(next_pon, pons, s, p, assigned, sccs) -> if n' `S.member` assigned
  285
+                                                                               then (next_pon, pons, s, p, assigned, sccs)
  286
+                                                                               else (next_pon, pons, s, dropWhile (\(_, p_pon) -> p_pon > n'_pon) p, assigned, sccs)
  287
+        modify $ \(next_pon, pons, s, p, assigned, sccs) -> case p of
  288
+          (p_head, _):p_tail | p_head == n -> (next_pon, pons, s, p_tail, assigned `S.union` S.fromList scc, scc : sccs)
  289
+            where (s1, _n:s2) = span (/= n) s
  290
+                  scc = n:s1
  291
+          _ -> (next_pon, pons, s, p, assigned, sccs)
  292
+        return Nothing
  293
+-}
  294
+
  295
+
  296
+g0 = [("Root", [("a", "Residual Loop")
  297
+               ,("b", "Fully Shortcutted Loop")
  298
+               ,("c", "Shortcutted Loop")
  299
+               ,("d", "Indirect Non-Loop 1")
  300
+               ,("e", "Indirect Non-Loop 2")])
  301
+     ,("Residual Loop", [("f", "Residual Loop")])
  302
+     ,("Fully Shortcutted Loop", [("g", "Fully Shortcutted Loop")])
  303
+     ,("Shortcutted Loop", [("h", "Shortcutted Loop 1")])
  304
+     ,("Shortcutted Loop 1", [("i", "Shortcutted Loop")])
  305
+     ,("Indirect Non-Loop 1", [("j", "Indirect Non-Loop 2")])
  306
+     ,("Indirect Non-Loop 2", [])]
  307
+
  308
+test1 = shortcutEdges (`elem` ["Fully Shortcutted Loop", "Shortcutted Loop 1"]) (\e1 n e2 -> e1 ++ "(" ++ n ++ ")" ++ e2) g0
  309
+
  310
+test2 = sccs g0
  311
+
  312
+test3 = sccs test1
  313
+
  314
+test4 = sccs'' g0
  315
+
  316
+test5 = sccs'' test1
  317
+
  318
+
  319
+-- Code below this line stolen from GHC to save time
  320
+newtype State s a = State { runState' :: s -> ( a, s ) }
  321
+
  322
+instance Functor (State s) where
  323
+    fmap f m  = State $ \s -> case runState' m s of
  324
+                              ( r, s' ) -> ( f r, s' )
  325
+
  326
+instance Applicative (State s) where
  327
+   pure x   = State $ \s -> ( x, s )
  328
+   m <*> n  = State $ \s -> case runState' m s of
  329
+                            ( f, s' ) -> case runState' n s' of
  330
+                                           ( x, s'' ) -> ( f x, s'' )
  331
+
  332
+instance Monad (State s) where
  333
+    return x = State $ \s -> ( x, s )
  334
+    m >>= n  = State $ \s -> case runState' m s of
  335
+                             ( r, s' ) -> runState' (n r) s'
  336
+
  337
+get :: State s s
  338
+get = State $ \s -> ( s, s )
  339
+
  340
+gets :: (s -> a) -> State s a
  341
+gets f = State $ \s -> ( f s, s )
  342
+
  343
+put :: s -> State s ()
  344
+put s' = State $ \_ -> ( (), s' )
  345
+
  346
+modify :: (s -> s) -> State s ()
  347
+modify f = State $ \s -> ( (), f s )
  348
+
  349
+state :: (s -> (a, s)) -> State s a
  350
+state f = State $ \s -> case f s of (x, s') -> ( x, s' )
  351
+
  352
+
  353
+evalState :: State s a -> s -> a
  354
+evalState s i = case runState' s i of
  355
+                ( a, _ ) -> a
  356
+
  357
+
  358
+execState :: State s a -> s -> s
  359
+execState s i = case runState' s i of
  360
+                ( _, s' ) -> s'
  361
+
  362
+
  363
+runState :: State s a -> s -> (a, s)
  364
+runState s i = case runState' s i of
  365
+               ( a, s' ) -> (a, s')
  366
+
  367
+
  368
+-- | Monadic version of concatMap
  369
+concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b]
  370
+concatMapM f xs = liftM concat (mapM f xs)

0 notes on commit a540190

Please sign in to comment.
Something went wrong with that request. Please try again.