/
Node.hs
1285 lines (1179 loc) · 50.3 KB
/
Node.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE MagicHash #-}
-- | Local nodes
--
module Control.Distributed.Process.Node
( LocalNode
, newLocalNode
, closeLocalNode
, forkProcess
, runProcess
, initRemoteTable
, localNodeId
) where
-- TODO: Calls to 'sendBinary' and co (by the NC) may stall the node controller.
import System.IO (fixIO, hPutStrLn, stderr)
import System.Mem.Weak (Weak, deRefWeak)
import qualified Data.ByteString.Lazy as BSL (fromChunks)
import Data.Binary (decode)
import Data.Map (Map)
import qualified Data.Map as Map
( empty
, toList
, fromList
, filter
, partitionWithKey
, elems
, size
, filterWithKey
, foldlWithKey
)
import Data.Time.Format (formatTime)
#if MIN_VERSION_time(1,5,0)
import Data.Time.Format (defaultTimeLocale)
#else
import System.Locale (defaultTimeLocale)
#endif
import Data.Set (Set)
import qualified Data.Set as Set
( empty
, insert
, delete
, map
, member
, toList
, union
)
import Data.Foldable (forM_)
import Data.Maybe (isJust, fromJust, isNothing, catMaybes)
import Data.Typeable (Typeable)
import Control.Category ((>>>))
import Control.Applicative
import Control.Monad (void, when, join)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.State.Strict (MonadState, StateT, evalStateT, gets)
import qualified Control.Monad.State.Strict as StateT (get, put)
import Control.Monad.Reader (MonadReader, ReaderT, runReaderT, ask)
import Control.Exception
( throwIO
, SomeException
, Exception
, throwTo
, uninterruptibleMask_
, getMaskingState
, MaskingState(..)
)
import qualified Control.Exception as Exception
( Handler(..)
, catch
, catches
, finally
)
import Control.Concurrent (forkIO, killThread)
import Control.Distributed.Process.Internal.BiMultiMap (BiMultiMap)
import qualified Control.Distributed.Process.Internal.BiMultiMap as BiMultiMap
import Control.Distributed.Process.Internal.StrictMVar
( newMVar
, withMVar
, modifyMVarMasked
, modifyMVar
, newEmptyMVar
, putMVar
, takeMVar
)
import Control.Concurrent.Chan (newChan, writeChan, readChan)
import qualified Control.Concurrent.MVar as MVar (newEmptyMVar, takeMVar)
import Control.Concurrent.STM
( atomically
)
import Control.Distributed.Process.Internal.CQueue
( CQueue
, enqueue
, newCQueue
, mkWeakCQueue
, queueSize
)
import qualified Network.Transport as NT
( Transport
, EndPoint
, newEndPoint
, receive
, Event(..)
, EventErrorCode(..)
, TransportError(..)
, address
, closeEndPoint
, Connection
, ConnectionId
, close
, EndPointAddress
, Reliability(ReliableOrdered)
)
import Data.Accessor (Accessor, accessor, (^.), (^=), (^:))
import System.Random (randomIO)
import Control.Distributed.Static (RemoteTable, Closure)
import qualified Control.Distributed.Static as Static
( unclosure
, initRemoteTable
)
import Control.Distributed.Process.Internal.Types
( NodeId(..)
, LocalProcessId(..)
, ProcessId(..)
, LocalNode(..)
, MxEventBus(..)
, LocalNodeState(..)
, ValidLocalNodeState(..)
, withValidLocalState
, modifyValidLocalState
, LocalProcess(..)
, LocalProcessState(..)
, Process(..)
, DiedReason(..)
, NCMsg(..)
, ProcessSignal(..)
, localPidCounter
, localPidUnique
, localProcessWithId
, localProcesses
, localConnections
, forever'
, MonitorRef(..)
, NodeClosedException(..)
, ProcessMonitorNotification(..)
, NodeMonitorNotification(..)
, PortMonitorNotification(..)
, ProcessExitException(..)
, ProcessLinkException(..)
, NodeLinkException(..)
, PortLinkException(..)
, DidUnmonitor(..)
, DidUnlinkProcess(..)
, DidUnlinkNode(..)
, DidUnlinkPort(..)
, SpawnRef
, DidSpawn(..)
, Message(..)
, TypedChannel(..)
, Identifier(..)
, nodeOf
, ProcessInfo(..)
, ProcessInfoNone(..)
, NodeStats(..)
, SendPortId(..)
, typedChannelWithId
, RegisterReply(..)
, WhereIsReply(..)
, payloadToMessage
, createUnencodedMessage
, unsafeCreateUnencodedMessage
, runLocalProcess
, firstNonReservedProcessId
, ImplicitReconnect(WithImplicitReconnect)
)
import Control.Distributed.Process.Management.Internal.Agent
( mxAgentController
)
import qualified Control.Distributed.Process.Management.Internal.Trace.Remote as Trace
( remoteTable
)
import Control.Distributed.Process.Management.Internal.Trace.Tracer
( defaultTracer
)
import Control.Distributed.Process.Management.Internal.Trace.Types
( TraceArg(..)
, traceEvent
, traceLogFmt
, enableTrace
)
import Control.Distributed.Process.Management.Internal.Types
( MxEvent(..)
)
import Control.Distributed.Process.Serializable (Serializable)
import Control.Distributed.Process.Internal.Messaging
( sendBinary
, closeImplicitReconnections
, impliesDeathOf
)
import Control.Distributed.Process.Internal.Primitives
( register
, receiveWait
, match
, sendChan
, unwrapMessage
, SayMessage(..)
)
import Control.Distributed.Process.Internal.Types (SendPort, Tracer(..))
import qualified Control.Distributed.Process.Internal.Closure.BuiltIn as BuiltIn (remoteTable)
import Control.Distributed.Process.Internal.WeakTQueue (TQueue, writeTQueue)
import qualified Control.Distributed.Process.Internal.StrictContainerAccessors as DAC
( mapMaybe
, mapDefault
)
import Control.Monad.Catch (try)
import GHC.IO (IO(..), unsafeUnmask)
import GHC.Base ( maskAsyncExceptions# )
import Unsafe.Coerce
import Prelude
-- Remove these definitions when the fix for
-- https://ghc.haskell.org/trac/ghc/ticket/10149
-- is included in all supported compilers:
-- | Run an 'IO' action with asynchronous exceptions masked, by applying the
-- @maskAsyncExceptions#@ primitive directly to the unwrapped action.
block :: IO a -> IO a
block (IO io) = IO (maskAsyncExceptions# io)
-- | Run an 'IO' action with asynchronous exceptions unmasked; a thin alias
-- for 'unsafeUnmask'.
unblock :: IO a -> IO a
unblock action = unsafeUnmask action
--------------------------------------------------------------------------------
-- Initialization --
--------------------------------------------------------------------------------
-- | Remote table obtained by applying 'BuiltIn.remoteTable' and then
-- 'Trace.remoteTable' to 'Static.initRemoteTable'.
initRemoteTable :: RemoteTable
initRemoteTable =
  (Trace.remoteTable . BuiltIn.remoteTable) Static.initRemoteTable
-- | Initialize a new local node.
--
-- A fresh endpoint is requested from the transport. If that fails the
-- transport error is rethrown; otherwise a bare node is created on the
-- endpoint and the service processes are started on it.
newLocalNode :: NT.Transport -> RemoteTable -> IO LocalNode
newLocalNode transport rtable =
    NT.newEndPoint transport >>= either throwIO bootstrap
  where
    -- Build the node on the endpoint and bring up its services.
    bootstrap endPoint = do
      localNode <- createBareLocalNode endPoint rtable
      startServiceProcesses localNode
      return localNode
-- | Create a new local node (without any service processes running).
--
-- The node starts with an empty process table, an empty connection table, a
-- random pid \"unique\" component and a fresh control channel. The management
-- agent is started first, and then two threads are forked: one running the
-- node controller and one handling incoming transport events.
createBareLocalNode :: NT.EndPoint -> RemoteTable -> IO LocalNode
createBareLocalNode endPoint rtable = do
    pidUnique <- randomIO
    nodeState <- newMVar . LocalNodeValid $ ValidLocalNodeState
      { _localProcesses   = Map.empty
      , _localPidCounter  = firstNonReservedProcessId
      , _localPidUnique   = pidUnique
      , _localConnections = Map.empty
      }
    chan <- newChan
    let bareNode = LocalNode
          { localNodeId   = NodeId $ NT.address endPoint
          , localEndPoint = endPoint
          , localState    = nodeState
          , localCtrlChan = chan
          , localEventBus = MxEventBusInitialising
          , remoteTable   = rtable
          }
    tracedNode <- startMxAgent bareNode
    -- Once the NC terminates, the endpoint isn't much use,
    _ <- forkIO $
      runNodeController tracedNode
        `Exception.finally` NT.closeEndPoint (localEndPoint bareNode)
    -- whilst a closed/failing endpoint will terminate the NC
    _ <- forkIO $
      handleIncomingMessages tracedNode
        `Exception.finally` requestShutdown bareNode
    return tracedNode
  where
    -- Post a shutdown signal, sent in the node's own name, on its control
    -- channel.
    requestShutdown n =
      writeChan (localCtrlChan n) NCMsg
        { ctrlMsgSender = NodeIdentifier (localNodeId n)
        , ctrlMsgSignal = SigShutdown
        }
-- | Fork the management agent controller on the node and return a copy of the
-- node whose event bus is built from the controller's pid and the values the
-- controller hands back through an 'MVar.MVar'.
startMxAgent :: LocalNode -> IO LocalNode
startMxAgent node = do
    -- see note [tracer/forkProcess races]
    handoff  <- MVar.newEmptyMVar
    agentPid <- spawn (mxAgentController spawn handoff)
    (tracer', wqRef, mxNew') <- MVar.takeMVar handoff
    return node { localEventBus = MxEventBus agentPid tracer' wqRef mxNew' }
  where
    spawn = forkProcess node
-- | Register the trace controller and start the default tracer.
--
-- Only acts when the node's event bus is a fully constructed 'MxEventBus';
-- for any other event bus value this is a no-op.
startDefaultTracer :: LocalNode -> IO ()
startDefaultTracer node' =
    case localEventBus node' of
      MxEventBus _ (Tracer controllerPid _) _ _ -> do
        runProcess node' (register "trace.controller" controllerPid)
        tracerPid <- forkProcess node' defaultTracer
        enableTrace (localEventBus node') tracerPid
        runProcess node' (register "tracer.initial" tracerPid)
      _ ->
        return ()
-- TODO: we need a better mechanism for defining and registering services

-- | Start and register the service processes on a node
startServiceProcesses :: LocalNode -> IO ()
startServiceProcesses node = do
    -- Tracing /spawns/ relies on the tracer being enabled, so the default
    -- tracer is started first, even though it might @nsend@ to the logger
    -- before /that/ process has started. The race is harmless and is
    -- deliberately ignored.
    startDefaultTracer node
    logger <- forkProcess node loggerLoop
    runProcess node $ do
      register "logger" logger
      -- The trace.logger is used for tracing to the console to avoid feedback
      -- loops during tracing if the user reregisters the "logger" with a
      -- custom process which uses 'send' or other primitives which are traced.
      register "trace.logger" logger
  where
    -- The logger process: serve one message, then (unless asked to stop)
    -- wait for the next.
    loggerLoop :: Process ()
    loggerLoop = receiveWait
      [ match onSay
      , match onTrace
      , match onShutdown
      ]

    -- An ordinary log line: print it with its formatted timestamp and sender.
    onSay :: SayMessage -> Process ()
    onSay (SayMessage time pid string) = do
      let stamp = formatTime defaultTimeLocale "%c" time
      liftIO $ hPutStrLn stderr (stamp ++ " " ++ show pid ++ ": " ++ string)
      loggerLoop

    -- A 'trace' message from the local node tracer.
    onTrace :: (String, String) -> Process ()
    onTrace (time, string) = do
      liftIO $ hPutStrLn stderr (time ++ " [trace] " ++ string)
      loggerLoop

    -- A shutdown request: acknowledge on the supplied port and stop looping.
    onShutdown :: SendPort () -> Process ()
    onShutdown ch = sendChan ch ()
-- | Force-close a local node, killing all processes on that node.
--
-- Closing a node that is already closed kills nothing, but the endpoint is
-- still closed.
closeLocalNode :: LocalNode -> IO ()
closeLocalNode node = do
    -- The kill action is only /computed/ while the state mvar is held and is
    -- run after the mvar has been refilled. Otherwise there is potential for
    -- deadlock as a dying process tries to get the mvar while masking
    -- exceptions uninterruptibly.
    killProcesses <- modifyMVar (localState node) $ \st ->
      return $ case st of
        LocalNodeValid vst ->
          ( LocalNodeClosed
            -- Semantics of 'throwTo' guarantee that the target thread will
            -- get delivered an exception. Therefore, the target thread will
            -- be killed eventually and that's as good as we can do. No need
            -- to wait for the thread to actually finish dying.
          , forM_ (vst ^. localProcesses) (killThread . processThread)
          )
        LocalNodeClosed ->
          (LocalNodeClosed, return ())
    killProcesses
    -- This call will have the effect of shutting down the NC as well (see
    -- 'createBareLocalNode').
    NT.closeEndPoint (localEndPoint node)
-- | Run a process on a local node and wait for it to finish
--
-- The process is forked with 'forkProcess'; its outcome is handed back
-- through an mvar. Any exception the process ended with is rethrown in the
-- calling thread.
runProcess :: LocalNode -> Process () -> IO ()
runProcess node proc = do
    resultVar <- newEmptyMVar
    -- TODO; When forkProcess inherits the masking state, protect the forked
    -- thread against async exceptions that could occur before 'try' is
    -- evaluated.
    _ <- forkProcess node $ do
      outcome <- try proc
      liftIO $ putMVar resultVar outcome
    outcome <- takeMVar resultVar
    case outcome of
      Left (e :: SomeException) -> throwIO e
      Right ()                  -> return ()
-- | Spawn a new process on a local node
--
-- The caller's masking state is captured and the process is created while the
-- node state mvar is held (via 'modifyMVarMasked'). The new process is
-- entered into the node's process table under a pid built from the node's
-- current pid counter and pid unique.
--
-- Throws 'NodeClosedException' if the node has already been closed.
forkProcess :: LocalNode -> Process () -> IO ProcessId
forkProcess node proc = do
    ms <- getMaskingState
    modifyMVarMasked (localState node) (startProcess ms)
  where
    -- Create the process record, fork its thread and return the updated node
    -- state together with the new pid.
    startProcess :: MaskingState
                 -> LocalNodeState
                 -> IO (LocalNodeState, ProcessId)
    startProcess ms (LocalNodeValid vst) = do
      let lpid  = LocalProcessId { lpidCounter = vst ^. localPidCounter
                                 , lpidUnique  = vst ^. localPidUnique
                                 }
      let pid   = ProcessId { processNodeId  = localNodeId node
                            , processLocalId = lpid
                            }
      pst <- newMVar LocalProcessState { _monitorCounter = 0
                                       , _spawnCounter   = 0
                                       , _channelCounter = 0
                                       , _typedChannels  = Map.empty
                                       }
      queue <- newCQueue
      weakQueue <- mkWeakCQueue queue (return ())
      -- 'fixIO' ties the knot: the process record needs the thread id, which
      -- only exists after 'forkIO', while the forked thread needs the record.
      -- The lazy pattern keeps @tid@ from being demanded before it exists.
      (_, lproc) <- fixIO $ \ ~(tid, _) -> do
        let lproc = LocalProcess { processQueue  = queue
                                 , processWeakQ  = weakQueue
                                 , processId     = pid
                                 , processState  = pst
                                 , processThread = tid
                                 , processNode   = node
                                 }
        -- Rewrite this code when this is fixed:
        -- https://ghc.haskell.org/trac/ghc/ticket/10149
        --
        -- The thread is forked under 'uninterruptibleMask_'; @unmask@ then
        -- selects the masking state for the process body according to the
        -- masking state captured from the caller of 'forkProcess'.
        let unmask = case ms of
              Unmasked              -> unblock
              MaskedInterruptible   -> block
              MaskedUninterruptible -> id
        tid' <- uninterruptibleMask_ $ forkIO $ do
          -- Run the process and turn the way it ended into a 'DiedReason'.
          reason <- Exception.catches
            (unmask $ runLocalProcess lproc proc >> return DiedNormal)
            [ -- An exit exception: report who asked for the exit and, when
              -- the attached message unwraps to a 'String', that reason.
              (Exception.Handler (\ex@(ProcessExitException from msg) -> do
                 mMsg <- unwrapMessage msg :: IO (Maybe String)
                 case mMsg of
                   Nothing -> return $ DiedException $ show ex
                   Just m  -> return $ DiedException ("exit-from=" ++ (show from) ++ ",reason=" ++ m)))
              -- Any other exception: report its 'show' output.
            , (Exception.Handler
                (return . DiedException . (show :: SomeException -> String)))]

          -- [Unified: Table 4, rules termination and exiting]
          -- Remove the process from the node state and collect the
          -- connections that 'cleanupProcess' singled out for closing.
          mconns <- modifyValidLocalState node (cleanupProcess pid)
          -- XXX: Revisit after agreeing on the bigger picture for the semantics
          -- of transport operations.
          -- https://github.com/haskell-distributed/distributed-process/issues/204
          -- The connections are closed from a separate thread.
          forM_ mconns $ forkIO . mapM_ NT.close
          -- Tell the node controller that the process died, and why.
          writeChan (localCtrlChan node) NCMsg
            { ctrlMsgSender = ProcessIdentifier pid
            , ctrlMsgSignal = Died (ProcessIdentifier pid) reason
            }
        return (tid', lproc)

      -- see note [tracer/forkProcess races]
      trace node (MxSpawned pid)

      -- Register the process and advance the pid counter. When the counter is
      -- exhausted it restarts from 'firstNonReservedProcessId' and a new
      -- random pid unique is drawn.
      if lpidCounter lpid == maxBound
        then do
          -- TODO: this doesn't look right at all - how do we know
          -- that newUnique represents a process id that is available!?
          newUnique <- randomIO
          return ( LocalNodeValid
                 $ (localProcessWithId lpid ^= Just lproc)
                 . (localPidCounter ^= firstNonReservedProcessId)
                 . (localPidUnique ^= newUnique)
                 $ vst
                 , pid
                 )
        else
          return ( LocalNodeValid
                 $ (localProcessWithId lpid ^= Just lproc)
                 . (localPidCounter ^: (+ 1))
                 $ vst
                 , pid
                 )
    startProcess _ LocalNodeClosed =
      throwIO $ NodeClosedException $ localNodeId node

    -- Remove a finished process from the node state. Connections whose
    -- source identifier is implied dead by the process's death (per
    -- 'impliesDeathOf') are dropped from the connection table and returned so
    -- the caller can close them.
    cleanupProcess :: ProcessId
                   -> ValidLocalNodeState
                   -> IO (ValidLocalNodeState, [NT.Connection])
    cleanupProcess pid vst = do
      let pid' = ProcessIdentifier pid
      let (affected, unaffected) = Map.partitionWithKey (\(fr, _to) !_v -> impliesDeathOf pid' fr) (vst ^. localConnections)
      return ( (localProcessWithId (processLocalId pid) ^= Nothing)
               . (localConnections ^= unaffected)
               $ vst
             , map fst $ Map.elems affected
             )
-- note [tracer/forkProcess races]
--
-- Our startMxAgent function uses forkProcess to start the mxAgentController
-- process, and of course forkProcess attempts to call traceEvent once the
-- process has started. This is harmless, as the localEventBus is not updated
-- until /after/ the initial forkProcess completes, so the first call to
-- traceEvent behaves as if tracing were disabled (i.e., it is ignored).
--
--------------------------------------------------------------------------------
-- Handle incoming messages --
--------------------------------------------------------------------------------
-- | What is remembered about one incoming connection: the address of the
-- endpoint that opened it and what the connection is targeting.
type IncomingConnection = (NT.EndPointAddress, IncomingTarget)
-- | The destination of the payloads arriving on an incoming connection.
data IncomingTarget =
    -- | Connection is open but its first payload (which identifies the
    -- target) has not been received yet
    Uninit
    -- | Payloads go to the queue of this process; the queue is referenced
    -- weakly
  | ToProc ProcessId (Weak (CQueue Message))
    -- | Payloads are decoded and written to this typed channel
  | ToChan TypedChannel
    -- | Payloads are decoded as control messages for the node controller
  | ToNode
-- | State threaded through the incoming-message loop.
data ConnectionState = ConnectionState {
    -- | Every known incoming connection, by connection id
    _incoming     :: !(Map NT.ConnectionId IncomingConnection)
    -- | The connection ids recorded per remote endpoint address
  , _incomingFrom :: !(Map NT.EndPointAddress (Set NT.ConnectionId))
  }
-- | The connection state before any connection has been seen: both tables
-- (@_incoming@ and @_incomingFrom@, in that order) are empty.
initConnectionState :: ConnectionState
initConnectionState = ConnectionState Map.empty Map.empty
-- | Accessor for the table of incoming connections.
incoming :: Accessor ConnectionState (Map NT.ConnectionId IncomingConnection)
incoming = accessor _incoming replaceIncoming
  where
    replaceIncoming conns st = st { _incoming = conns }
-- | Accessor for the entry of a single connection id in the table of incoming
-- connections.
incomingAt :: NT.ConnectionId -> Accessor ConnectionState (Maybe IncomingConnection)
incomingAt cid = incoming >>> entry
  where
    entry = DAC.mapMaybe cid
-- | Accessor for the set of connection ids recorded for a remote endpoint
-- address, with the empty set as the default.
incomingFrom :: NT.EndPointAddress -> Accessor ConnectionState (Set NT.ConnectionId)
incomingFrom addr = byAddress >>> DAC.mapDefault Set.empty addr
  where
    byAddress = accessor _incomingFrom (\table st -> st { _incomingFrom = table })
-- | Event loop that consumes transport events from the node's endpoint and
-- routes them, threading a 'ConnectionState' that remembers what every known
-- incoming connection is targeting.
--
-- The loop returns normally on 'NT.EndPointClosed' or when a
-- 'NodeClosedException' is raised inside it; endpoint failure, transport
-- failure and an unexpected multicast end it through 'fail'.
handleIncomingMessages :: LocalNode -> IO ()
handleIncomingMessages node = go initConnectionState
   `Exception.catch` \(NodeClosedException _) -> return ()
  where
    -- One iteration: receive an event, act on it and recurse with the updated
    -- state. The bang keeps the state evaluated from one iteration to the
    -- next.
    go :: ConnectionState -> IO ()
    go !st = do
      event <- NT.receive endpoint
      case event of
        -- A new connection: only reliable ordered connections are accepted.
        -- It is recorded as 'Uninit' until its first payload arrives.
        NT.ConnectionOpened cid rel theirAddr ->
          if rel == NT.ReliableOrdered
            then
              trace node (MxConnected cid theirAddr)
              >> go (
                      (incomingAt cid ^= Just (theirAddr, Uninit))
                    . (incomingFrom theirAddr ^: Set.insert cid)
                    $ st
                    )
            else invalidRequest cid st $
                  "attempt to connect with unsupported reliability " ++ show rel
        -- A payload: what happens depends on the target recorded for the
        -- connection.
        NT.Received cid payload ->
          case st ^. incomingAt cid of
            Just (_, ToProc pid weakQueue) -> do
              mQueue <- deRefWeak weakQueue
              forM_ mQueue $ \queue -> do
                -- TODO: if we find that the queue is Nothing, should we remove
                -- it from the NC state? (and same for channels, below)
                let msg = payloadToMessage payload
                enqueue queue msg -- 'enqueue' is strict
                trace node (MxReceived pid msg)
              go st
            Just (_, ToChan (TypedChannel chan')) -> do
              mChan <- deRefWeak chan'
              -- If mChan is Nothing, the process has given up the read end of
              -- the channel and we simply ignore the incoming message
              forM_ mChan $ \chan -> atomically $
                -- We make sure the message is fully decoded when it is enqueued
                writeTQueue chan $! decode (BSL.fromChunks payload)
              go st
            Just (_, ToNode) -> do
              let ctrlMsg = decode . BSL.fromChunks $ payload
              writeChan ctrlChan $! ctrlMsg
              go st
            -- First payload on the connection: it is decoded as the
            -- identifier of the target, which is then recorded.
            Just (src, Uninit) ->
              case decode (BSL.fromChunks payload) of
                ProcessIdentifier pid -> do
                  let lpid = processLocalId pid
                  mProc <- withValidLocalState node $ return . (^. localProcessWithId lpid)
                  case mProc of
                    Just proc ->
                      go (incomingAt cid ^= Just (src, ToProc pid (processWeakQ proc)) $ st)
                    Nothing ->
                      -- incoming attempt to connect to unknown process - might
                      -- be dead already
                      go (incomingAt cid ^= Nothing $ st)
                SendPortIdentifier chId -> do
                  let lcid = sendPortLocalId chId
                      lpid = processLocalId (sendPortProcessId chId)
                  mProc <- withValidLocalState node $ return . (^. localProcessWithId lpid)
                  case mProc of
                    Just proc -> do
                      mChannel <- withMVar (processState proc) $ return . (^. typedChannelWithId lcid)
                      case mChannel of
                        Just channel ->
                          go (incomingAt cid ^= Just (src, ToChan channel) $ st)
                        Nothing ->
                          invalidRequest cid st $
                            "incoming attempt to connect to unknown channel of"
                            ++ " process " ++ show (sendPortProcessId chId)
                    Nothing ->
                      -- incoming attempt to connect to channel of unknown
                      -- process - might be dead already
                      go (incomingAt cid ^= Nothing $ st)
                NodeIdentifier nid ->
                  if nid == localNodeId node
                    then go (incomingAt cid ^= Just (src, ToNode) $ st)
                    else invalidRequest cid st $
                           "incoming attempt to connect to a different node -"
                           ++ " I'm " ++ show (localNodeId node)
                           ++ " but the remote peer wants to connect to "
                           ++ show nid
            Nothing ->
              invalidRequest cid st
                "message received from an unknown connection"
        -- A connection was closed: forget it in both tables.
        NT.ConnectionClosed cid ->
          case st ^. incomingAt cid of
            Nothing ->
              invalidRequest cid st "closed unknown connection"
            Just (src, _) -> do
              trace node (MxDisconnected cid src)
              go ( (incomingAt cid ^= Nothing)
                 . (incomingFrom src ^: Set.delete cid)
                 $ st
                 )
        -- The connection to a remote endpoint was lost: report the remote
        -- node as died to the node controller and drop every connection
        -- recorded for that address.
        NT.ErrorEvent (NT.TransportError (NT.EventConnectionLost theirAddr) _) -> do
          -- [Unified table 9, rule node_disconnect]
          let nid = NodeIdentifier $ NodeId theirAddr
          writeChan ctrlChan NCMsg
            { ctrlMsgSender = nid
            , ctrlMsgSignal = Died nid DiedDisconnect
            }
          let notLost k = not (k `Set.member` (st ^. incomingFrom theirAddr))
          closeImplicitReconnections node nid
          go ( (incomingFrom theirAddr ^= Set.empty)
             . (incoming ^: Map.filterWithKey (const . notLost))
             $ st
             )
        NT.ErrorEvent (NT.TransportError NT.EventEndPointFailed str) ->
          fail $ "Cloud Haskell fatal error: end point failed: " ++ str
        NT.ErrorEvent (NT.TransportError NT.EventTransportFailed str) ->
          fail $ "Cloud Haskell fatal error: transport failed: " ++ str
        -- The endpoint was closed: leave the loop.
        NT.EndPointClosed ->
          return ()
        NT.ReceivedMulticast _ _ ->
          -- If we received a multicast message, something went horribly wrong
          -- and we just give up
          fail "Cloud Haskell fatal error: received unexpected multicast"

    -- Log an invalid request through the node's tracer, forget the
    -- connection's entry in the incoming table and carry on with the loop.
    invalidRequest :: NT.ConnectionId -> ConnectionState -> String -> IO ()
    invalidRequest cid st msg = do
      -- TODO: We should treat this as a fatal error on the part of the remote
      -- node. That is, we should report the remote node as having died, and we
      -- should close incoming connections (this requires a Transport layer
      -- extension).
      traceEventFmtIO node "" [ TraceStr $ " [network] invalid request"
                                        ++ " (" ++ msg ++ "): "
                              , (Trace cid)
                              ]
      go ( incomingAt cid ^= Nothing
         $ st
         )

    endpoint = localEndPoint node
    ctrlChan = localCtrlChan node
--------------------------------------------------------------------------------
-- Top-level access to the node controller --
--------------------------------------------------------------------------------
-- | Run the node controller loop for a node, starting from the initial
-- controller state. A 'NodeClosedException' escaping the loop is treated as
-- normal termination.
runNodeController :: LocalNode -> IO ()
runNodeController node = Exception.catch controllerLoop onNodeClosed
  where
    controllerLoop = runReaderT (evalStateT (unNC nodeController) initNCState) node
    onNodeClosed (NodeClosedException _) = return ()
--------------------------------------------------------------------------------
-- Internal data types --
--------------------------------------------------------------------------------
-- | Bookkeeping owned by the node controller: links, monitors and the process
-- registry.
data NCState = NCState
  {  -- Links, keyed by the identifier being watched; the second component is
     -- the process that asked for the link
    _links    :: !(BiMultiMap Identifier ProcessId ())
     -- Monitors, keyed by the identifier being watched; the second component
     -- is the monitoring process and the value is its monitor reference
  , _monitors :: !(BiMultiMap Identifier ProcessId MonitorRef)
     -- Process registry: names and where they live, mapped to the PIDs
  , _registeredHere :: !(Map String ProcessId)
     -- Per process, the nodes that are forwarded its death notification; the
     -- Int is presumably a count of registrations on that node - TODO confirm
  , _registeredOnNodes :: !(Map ProcessId [(NodeId,Int)])
  }
-- | The node controller monad: 'NCState' threaded with 'StateT' over a
-- 'LocalNode' reader environment, on top of 'IO'.
newtype NC a = NC { unNC :: StateT NCState (ReaderT LocalNode IO) a }
  deriving ( Applicative
           , Functor
           , Monad
           , MonadIO
           , MonadState NCState
           , MonadReader LocalNode
           )
-- | The node controller state at start-up: no links, no monitors and an empty
-- registry (fields in declaration order: @_links@, @_monitors@,
-- @_registeredHere@, @_registeredOnNodes@).
initNCState :: NCState
initNCState = NCState BiMultiMap.empty BiMultiMap.empty Map.empty Map.empty
-- | Thrown in response to the user invoking 'kill' (see Primitives.hs). This
-- type is deliberately not exported so it cannot be caught explicitly.
--
-- Carries the process that requested the kill and the stated reason.
data ProcessKillException =
    ProcessKillException !ProcessId !String
  deriving (Typeable)

instance Exception ProcessKillException

instance Show ProcessKillException where
  show (ProcessKillException killer why) =
    concat ["killed-by=", show killer, ",reason=", why]
-- | Send a message to a process from the node controller, with the tracing
-- flag of 'ncSendToProcessAndTrace' set to 'True'.
ncSendToProcess :: ProcessId -> Message -> NC ()
ncSendToProcess pid msg = ncSendToProcessAndTrace True pid msg
-- | Send a message to a process from the node controller.
--
-- A process on this node is delivered to locally (passing the tracing flag
-- along). For a process on another node the message is wrapped in an
-- 'UnreliableSend' control message and sent to that node.
ncSendToProcessAndTrace :: Bool -> ProcessId -> Message -> NC ()
ncSendToProcessAndTrace shouldTrace pid msg = do
    node <- ask
    let here   = localNodeId node
        target = processNodeId pid
    if target == here
      then ncEffectLocalSendAndTrace shouldTrace node pid msg
      else liftIO $ sendBinary node
             (NodeIdentifier here)
             (NodeIdentifier target)
             WithImplicitReconnect
             NCMsg { ctrlMsgSender = NodeIdentifier here
                   , ctrlMsgSignal = UnreliableSend (processLocalId pid) msg
                   }
-- | Send a control message to a node controller.
--
-- A message for this node is written (after being forced) straight onto the
-- local control channel; a message for any other node is sent with
-- 'sendBinary'.
ncSendToNode :: NodeId -> NCMsg -> NC ()
ncSendToNode to msg = do
    node <- ask
    let self = localNodeId node
    liftIO $
      if to /= self
        then sendBinary node
               (NodeIdentifier self)
               (NodeIdentifier to)
               WithImplicitReconnect
               msg
        else writeChan (localCtrlChan node) $! msg
--------------------------------------------------------------------------------
-- Tracing/Debugging --
--------------------------------------------------------------------------------
-- [Issue #104 / DP-13]

-- | Emit a trace event for the death of a node or a process. Other kinds of
-- identifier produce no event.
traceNotifyDied :: LocalNode -> Identifier -> DiedReason -> NC ()
traceNotifyDied node ident reason = liftIO (withLocalTracer node report)
  where
    -- TODO: sendPortDied notifications
    report bus = case ident of
      NodeIdentifier nid    -> traceEvent bus (MxNodeDied nid reason)
      ProcessIdentifier pid -> traceEvent bus (MxProcessDied pid reason)
      _                     -> return ()
-- | Log a formatted trace message, built from a format string and a list of
-- trace arguments, through the node's event bus.
traceEventFmtIO :: LocalNode
                -> String
                -> [TraceArg]
                -> IO ()
traceEventFmtIO node fmt args = withLocalTracer node logIt
  where
    logIt bus = traceLogFmt bus fmt args
-- | Publish a management event through the node's event bus.
trace :: LocalNode -> MxEvent -> IO ()
trace node ev = withLocalTracer node (`traceEvent` ev)
-- | Apply an action to the node's event bus.
withLocalTracer :: LocalNode -> (MxEventBus -> IO ()) -> IO ()
withLocalTracer node act = act bus
  where
    bus = localEventBus node
--------------------------------------------------------------------------------
-- Core functionality --
--------------------------------------------------------------------------------
-- [Unified: Table 7]

-- | The node controller's main loop.
--
-- Forever: read the next control message from the node's control channel,
-- forward it when its signal is addressed to another node, and then apply
-- its effect locally. A shutdown signal closes the endpoint and ends the
-- loop by throwing 'NodeClosedException'; a message that matches none of the
-- known shapes is reported with 'error'.
nodeController :: NC ()
nodeController = do
    node <- ask
    forever' $ do
      msg <- liftIO $ readChan (localCtrlChan node)
      forwardIfRemote node msg
      dispatch node msg
  where
    -- [Unified: Table 7, rule nc_forward]
    forwardIfRemote :: LocalNode -> NCMsg -> NC ()
    forwardIfRemote node msg =
      forM_ (destNid (ctrlMsgSignal msg)) $ \nid' ->
        when (nid' /= localNodeId node) $
          ncSendToNode nid' msg

    -- Apply the effect of a single control message. The alternatives are
    -- tried in order, so the catch-all must stay last.
    dispatch :: LocalNode -> NCMsg -> NC ()
    dispatch node msg = case msg of
      NCMsg (ProcessIdentifier from) (Link them) ->
        ncEffectMonitor from them Nothing
      NCMsg (ProcessIdentifier from) (Monitor ref) ->
        ncEffectMonitor from (monitorRefIdent ref) (Just ref)
      NCMsg (ProcessIdentifier from) (Unlink them) ->
        ncEffectUnlink from them
      NCMsg (ProcessIdentifier from) (Unmonitor ref) ->
        ncEffectUnmonitor from ref
      NCMsg _from (Died ident reason) ->
        ncEffectDied ident reason
      NCMsg (ProcessIdentifier from) (Spawn proc ref) ->
        ncEffectSpawn from proc ref
      NCMsg (ProcessIdentifier from) (Register label atnode pid force) ->
        ncEffectRegister from label atnode pid force
      NCMsg (ProcessIdentifier from) (WhereIs label) ->
        ncEffectWhereIs from label
      NCMsg _ (NamedSend label msg') ->
        ncEffectNamedSend label msg'
      NCMsg _ (UnreliableSend lpid msg') ->
        ncEffectLocalSend node (ProcessId (localNodeId node) lpid) msg'
      NCMsg _ (LocalSend to msg') ->
        ncEffectLocalSend node to msg'
      NCMsg _ (LocalPortSend to msg') ->
        ncEffectLocalPortSend to msg'
      NCMsg (ProcessIdentifier from) (Kill to reason) ->
        ncEffectKill from to reason
      NCMsg (ProcessIdentifier from) (Exit to reason) ->
        ncEffectExit from to reason
      NCMsg (ProcessIdentifier from) (GetInfo pid) ->
        ncEffectGetInfo from pid
      NCMsg _ SigShutdown ->
        liftIO $
          NT.closeEndPoint (localEndPoint node)
            `Exception.finally` throwIO (NodeClosedException $ localNodeId node)
      NCMsg (ProcessIdentifier from) (GetNodeStats nid) ->
        ncEffectGetNodeStats from nid
      unexpected ->
        error $ "nodeController: unexpected message " ++ show unexpected
-- [Unified: Table 10]

-- | Set up a link or a monitor.
--
-- A target that is not local, or that is local and a valid local identifier,
-- is recorded in the link table (no reference given) or the monitor table
-- (reference given). Otherwise the watcher is told straight away that the
-- target died with 'DiedUnknownId': directly when the watcher is local,
-- through the watcher's node controller when it is not.
ncEffectMonitor :: ProcessId        -- ^ Who's watching?
                -> Identifier       -- ^ Who's being watched?
                -> Maybe MonitorRef -- ^ 'Nothing' to link
                -> NC ()
ncEffectMonitor from them mRef = do
    node <- ask
    watchable <-
      if isLocal node them
        then isValidLocalIdentifier them
        else return True
    let watcherIsLocal = isLocal node (ProcessIdentifier from)
    if watchable
      then -- [Unified: first rule]
        modify' $ case mRef of
          Just ref -> monitors ^: BiMultiMap.insert them from ref
          Nothing  -> links ^: BiMultiMap.insert them from ()
      else if watcherIsLocal
        then -- [Unified: second rule]
          notifyDied from them DiedUnknownId mRef
        else -- [Unified: third rule]
          -- TODO: this is the right sender according to the Unified semantics,
          -- but perhaps having 'them' as the sender would make more sense
          -- (see also: notifyDied)
          ncSendToNode (processNodeId from) NCMsg
            { ctrlMsgSender = NodeIdentifier (localNodeId node)
            , ctrlMsgSignal = Died them DiedUnknownId
            }
-- [Unified: Table 11]

-- | Remove a link. When the unlinking process is local it is first posted the
-- matching @DidUnlink*@ message; the link is then deleted from the link
-- table.
ncEffectUnlink :: ProcessId -> Identifier -> NC ()
ncEffectUnlink from them = do
    node <- ask
    when (isLocal node (ProcessIdentifier from)) acknowledge
    modify' $ links ^: BiMultiMap.delete them from ()
  where
    -- Confirm the unlink to the requesting process, by kind of target.
    acknowledge :: NC ()
    acknowledge = case them of
      ProcessIdentifier pid  -> postAsMessage from (DidUnlinkProcess pid)
      NodeIdentifier nid     -> postAsMessage from (DidUnlinkNode nid)
      SendPortIdentifier cid -> postAsMessage from (DidUnlinkPort cid)
-- [Unified: Table 11]

-- | Remove a monitor. When the unmonitoring process is local it is first
-- posted a 'DidUnmonitor' message; the monitor is then deleted from the
-- monitor table.
ncEffectUnmonitor :: ProcessId -> MonitorRef -> NC ()
ncEffectUnmonitor from ref = do
    node <- ask
    let watched = monitorRefIdent ref
    when (isLocal node (ProcessIdentifier from)) $
      postAsMessage from (DidUnmonitor ref)
    modify' $ monitors ^: BiMultiMap.delete watched from ref
-- [Unified: Table 12]

-- | React to the death of an identifier.
--
-- In order: trace the death; notify the watchers recorded in the affected
-- links and monitors; forward the death to remote nodes still watched by a
-- dead process; drop the links and monitors held by dead processes; and
-- prune both registry tables, forwarding the death to the remote nodes
-- listed for each dead registered process.
ncEffectDied :: Identifier -> DiedReason -> NC ()
ncEffectDied ident reason = do
    node <- ask
    traceNotifyDied node ident reason

    (hitLinks, keptLinks) <- gets (splitNotif ident . (^. links))
    (hitMons,  keptMons)  <- gets (splitNotif ident . (^. monitors))

    -- When it is a node that died, only watchers on this node are notified.
    let nodeDied = case ident of
          NodeIdentifier _ -> True
          _                -> False
        shouldTell watcher =
          not nodeDied || isLocal node (ProcessIdentifier watcher)

    forM_ (Map.toList hitLinks) $ \(them, watchers) ->
      forM_ watchers $ \(us, _) ->
        when (shouldTell us) $
          notifyDied us them reason Nothing

    forM_ (Map.toList hitMons) $ \(them, watchers) ->
      forM_ watchers $ \(us, ref) ->
        when (shouldTell us) $
          notifyDied us them reason (Just ref)

    -- Notify remote nodes that the process died so it can be removed from
    -- monitor lists.
    let watchedBy pid = Set.union
          (Set.map fst $ BiMultiMap.lookupBy2nd pid keptLinks)
          (Set.map fst $ BiMultiMap.lookupBy2nd pid keptMons)
        remoteNodes = case ident of
          ProcessIdentifier pid ->
            [ nid
            | nid <- map nodeOf (Set.toList (watchedBy pid))
            , nid /= localNodeId node
            ]
          _ -> []
    mapM_ (forwardDeath node) remoteNodes

    -- Delete monitors in the local node.
    let deleteDeads :: (Ord a, Ord v)
                    => BiMultiMap a ProcessId v -> BiMultiMap a ProcessId v
        deleteDeads = case ident of
          -- deleteAllBy2nd is faster than partitionWithKeyBy2nd
          ProcessIdentifier pid -> BiMultiMap.deleteAllBy2nd pid
          _                     -> snd . BiMultiMap.partitionWithKeyBy2nd
                                     (\pid _ -> diesToo pid)
    modify' $ (links ^= deleteDeads keptLinks) . (monitors ^= deleteDeads keptMons)

    -- Prune the registry of names held by dead processes.
    modify' $ registeredHere ^: Map.filter (not . diesToo)

    registrations <- gets (^. registeredOnNodes)
    let (dead, alive) =
          Map.partitionWithKey (\pid _ -> diesToo pid) registrations
    forM_ (Map.elems dead) $ \nidlist ->
      forM_ nidlist $ \(nid, _) ->
        when (not $ isLocal node (NodeIdentifier nid)) $
          forwardDeath node nid
    modify' $ registeredOnNodes ^= alive
  where
    -- Does the death of @ident@ imply the death of this process?
    diesToo :: ProcessId -> Bool
    diesToo pid = ident `impliesDeathOf` ProcessIdentifier pid

    -- Tell another node controller about the death, in this node's name.
    forwardDeath node nid = ncSendToNode nid
      NCMsg { ctrlMsgSender = NodeIdentifier (localNodeId node)
            , ctrlMsgSignal = Died ident reason
            }
-- [Unified: Table 13]
--
-- Resolve the closure, fork it as a new process on this node and report the
-- new process id back to @pid@ in a 'DidSpawn' message tagged with @ref@.
ncEffectSpawn :: ProcessId -> Closure (Process ()) -> SpawnRef -> NC ()
ncEffectSpawn pid cProc ref = do
  resolved <- unClosure cProc
  node <- ask
  -- A process is forked even when the closure cannot be resolved: it then
  -- just fails with the resolution error, which lets the requesting side
  -- find out what happened.
  spawned <- liftIO . forkProcess node $ either unresolved id resolved
  ncSendToProcess pid . unsafeCreateUnencodedMessage $ DidSpawn ref spawned
  where
    unresolved err = fail $ "Error: Could not resolve closure: " ++ err
-- Unified semantics does not explicitly describe how to implement 'register',
-- but mentions it's "very similar to nsend" (Table 14).
--
-- Handle a request from process @from@ to bind (@mPid = Just pid@) or unbind
-- (@mPid = Nothing@) @label@ in the registry of node @atnode@.
-- @reregistration@ says whether an existing binding is expected.
--
-- * If @atnode@ is this node, a valid request updates the local registry,
--   informs the nodes hosting the old and new processes, and emits a trace
--   event. A 'RegisterReply' is always sent back to @from@, carrying whether
--   the request was valid and the resulting binding.
--
-- * Otherwise this node is only being told that one of its processes was
--   registered on (or, with @reregistration@ set, unregistered from)
--   @atnode@; it adjusts its per-process count for @atnode@ and sends no
--   reply.
ncEffectRegister :: ProcessId -> String -> NodeId -> Maybe ProcessId -> Bool -> NC ()
ncEffectRegister from label atnode mPid reregistration = do
    node <- ask
    currentVal <- gets (^. registeredHereFor label)
    isOk <-
      case mPid of
        Nothing ->
          -- Unregister request: only valid if the label is currently bound.
          return $ isJust currentVal
        Just thepid -> do
          -- Register request: the label must be free (or bound, for a
          -- re-registration), and a process claimed to be local must be a
          -- valid local identifier.
          isvalidlocal <- isValidLocalIdentifier (ProcessIdentifier thepid)
          return $ (isNothing currentVal /= reregistration) &&
                   (not (isLocal node (ProcessIdentifier thepid)) || isvalidlocal)
    if isLocal node (NodeIdentifier atnode)
      then do
        when isOk $ do
          modify' $ registeredHereFor label ^= mPid
          updateRemote node currentVal mPid
          -- Matching on both values keeps this total; the old code used
          -- 'fromJust' here. A valid unregister always has a current binding,
          -- so the last alternative is not expected to be taken.
          case (mPid, currentVal) of
            (Just p, _)         -> liftIO $ trace node (MxRegistered p label)
            (Nothing, Just old) -> liftIO $ trace node (MxUnRegistered old label)
            (Nothing, Nothing)  -> return ()
        newVal <- gets (^. registeredHereFor label)
        ncSendToProcess from $ unsafeCreateUnencodedMessage $
          RegisterReply label isOk newVal
      else
        case mPid of
          Nothing  -> return ()
          Just pid ->
            modify' $ registeredOnNodesFor pid ^: maybeify (operation atnode)
  where
    -- On the hosting node a "re-registration" message means the process lost
    -- a registration on @atnode@; a plain registration means it gained one.
    operation = if reregistration then decList else incList

    -- Tell the nodes hosting the old and the new process about the change.
    -- The remote bookkeeping is keyed by process id, so an update is needed
    -- whenever the process changes, including when both processes live on
    -- the same remote node. (Comparing only the node ids, as was done
    -- before, skipped that case.)
    updateRemote node (Just oldval) (Just newval)
      | oldval /= newval = do
          forward node (processNodeId oldval) (Register label atnode (Just oldval) True)
          forward node (processNodeId newval) (Register label atnode (Just newval) False)
    updateRemote node Nothing (Just newval) =
      forward node (processNodeId newval) (Register label atnode (Just newval) False)
    updateRemote node (Just oldval) Nothing =
      forward node (processNodeId oldval) (Register label atnode (Just oldval) True)
    updateRemote _ _ _ = return ()

    -- Lift a list transformation to 'Maybe', treating 'Nothing' as the empty
    -- list on input and turning an empty result back into 'Nothing'.
    maybeify f Nothing  = unmaybeify $ f []
    maybeify f (Just x) = unmaybeify $ f x

    unmaybeify [] = Nothing
    unmaybeify x  = Just x

    -- Increment the count stored for @tag@, adding it with count 1 if absent.
    incList tag [] = [(tag, 1)]
    incList tag ((atag, acount) : xs) | tag == atag = (atag, acount + 1) : xs
    incList tag (x : xs) = x : incList tag xs

    -- Decrement the count stored for @tag@, dropping the entry when the count
    -- is 1; a missing tag leaves the list unchanged.
    decList _ [] = []
    decList tag ((atag, 1) : xs) | atag == tag = xs
    decList tag ((atag, n) : xs) | atag == tag = (atag, n - 1) : xs
    decList tag (x : xs) = x : decList tag xs

    -- Send a control signal to another node's controller on behalf of
    -- @from@; nothing is sent when the target is this node.
    forward node to reg =
      when (not $ isLocal node (NodeIdentifier to)) $
        ncSendToNode to $ NCMsg { ctrlMsgSender = ProcessIdentifier from
                                , ctrlMsgSignal = reg
                                }
-- Unified semantics does not explicitly describe 'whereis'
ncEffectWhereIs :: ProcessId -> String -> NC ()
ncEffectWhereIs from label = do
mPid <- gets (^. registeredHereFor label)