forked from darwinproject/darwin3
-
Notifications
You must be signed in to change notification settings - Fork 1
/
check_threads.F
169 lines (149 loc) · 5.55 KB
/
check_threads.F
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#include "CPP_EEOPTIONS.h"
CBOP
C !ROUTINE: CHECK_THREADS
C !INTERFACE:
SUBROUTINE CHECK_THREADS( myThid )
C !DESCRIPTION:
C *==========================================================
C | SUBROUTINE CHECK\_THREADS
C | o Check that all the threads we need did indeed start.
C *==========================================================
C | This routine is called during the initialisation phase
C | to check whether all the threads have started.
C | It is invoked by every thread and if any thread finds an
C | error it should set its error flag.
C | Notes:
C | Different mechanisms may be required on different
C | platforms to actually perform the check. For example as
C | coded here each thread checks for a semaphore set by the
C | other threads to see if they are running.
C | It is also possible for a system to schedule threads
C | sequentially, unless some system call is made to yield
C | the process. This routine would detect this situation too
C | and allow a programmer to modify this routine and the
C | barrier code to allow threads to be scheduled more
C | appropriately.
C *==========================================================
C !USES:
IMPLICIT NONE
C == Global variables ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
C !INPUT PARAMETERS:
C == Routine arguments ==
C myThid :: My thread number
INTEGER myThid
C !FUNCTIONS:
#ifdef USE_OMP_THREADING
INTEGER OMP_GET_NUM_THREADS
EXTERNAL OMP_GET_NUM_THREADS
#endif
C !LOCAL VARIABLES:
C == Local variables ==
C I :: Loop counter
C numberThreadRunning :: Count of number of threads this thread
C thinks are running.
C nChecks :: Number of times checked for all threads. After so
C many checks give up and report an error.
C msgBuf :: Informational/error message buffer
INTEGER nChecks
CHARACTER*(MAX_LEN_MBUF) msgBuf
#ifdef USE_OMP_THREADING
#ifdef ALLOW_USE_MPI
INTEGER myErr, mpiRC
#endif
#else /* USE_OMP_THREADING */
INTEGER I, numberThreadsRunning
#endif /* USE_OMP_THREADING */
CEOP
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
#ifdef USE_OMP_THREADING
C-- Check early-on that number of threads match
IF ( OMP_GET_NUM_THREADS() .NE. nThreads ) THEN
C- This process has problems in multi-threads setting (detected by
C all pseudo-threads); note: cannot use any Barrier in this context
WRITE(msgBuf,'(2A,I6)') 'CHECK_THREADS:',
& ' from "eedata", nThreads=', nThreads
CALL PRINT_ERROR( msgBuf, myThid )
WRITE(msgBuf,'(2A,I6)') ' not equal to ',
& 'Env.Var. OMP_NUM_THREADS=', OMP_GET_NUM_THREADS()
CALL PRINT_ERROR( msgBuf, myThid )
thError(myThid) = .TRUE.
eeBootError = .TRUE.
IF ( myThid.EQ.1 ) THEN
C- one pseudo-thread (thId=1) export the error to other MPI processes
nChecks = 1
#ifdef ALLOW_USE_MPI
IF ( usingMPI ) THEN
myErr = nChecks
CALL MPI_Allreduce( myErr,nChecks,1,MPI_INTEGER,
& MPI_SUM,MPI_COMM_MODEL,mpiRC )
ENDIF
#endif /* ALLOW_USE_MPI */
ENDIF
ELSE
C- this process has a working multi-threads setting
threadIsRunning(myThid) = .TRUE.
IF ( myThid.EQ.1 ) THEN
C- master collects error from other MPI processes
nChecks = 0
#ifdef ALLOW_USE_MPI
IF ( usingMPI ) THEN
myErr = nChecks
CALL MPI_Allreduce( myErr,nChecks,1,MPI_INTEGER,
& MPI_SUM,MPI_COMM_MODEL,mpiRC )
ENDIF
#endif /* ALLOW_USE_MPI */
IF ( nChecks.NE.0 ) THEN
WRITE(msgBuf,'(A,I5,A)') 'CHECK_THREADS:', nChecks,
& ' error(s) from other Processes'
CALL PRINT_ERROR( msgBuf, myThid )
eeBootError = .TRUE.
ENDIF
ENDIF
C- ensure all threads leave with updated eeBootError (shared) value
C$OMP BARRIER
ENDIF
#else /* ndef USE_OMP_THREADING */
threadIsRunning(myThid) = .TRUE.
nChecks = 0
10 CONTINUE
numberThreadsRunning = 0
nChecks = nChecks + 1
DO I = 1, nThreads
IF ( threadIsRunning(I) )
& numberThreadsRunning = numberThreadsRunning+1
ENDDO
IF ( nChecks .GT. 10 ) THEN
thError(myThid) = .TRUE.
eeBootError = .TRUE.
WRITE(msgBuf,'(A,I5,A,I5,A)')
& 'CHECK_THREADS: Only ',numberThreadsRunning,
& ' thread(s), ',nThreads,' are needed for this config!'
CALL PRINT_ERROR( msgBuf, myThid )
C-- Not enough threads are running so halt the program.
C I did not want this here but it is the only place I have found that
C KAP deadlocks if it there are fewer threads than iterations in a BLOCKED(1)
C loop. The deadlock appears to be in the routine mppjoin which never
C returns. I tried putting the STOP in main or breaking out of the loop in main
C but this causes KAP to insert a call to mppjoin - which then deadlocks!
IF ( myThid .EQ. 1 ) THEN
STOP 'ABNORMAL END: S/R CHECK_THREADS'
ENDIF
GOTO 11
ENDIF
IF ( numberThreadsRunning .NE. nThreads ) THEN
#ifdef HAVE_SYSTEM
CALL SYSTEM('sleep 1')
#endif
GOTO 10
ENDIF
11 CONTINUE
#endif /* ndef USE_OMP_THREADING */
C-- check barrier synchronization: 1rst (initial) call.
IF ( .NOT. eeBootError ) THEN
CALL BAR_CHECK( 1, myThid )
ENDIF
RETURN
END