Merge pull request #452 from danieljprice/rad-timing

Add timing for radiation and subroutines inside implicit radiation
danieljprice · Jul 17, 2023 · e745084 · e745084
2 parents 4df45e5 + 6d70ff0
commit e745084
Show file tree

Hide file tree

Showing 7 changed files with 209 additions and 62 deletions.
diff --git a/build/Makefile b/build/Makefile
@@ -538,11 +538,11 @@ SOURCES= physcon.f90 ${CONFIG} ${SRCKERNEL} io.F90 units.f90 \
          utils_filenames.f90 utils_summary.F90 ${SRCCHEM} ${SRCDUST} \
          mpi_memory.f90 mpi_derivs.F90 mpi_tree.F90 kdtree.F90 linklist_kdtree.F90 utils_healpix.f90 utils_raytracer.f90 \
          partinject.F90 utils_inject.f90 dust_formation.f90 ptmass_radiation.f90 ptmass_heating.f90 \
-         radiation_implicit.f90 ${SRCTURB} \
+         utils_deriv.f90 radiation_implicit.f90 ${SRCTURB} \
          ${SRCINJECT_DEPS} wind_equations.f90 wind.F90 ${SRCINJECT} \
          ${SRCKROME} memory.F90 ${SRCREADWRITE_DUMPS}  \
          quitdump.f90 ptmass.F90 \
-         readwrite_infile.F90 dens.F90 force.F90 utils_deriv.f90 deriv.F90 energies.F90 sort_particles.f90 \
+         readwrite_infile.F90 dens.F90 force.F90 deriv.F90 energies.F90 sort_particles.f90 \
          utils_shuffleparticles.F90 evwrite.f90 step_leapfrog.F90 writeheader.F90 ${SRCAN} step_supertimestep.F90 \
          mf_write.f90 evolve.F90 utils_orbits.f90 utils_linalg.f90 \
          checksetup.F90 initial.F90

diff --git a/docs/testing.rst b/docs/testing.rst
@@ -1,7 +1,7 @@
 Getting your code to pass the github actions
 ============================================
 
-On every pull request a sequence of continuous integration tests 
+On every pull request a sequence of continuous integration tests
 are performed to check that code is safe to merge into master.
 The scripts in the .github/workflows directory are as follows:
 
@@ -101,33 +101,72 @@ A non-exhaustive list of possible arguments are as follows:
 The buildbot
 ~~~~~~~~~~~~
 
-The buildbot runs nightly and checks that the code compiles in all of
+The buildbot also runs in an action and checks that the code compiles in all of
 the possible SETUP configurations in the Makefile. You can run this
-yourself as follows:
-
-::
+offline as follows::
 
    cd phantom/scripts
    ./buildbot.sh
 
-Because some of the setups are compiled with large static arrays which
-may exceed the memory limits of your local machine, you can optionally
-check only those setups that have idim less than some value, e.g.:
+If you want to check only those SETUPS that were failing in the actions,
+edit the buildbot.sh script and override the allsetups= line, e.g::
 
-::
+```
+allsetups='disc empty'
+```
 
-   cd phantom/scripts
-   ./buildbot.sh 1000000
-
-which checks only SETUPs with maxp set to 1 million particles or fewer.
-
-FAQ on common reasons for failure
-=================================
+Common reasons for failure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 We enforce the following policies in merging to the master branch:
 
 1. Code must compile and run with and without DEBUG=yes
 2. Code must compile with ability to change the precision of reals to real*4 (this is enforced in SETUP=blob)
 3. Code must compile with no warnings when compiled with gfortran (enforced with NOWARN=yes which adds the -Werror flag)
 4. Testsuite must work with and without MPI, i.e. compile with MPI=yes
 
-
+How to reproduce the github build environment offline
+======================================================
+Just occasionally it is hard to reproduce a failure in the actions. It *is*
+possible to recreate the github actions environment offline, using a Docker container.
+I suggest to do this *only* as a last resort. The recommended steps are::
+
+Running the actions locally
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+1. Install [Docker](https://docs.docker.com/desktop/install/mac-install/)
+2. Install [act](https://github.com/nektos/act)
+3. run the pull_request workflow::
+```
+act pull_request
+```
+
+Checking the phantom build that is failing manually
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If you just want to check things manually but in the same environment
+as used in the actions, try the following::
+
+1. Install [Docker](https://docs.docker.com/desktop/install/mac-install/)
+2. Install Docker command line tools::
+```
+brew install docker
+```
+3. Install the ubuntu-latest image in Docker, e.g. by typing in a terminal [around 6.5Gb download]::
+```
+docker pull nektos/act-environments-ubuntu:18.04
+```
+3. Run the image, and proceed to run phantom build checks manually::
+```
+git clone https://github.com/danieljprice/phantom
+mkdir -p runs/mydisc
+cd runs/mydisc
+~/phantom/scripts/writemake.sh disc > Makefile
+export DEBUG=yes
+export PHANTOM_DIR=~/phantom
+make
+make setup
+make analysis
+make moddump
+./phantomsetup disc
+./phantomsetup disc
+./phantomsetup disc
+./phantom disc
+```
diff --git a/src/main/deriv.F90 b/src/main/deriv.F90
@@ -151,6 +151,7 @@ subroutine derivs(icall,npart,nactive,xyzh,vxyzu,fxyzu,fext,divcurlv,divcurlB,&
  if (do_radiation .and. implicit_radiation .and. dt > 0.) then
     call do_radiation_implicit(dt,npart,rad,xyzh,vxyzu,radprop,drad,ierr)
     if (ierr /= 0 .and. ierr /= ierr_failed_to_converge) call fatal('radiation','Failed in radiation')
+    call do_timing('radiation',tlast,tcpulast)
  endif
 
 !

diff --git a/src/main/radiation_implicit.f90 b/src/main/radiation_implicit.f90
@@ -56,22 +56,27 @@ module radiation_implicit
 !+
 !---------------------------------------------------------
 subroutine do_radiation_implicit(dt,npart,rad,xyzh,vxyzu,radprop,drad,ierr)
- use io, only:fatal,warning
+ use io,         only:fatal,warning
+ use timing,     only:get_timings
+ use derivutils, only:do_timing
  integer, intent(in)  :: npart
  real, intent(in)     :: dt,xyzh(:,:)
  real, intent(inout)  :: radprop(:,:),rad(:,:),vxyzu(:,:),drad(:,:)
  integer, intent(out) :: ierr
  integer              :: nsubsteps,i,nit
  logical              :: failed,moresweep
  real                 :: dtsub,errorE,errorU
+ real(kind=4)         :: tlast,tcpulast
  real, allocatable    :: origEU(:,:),EU0(:,:)
 
  ierr = 0
 
  allocate(origEU(2,npart),EU0(2,npart),stat=ierr)
  if (ierr/=0) call fatal('radiation_implicit','could not allocate memory to origEU and EU0')
 
+ call get_timings(tlast,tcpulast)
  call save_radiation_energies(npart,rad,xyzh,vxyzu,radprop,drad,origEU,.false.)
+ call do_timing('radsave',tlast,tcpulast,start=.true.)
 
  nsubsteps = 1
  moresweep = .true.
@@ -86,7 +91,11 @@ subroutine do_radiation_implicit(dt,npart,rad,xyzh,vxyzu,radprop,drad,ierr)
           moresweep = .false.
           !exit over_substeps
        endif
-       if (i /= nsubsteps) call save_radiation_energies(npart,rad,xyzh,vxyzu,radprop,drad,origEU,.true.)
+       if (i /= nsubsteps) then
+          call get_timings(tlast,tcpulast)
+          call save_radiation_energies(npart,rad,xyzh,vxyzu,radprop,drad,origEU,.true.)
+          call do_timing('radsave',tlast,tcpulast)
+       endif
     enddo over_substeps
 
     !if (moresweep) then
@@ -139,20 +148,27 @@ end subroutine save_radiation_energies
 subroutine do_radiation_onestep(dt,npart,rad,xyzh,vxyzu,radprop,origEU,EU0,failed,nit,errorE,errorU,moresweep,ierr)
  use io,      only:fatal,error,iverbose,warning
  use part,    only:hfact
+ use part,    only:pdvvisc=>luminosity,dvdx,nucleation,dust_temp,eos_vars,drad,iradxi,fxyzu
  use physcon, only:pi
  use kernel,  only:radkern
+ use timing,  only:get_timings
+ use derivutils, only:do_timing
+ use options,    only:implicit_radiation_store_drad
  real, intent(in)     :: dt,xyzh(:,:),origEU(:,:)
  integer, intent(in)  :: npart
  real, intent(inout)  :: radprop(:,:),rad(:,:),vxyzu(:,:)
  logical, intent(out) :: failed,moresweep
  integer, intent(out) :: nit,ierr
  real, intent(out)    :: errorE,errorU,EU0(2,npart)
  integer, allocatable :: ivar(:,:),ijvar(:)
- integer              :: ncompact,ncompactlocal,icompactmax,nneigh_average,its
+ integer              :: ncompact,ncompactlocal,icompactmax,nneigh_average,its_global,its
  real, allocatable    :: vari(:,:),varij(:,:),varij2(:,:),varinew(:,:)
- real :: maxerrE2,maxerrU2,maxerrE2last,maxerrU2last
+ real                 :: maxerrE2,maxerrU2,maxerrE2last,maxerrU2last
+ real(kind=4)         :: tlast,tcpulast,t1,tcpu1
  logical :: converged
 
+ call get_timings(tlast,tcpulast)
+
  failed = .false.
  errorE = 0.
  errorU = 0.
@@ -169,46 +185,90 @@ subroutine do_radiation_onestep(dt,npart,rad,xyzh,vxyzu,radprop,origEU,EU0,faile
 
  !dtimax = dt/imaxstep
  call get_compacted_neighbour_list(xyzh,ivar,ijvar,ncompact,ncompactlocal)
+
  ! check for errors
  if (ncompact <= 0 .or. ncompactlocal <= 0) then
     call error('radiation_implicit','empty neighbour list - need to call set_linklist first?')
     ierr = ierr_neighbourlist_empty
     return
  endif
 
- !$omp parallel
+ call do_timing('radneighlist',tlast,tcpulast,start=.true.)
+
+ !$omp parallel default(none) &
+ !$omp shared(tlast,tcpulast,ncompact,ncompactlocal,npart,icompactmax,dt,its_global) &
+ !$omp shared(xyzh,vxyzu,ivar,ijvar,varinew,radprop,rad,vari,varij,varij2,origEU,EU0) &
+ !$omp shared(pdvvisc,dvdx,nucleation,dust_temp,eos_vars,drad,fxyzu,implicit_radiation_store_drad) &
+ !$omp shared(converged,maxerrE2,maxerrU2,maxerrE2last,maxerrU2last,itsmax_rad,moresweep,tol_rad,iverbose,ierr) &
+ !$omp private(t1,tcpu1,its)
  call fill_arrays(ncompact,ncompactlocal,npart,icompactmax,dt,&
                   xyzh,vxyzu,ivar,ijvar,radprop,rad,vari,varij,varij2,EU0)
 
+ !$omp master
+ call do_timing('radarrays',tlast,tcpulast)
+ !$omp end master
+
+ !$omp single
  maxerrE2last = huge(0.)
  maxerrU2last = huge(0.)
+ !$omp end single
 
  iterations: do its=1,itsmax_rad
+
+    !$omp master
+    call get_timings(t1,tcpu1)
+    !$omp end master
     call compute_flux(ivar,ijvar,ncompact,npart,icompactmax,varij,varij2,vari,EU0,varinew,radprop)
+    !$omp master
+    call do_timing('radflux',t1,tcpu1)
+    !$omp end master
     call calc_lambda_and_eddington(ivar,ncompactlocal,npart,vari,EU0,radprop,ierr)
+    !$omp master
+    call do_timing('radlambda',t1,tcpu1)
+    !$omp end master
     call calc_diffusion_term(ivar,ijvar,varij,ncompact,npart,icompactmax,radprop,vari,EU0,varinew,ierr)
+    !$omp master
+    call do_timing('raddiff',t1,tcpu1)
+    !$omp end master
+
     call update_gas_radiation_energy(ivar,ijvar,vari,ncompact,npart,ncompactlocal,&
-                                     vxyzu,radprop,rad,origEU,varinew,EU0,moresweep,maxerrE2,maxerrU2)
+                                     vxyzu,radprop,rad,origEU,varinew,EU0,&
+                                     pdvvisc,dvdx,nucleation,dust_temp,eos_vars,drad,fxyzu,&
+                                     implicit_radiation_store_drad,moresweep,maxerrE2,maxerrU2)
 
-    if (iverbose >= 2) print*,'iteration: ',its,' error = ',maxerrE2,maxerrU2
+    !$omp master
+    call do_timing('radupdate',t1,tcpu1)
+    !$omp end master
+
+    !$omp single
+    if (iverbose >= 2) then
+       print*,'iteration: ',its,' error = ',maxerrE2,maxerrU2
+    endif
     converged = (maxerrE2 <= tol_rad .and. maxerrU2 <= tol_rad)
+    maxerrU2last = maxerrU2
+    !$omp end single
+
     if (converged) exit iterations
 
-    maxerrU2last = maxerrU2
  enddo iterations
 
  !$omp single
+ its_global = its    ! save the iteration count, should be same on all threads
+ !$omp end single
+
+ !$omp end parallel
+
  if (converged) then
     if (iverbose >= 0) print "(1x,a,i4,a,es10.3,a,es10.3)", &
-          trim(label)//': succeeded with ',its,' iterations: xi err:',maxerrE2,' u err:',maxerrU2
+          trim(label)//': succeeded with ',its_global,' iterations: xi err:',maxerrE2,' u err:',maxerrU2
  else
     call warning('radiation_implicit','maximum iterations reached')
     moresweep = .true.
  endif
- !$omp end single
- !$omp end parallel
 
+ call do_timing('radits',tlast,tcpulast)
  call store_radiation_results(ncompactlocal,npart,ivar,EU0,rad,vxyzu)
+ call do_timing('radstore',tlast,tcpulast)
 
 end subroutine do_radiation_onestep
 
@@ -372,8 +432,6 @@ subroutine fill_arrays(ncompact,ncompactlocal,npart,icompactmax,dt,xyzh,vxyzu,iv
 
  c_code = get_c_code()
  !$omp do &
- !!$omp shared(EU0,radprop,rad,xyzh,vxyzu,c_code,vari,ivar,ijvar,varij,varij2,dvdx,dxbound,dybound,dzbound) &
- !!$omp shared(dust_temp,ncompactlocal,ncompact,massoftype,iopacity_type,nucleation,dt,gradh,cv_type) &
  !$omp private(n,i,j,k,rhoi,icompact,pmi,dvxdxi,dvxdyi,dvxdzi,dvydxi,dvydyi,dvydzi,dti) &
  !$omp private(dvzdxi,dvzdyi,dvzdzi,dx,dy,dz,rij2,rij,rij1,dr,pmj,rhoj,hi,hj,hi21,hj21,hi41,hj41) &
  !$omp private(v2i,vi,v2j,vj,dWi,dWj,dvx,dvy,dvz,rhomean,dvdotdr,dv,vmu,dvdWimj,dvdWimi,dvdWjmj) &
@@ -543,7 +601,6 @@ subroutine compute_flux(ivar,ijvar,ncompact,npart,icompactmax,varij,varij2,vari,
  real                :: rhoi,rhoj,pmjdWrunix,pmjdWruniy,pmjdWruniz,dedxi,dedyi,dedzi,dradenij
 
  !$omp do schedule(runtime)&
- !!$omp shared(vari,ivar,EU0,varij2,ijvar,varij,ncompact,radprop,varinew)&
  !$omp private(i,j,k,n,dedxi,dedyi,dedzi,rhoi,rhoj,icompact)&
  !$omp private(pmjdWrunix,pmjdWruniy,pmjdWruniz,dradenij)
 
@@ -596,12 +653,12 @@ subroutine calc_lambda_and_eddington(ivar,ncompactlocal,npart,vari,EU0,radprop,i
  integer, intent(in) :: ivar(:,:),ncompactlocal,npart
  real, intent(in)    :: vari(:,:),EU0(2,npart)
  real, intent(inout) :: radprop(:,:)
- integer             :: n,i,ierr
+ integer, intent(out) :: ierr
+ integer             :: n,i
  real                :: rhoi,gradE1i,opacity,radRi
 
  ierr = 0
- !$omp do &
- !!$omp shared(vari,ivar,radprop,ncompactlocal,EU0,dust_temp,nucleation,limit_radiation_flux) &
+ !$omp do schedule(runtime)&
  !$omp private(i,n,rhoi,gradE1i,opacity,radRi) &
  !$omp reduction(max:ierr)
  do n = 1,ncompactlocal
@@ -653,8 +710,7 @@ subroutine calc_diffusion_term(ivar,ijvar,varij,ncompact,npart,icompactmax, &
  real                 :: diffusion_numerator,diffusion_denominator,tempval1,tempval2
 
  ierr = 0
- !$omp do &
- !!$omp shared(vari,varij,ivar,ijvar,radprop,ncompact,EU0,varinew,dust_temp,nucleation)&
+ !$omp do schedule(runtime)&
  !$omp private(i,j,k,n,rhoi,rhoj,opacityi,opacityj,bi,bj,b1,diffusion_numerator,diffusion_denominator)&
  !$omp private(dWiidrlightrhorhom,dWidrlightrhorhom,dWjdrlightrhorhom,tempval1,tempval2,icompact)&
  !$omp reduction(max:ierr)
@@ -741,17 +797,21 @@ end subroutine calc_diffusion_term
 !+
 !---------------------------------------------------------
 subroutine update_gas_radiation_energy(ivar,ijvar,vari,ncompact,npart,ncompactlocal,&
-                                       vxyzu,radprop,rad,origEU,varinew,EU0,moresweep,maxerrE2,maxerrU2)
+                                       vxyzu,radprop,rad,origEU,varinew,EU0,&
+                                       pdvvisc,dvdx,nucleation,dust_temp,eos_vars,drad,fxyzu, &
+                                       store_drad,moresweep,maxerrE2,maxerrU2)
  use io,      only:fatal,error
- use part,    only:pdvvisc=>luminosity,dvdx,nucleation,dust_temp,eos_vars,drad,iradxi,fxyzu
  use units,   only:get_radconst_code,get_c_code,unit_density
  use physcon, only:mass_proton_cgs
  use eos,     only:metallicity=>Z_in
- use options, only:implicit_radiation_store_drad
  integer, intent(in) :: ivar(:,:),ijvar(:),ncompact,npart,ncompactlocal
  real, intent(in)    :: vari(:,:),varinew(3,npart),rad(:,:),origEU(:,:),vxyzu(:,:)
+ real(kind=4), intent(in) :: pdvvisc(:),dvdx(:,:)
+ real, intent(in)    :: eos_vars(:,:)
+ real, intent(inout) :: drad(:,:),fxyzu(:,:),nucleation(:,:),dust_temp(:)
  real, intent(inout) :: radprop(:,:),EU0(2,npart)
  real, intent(out)   :: maxerrE2,maxerrU2
+ logical, intent(in) :: store_drad
  logical, intent(out):: moresweep
  integer             :: i,j,n,ieqtype,ierr
  logical             :: moresweep2,skip_quartic
@@ -766,13 +826,12 @@ subroutine update_gas_radiation_energy(ivar,ijvar,vari,ncompact,npart,ncompactlo
  a_code = get_radconst_code()
  c_code = get_c_code()
  moresweep = .false.
+ !$omp single
  maxerrE2 = 0.
  maxerrU2 = 0.
+ !$omp end single
 
- !$omp do &
- !!$omp shared(vari,ivar,ijvar,radprop,rad,ncompact,ncompactlocal,EU0,varinew) &
- !!$omp shared(dvdx,origEU,nucleation,dust_temp,eos_vars,implicit_radiation_store_drad,cv_type) &
- !!$omp shared(moresweep,pdvvisc,metallicity,vxyzu,iopacity_type,a_code,c_code,massoftype,drad,fxyzu) &
+ !$omp do schedule(runtime)&
  !$omp private(i,j,n,rhoi,dti,diffusion_numerator,diffusion_denominator,U1i,skip_quartic,Tgas,E1i,dUcomb,dEcomb) &
  !$omp private(gradEi2,gradvPi,rpdiag,rpall,radpresdenom,stellarradiation,dust_tempi,dust_kappai,xnH2) &
  !$omp private(dust_cooling,heatingISRi,dust_gas,gas_dust_val,dustgammaval,gas_dust_cooling,cosmic_ray) &
@@ -1010,7 +1069,7 @@ subroutine update_gas_radiation_energy(ivar,ijvar,vari,ncompact,npart,ncompactlo
     radprop(icv,i) = get_cv(rhoi,EU0(2,i),cv_type)
     radprop(ikappa,i) = get_kappa(iopacity_type,EU0(2,i),radprop(icv,i),rhoi)
 
-    if (implicit_radiation_store_drad) then  ! use this for testing
+    if (store_drad) then  ! use this for testing
        drad(iradxi,i) = (E1i - origEU(1,i))/dti  ! dxi/dt
        fxyzu(4,i) = (U1i - origEU(2,i))/dti      ! du/dt
     endif