Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
multi-thread support for MadGraph LO process
- Loading branch information
Showing
2 changed files
with
269 additions
and
0 deletions.
There are no files selected for viewing
151 changes: 151 additions & 0 deletions
151
GeneratorInterface/LHEInterface/data/run_generic_tarball_cvmfs_madgraphLO_multithread.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
#!/bin/bash

# Script to run generic LHE generation tarballs.
# Kept as simple as possible to minimize the need to update the CMSSW release
# (all the logic goes in the run script inside the tarball on frontier).
# J.Bendavid

# exit on first error
set -e

echo "[MT] NOTE: The script provides a cure for earlier MadGraph LO gridpacks to enable multi-threading (MT). It is also flexible enough to handle all versions of input gridpack, by applying necessary patches and/or fix bugs depending on the gridpack. In this sense, it disobeys the original goal to make this code simplest and keep ALL the logic in 'runcmsgrid.sh' inside the tarball. It is hence expected to switch back to the original 'run_generic_tarball_cvmfs.sh' at a proper time when the new gridpacks are starting to equip with the MT feature."

echo " ______________________________________ "
echo " Running Generic Tarball/Gridpack "
echo " ______________________________________ "
|
||
# Read the positional arguments into the script-level variables used below
# and echo each of them.
#   $1  path                gridpack tarball path
#   $2  nevt                number of events requested
#   $3  rnum                random seed used for the run
#   $4  ncpu                thread count requested
#   $5  use_gridpack_env    optional
#   $6  scram_arch_version  optional
#   $7  cmssw_version       optional
# The three optional variables are assigned only when the corresponding
# argument is non-empty; otherwise they are left untouched.
parse_args() {
    path=${1}
    echo "gridpack tarball path = $path"

    nevt=${2}
    echo "%MSG-MG5 number of events requested = $nevt"

    rnum=${3}
    echo "%MSG-MG5 random seed used for the run = $rnum"

    ncpu=${4}
    echo "%MSG-MG5 thread count requested = $ncpu"

    # "$*" (not "$@") because the arguments are embedded in a single string
    echo "%MSG-MG5 residual/optional arguments = ${*:5}"

    if [ -n "${5}" ]; then
        use_gridpack_env=${5}
        echo "%MSG-MG5 use_gridpack_env = $use_gridpack_env"
    fi

    if [ -n "${6}" ]; then
        scram_arch_version=${6}
        echo "%MSG-MG5 override scram_arch_version = $scram_arch_version"
    fi

    if [ -n "${7}" ]; then
        cmssw_version=${7}
        echo "%MSG-MG5 override cmssw_version = $cmssw_version"
    fi
}
parse_args "$@"
|
||
# Directory the script was started from; the final LHE file is moved back
# here and the script returns here after every excursion.
LHEWORKDIR=$(pwd)

# Create a dedicated CMSSW project area and load its runtime environment,
# but only when use_gridpack_env is the literal string "false" and both
# override values (architecture and release) are non-empty.
if [[ "$use_gridpack_env" == false && -n "$scram_arch_version" && -n "$cmssw_version" ]]; then
    echo "%MSG-MG5 CMSSW version = $cmssw_version"
    export SCRAM_ARCH=${scram_arch_version}
    scramv1 project CMSSW "${cmssw_version}"
    cd "${cmssw_version}/src"
    # scramv1 prints shell commands that set up the runtime environment
    eval "$(scramv1 runtime -sh)"
    cd "$LHEWORKDIR"
fi
|
||
# Start from an empty work area: remove any lheevent directory that is
# already present, then create it afresh and work inside it.
if [[ -d lheevent ]]; then
    echo 'lheevent directory found'
    echo 'Setting up the environment'
    rm -rf lheevent
fi
mkdir lheevent
cd lheevent

# untar the tarball directly from cvmfs
# (-a lets tar pick the decompressor from the file suffix)
tar -xaf "${path}"
|
||
######################################### | ||
# Here starts the new implementation: | ||
# fix the code depending on the gridpack version to enable multi-thread | ||
######################################### | ||
|
||
# exit if the gridpack is not a MG LO one
# (the presence of MGVersion.txt is what is actually tested)
if [[ ! -e process/madevent/SubProcesses/MGVersion.txt ]]; then
    echo "[MT] Error: this script only works for the MG LO gridpack, while this gridpack might be a MG NLO or non-MG one. Please set 'scriptName' as 'GeneratorInterface/LHEInterface/data/run_generic_tarball_cvmfs.sh' instead."
    exit 1
fi

MGVersion=$(<process/madevent/SubProcesses/MGVersion.txt)
echo "[MT] Detected MG verion: ${MGVersion}"

# Turn the dotted version string into an array of fields, so that e.g.
# "2.6.5" gives MGVersion[0]=2, MGVersion[1]=6, MGVersion[2]=5.
# read -a is used instead of an unquoted array assignment so that the text
# is never subject to pathname expansion; only the first line is considered.
read -r -a MGVersion <<< "${MGVersion//./ }"
|
||
# Decide from the parsed version fields whether multi-threading can be
# enabled and, if so, which in-place fixes the unpacked gridpack needs.
# Only MGVersion[1] and MGVersion[2] are compared below.
# NOTE(review): MGVersion[0] is never checked, so a version whose first field
# is not 2 is classified by its remaining fields alone -- confirm intended.
if [[ ${MGVersion[1]} -lt 6 ]] || [[ ${MGVersion[1]} -eq 6 && ${MGVersion[2]} -eq 0 ]]; then | ||
echo "[MT] Warning: multi-threading is not supported in MG version < 2.6.1. Will not activate the multi-thread feature." | ||
# same version range as the patch below, but only when MadLoopParams.dat exists
elif ([[ ${MGVersion[1]} -eq 6 ]] || [[ ${MGVersion[1]} -eq 7 && ${MGVersion[2]} -le 2 ]]) && [[ -e process/madevent/Cards/MadLoopParams.dat ]]; then | ||
echo "[MT] Warning: multi-threading is not supported for loop-induced processes in MG version <= 2.7.2. Will not activate the multi-thread feature." | ||
else | ||
# will activate multi-thread feature | ||
# first fix a multi-thread bug for MG<=2.7.2 | ||
if [[ ${MGVersion[1]} -eq 6 ]] || [[ ${MGVersion[1]} -eq 7 && ${MGVersion[2]} -le 2 ]]; then | ||
echo "[MT] Apply a patch to fix multithread bug in 2.6.1<=MG=2.7.2" | ||
# the unified diff is fed to 'patch' on stdin through the here-document;
# it replaces pjoin(self.me_dir, "Events", ...) with pjoin(outdir, ...) in two calls
patch process/madevent/bin/internal/madevent_interface.py << EOF | ||
=== modified file 'madgraph/interface/madevent_interface.py' | ||
--- madgraph/interface/madevent_interface.py 2020-04-23 12:03:18 +0000 | ||
+++ madgraph/interface/madevent_interface.py 2020-04-23 15:49:28 +0000 | ||
@@ -6667,11 +6667,11 @@ | ||
sum_axsec += result.get('axsec')*gscalefact[Gdir] | ||
if len(AllEvent) >= 80: #perform a partial unweighting | ||
- AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), | ||
+ AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), | ||
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) | ||
AllEvent = lhe_parser.MultiEventFile() | ||
AllEvent.banner = self.banner | ||
- AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), | ||
+ AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), | ||
sum_xsec, | ||
math.sqrt(sum(x**2 for x in sum_xerru)), | ||
sum_axsec) | ||
EOF | ||
fi | ||
|
||
# fix another multi-thread related bug for MG 2.6.1 only | ||
if [[ ${MGVersion[1]} -eq 6 && ${MGVersion[2]} -eq 1 ]]; then | ||
echo "[MT] Apply another patch to fix multithread bug in MG 2.6.1" | ||
# appends a "main_dir = '...'" line after each line matching 'def collect_result';
# $(pwd) is expanded by the shell at this point, before sed runs
sed -i "/def collect\_result/a\ main_dir = '$(pwd)/process/madevent/SubProcesses'" process/madevent/bin/internal/sum_html.py | ||
fi | ||
|
||
# patch on runcmsgrid.sh if old version is detected | ||
# Print the absolute directory that contains a script.
#   $1  script path as invoked (normally "$0")
#   $2  directory against which a relative script path is resolved
# "${0%/*}" is not used because it returns the script name itself when the
# path contains no slash.
_mt_script_dir() {
    local dir
    dir=$(dirname -- "$1")
    case "$dir" in
        /*) printf '%s\n' "$dir" ;;
        *) printf '%s\n' "$2/$dir" ;;
    esac
}

# The marker "succ_setreadonly" only appears in runcmsgrid.sh scripts that
# already implement the multi-thread feature.
if grep -q "succ_setreadonly" runcmsgrid.sh; then
    echo "[MT] Congratulations. You are using the new runcmsgrid.sh script with the MG LO multi-thread feature already implemented. Will use this script for event generation without any patch."
else
    echo "[MT] Old runcmsgrid.sh script detected. This means you are working on an earlier gridpack where MG LO multi-thread feature is not implemented. Will patch on the runcmsgrid.sh code to enable multi-thread feature."
    # The patch file ships next to this script. The working directory is no
    # longer the one the script was started from, so a relative script path
    # must be resolved against LHEWORKDIR (the start directory).
    PATCHDIR=$(_mt_script_dir "$0" "$LHEWORKDIR")
    cp runcmsgrid.sh runcmsgrid.sh.bak
    cp "${PATCHDIR}/runcmsgrid_LO_support_multithread.patch" .
    patch runcmsgrid.sh runcmsgrid_LO_support_multithread.patch
fi
fi | ||
######################################### | ||
|
||
|
||
# If TMPDIR is unset, set it to the condor scratch area if present
# and fallback to /tmp
export TMPDIR=${TMPDIR:-${_CONDOR_SCRATCH_DIR:-/tmp}}

# generate events
# Arguments are quoted so each one reaches runcmsgrid.sh as exactly one word;
# everything from the 5th script argument onwards is forwarded unchanged.
./runcmsgrid.sh "$nevt" "$rnum" "$ncpu" "${@:5}"

# hand the result back to the directory the script was started from
mv cmsgrid_final.lhe "$LHEWORKDIR/"

cd "$LHEWORKDIR"

# cleanup working directory (save space on worker node for edm output)
rm -rf lheevent

exit 0
|
118 changes: 118 additions & 0 deletions
118
GeneratorInterface/LHEInterface/data/runcmsgrid_LO_support_multithread.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
--- runcmsgrid.sh 2020-05-01 03:27:51.000000001 +0200 | ||
+++ runcmsgrid.sh 2020-05-02 10:46:24.000000001 +0200 | ||
@@ -43,6 +43,30 @@ | ||
fi | ||
cd $LHEWORKDIR | ||
|
||
+# test if the current file system allows setting folder permission to read-only. | ||
+succ_setreadonly=true | ||
+mkdir testpermit | ||
+if fs listacl &>/dev/null; then | ||
+ # AFS system detected. Use "fs sa" rather than "chmod" to set permission | ||
+ echo "[MT] AFS system detected" | ||
+ fs sa -dir testpermit -acl ${USER} read | ||
+ if touch testpermit/newfile &>/dev/null; then succ_setreadonly=false; fi | ||
+ fs sa -dir testpermit -acl ${USER} all | ||
+else | ||
+ chmod -w testpermit | ||
+ if touch testpermit/newfile &>/dev/null; then succ_setreadonly=false; fi | ||
+ chmod +w testpermit | ||
+fi | ||
+rm -r testpermit | ||
+if [ $succ_setreadonly = false ]; then | ||
+ echo "[MT] Warning: failed to set a folder to read-only mode with the current file system. Will use the normal mode and run with single core instead. Note that the script only works under directories in ordinary Unix file system or AFS system, while you are probably using other systems, e.g. EOS. This should NOT happen in a CRAB job. Please report the error if you see this in a CRAB job." | ||
+fi | ||
+ | ||
+if fs listacl &>/dev/null; then | ||
+ fs sa -dir process/madevent -acl ${USER} all | ||
+else | ||
+ chmod +w process/madevent | ||
+fi | ||
cd process | ||
|
||
#make sure lhapdf points to local cmssw installation area | ||
@@ -56,6 +80,18 @@ | ||
echo "nb_core = $ncpu" >> ./madevent/Cards/me5_configuration.txt | ||
#fi | ||
|
||
+function event_generate_per_thread () { | ||
+ | ||
+# number of event to generate and seed in this thread | ||
+thd=${1} | ||
+nevt=${2} | ||
+rnum=${3} | ||
+ | ||
+if [ -d thread${thd} ]; then | ||
+ rm -r thread${thd} | ||
+fi | ||
+mkdir thread${thd} | ||
+cd thread${thd} | ||
######################################### | ||
# FORCE IT TO PRODUCE EXACTLY THE REQUIRED NUMBER OF EVENTS | ||
######################################### | ||
@@ -95,7 +131,7 @@ | ||
# run mg5_amc | ||
echo "produced_lhe " $produced_lhe "nevt " $nevt "submitting_event " $submitting_event " remaining_event " $remaining_event | ||
echo run.sh $submitting_event $run_random_seed | ||
- ./run.sh $submitting_event $run_random_seed | ||
+ ../process/run.sh $submitting_event $run_random_seed | ||
|
||
# compute number of events produced in the iteration | ||
produced_lhe=$(($produced_lhe+`zgrep \<event events.lhe.gz | wc -l`)) | ||
@@ -121,6 +157,56 @@ | ||
mv events_${run_counter}.lhe.gz events.lhe.gz | ||
fi | ||
|
||
+cd $LHEWORKDIR | ||
+ | ||
+} ### end of function | ||
+ | ||
+ | ||
+if [ $succ_setreadonly = false ] || [ $ncpu -eq 1 ] || [ $nevt -lt $ncpu ]; then | ||
+ echo "[MT] Use normal mode and run on single core" | ||
+ cd $LHEWORKDIR | ||
+ event_generate_per_thread 0 $nevt $rnum | ||
+ mv thread0/events.lhe.gz process/ | ||
+ rm -r thread0 | ||
+ cd process | ||
+else | ||
+ echo "[MT] Activate multi-threading for event generation -- will use $ncpu cores" | ||
+ nevt_ave=$(( $nevt / $ncpu )) | ||
+ for i in `seq 0 $(( $ncpu-2 ))`; do | ||
+ nevt_per_thread[$i]=$nevt_ave | ||
+ done | ||
+ nevt_per_thread[$(( $ncpu-1 ))]=$(( $nevt - ($ncpu-1)*$nevt_ave )) | ||
+ | ||
+ cd $LHEWORKDIR | ||
+ | ||
+ # make the gridpack directory read-only to enable the multi-threading feature | ||
+ if fs listacl &>/dev/null; then | ||
+ fs sa -dir process/madevent -acl ${USER} read | ||
+ else | ||
+ chmod -w process/madevent | ||
+ fi | ||
+ | ||
+ # when interrupted, restore write access and kill ALL multi-thread event generation commands | ||
+ trap "cd $LHEWORKDIR; if fs listacl &>/dev/null; then fs sa -dir process/madevent -acl ${USER} all; else chmod +w process/madevent; fi; kill 0" SIGINT SIGTERM EXIT | ||
+ for i in `seq 0 $(( $ncpu-1 ))`; do | ||
+ event_generate_per_thread $i ${nevt_per_thread[$i]} $((rnum+10*$i)) | sed -e "s/^/[Thread $i] /" & | ||
+ done; wait | ||
+ trap - SIGINT SIGTERM EXIT # resume | ||
+ | ||
+ if fs listacl &>/dev/null; then | ||
+ fs sa -dir process/madevent -acl ${USER} all | ||
+ else | ||
+ chmod +w process/madevent | ||
+ fi | ||
+ cd process | ||
+ | ||
+ # merge files produced in different threads | ||
+ cp /cvmfs/cms.cern.ch/phys_generator/gridpacks/lhe_merger/merge.pl ./ | ||
+ chmod 755 merge.pl | ||
+ ./merge.pl ../thread*/events.lhe.gz events.lhe.gz banner.txt | ||
+ rm -r ../thread* banner.txt; | ||
+fi | ||
+ | ||
######################################### | ||
######################################### | ||
######################################### |