Skip to content

Commit

Permalink
Attempt at reading metadata flags from input (#1665)
Browse files Browse the repository at this point in the history
* working attempt

* more AB releases

* these rels should work

* maybe this fixes it

* add back old rels

* bring back master

* i forgot cmake

* remove 2 awful releases

* add comment

* fixes to logger and root

* add warning

---------

Co-authored-by: Sagar Addepalli <addepalli.sagar@cern.ch>
  • Loading branch information
SagarA17 and Sagar Addepalli committed Jan 25, 2024
1 parent 54f6bf2 commit 99174eb
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 5 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ jobs:
- 24.2.23
- 24.2.24
- 24.2.25
- 24.2.26
- 24.2.29
- 24.2.30
- 24.2.31
- 24.2.32
- 24.2.33
- 24.2.34

steps:
- uses: actions/checkout@master
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ RUN export RELEASE_TYPE=$([ "$DOCKER_IMG" == "analysisbase" ] && echo "AnalysisB
&& source /release_setup.sh \
&& mkdir -p /workarea/build \
&& cd /workarea/build \
#-DATLAS_USE_CUSTOM_CPACK_INSTALL_SCRIPT=TRUE is needed for some AB releases.
&& time cmake ../src -DATLAS_USE_CUSTOM_CPACK_INSTALL_SCRIPT=TRUE \
&& time make -j2 \
&& cpack \
Expand Down
6 changes: 6 additions & 0 deletions python/cli_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,12 @@
"nargs": "+",
"default": [],
},
"autoFlags": {
"action": "store_true",
"help": "If enabled, meta flags will be auto read from input files (available 24.2.29 onwards).",
"dest": "auto_flags",
"default": False,
},
}

# These are handled by xAH_run.py at the top level instead of down by drivers
Expand Down
154 changes: 154 additions & 0 deletions python/metaConfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from Campaigns.Utils import Campaign
from AthenaConfiguration.Enums import LHCPeriod
from AnalysisAlgorithmsConfig.ConfigAccumulator import DataType

# Fallback lookup table: MC campaign -> list of r-tags that identify it.
# get_campaign_fallback() walks this dict in order and returns the first
# campaign for which one of the listed tags is contained in Input.AMITag.
_campaigns_AMITag = {
    # NOTE this is a "fallback" approach to read campaign based on standard r-tag with pile-up for
    # MC campaigns. For non-standard reconstruction, extra r-tags have to be added here to be recognized.
    # Recommended approach is to read mc_campaign from FileMetaData (seems to require reasonably recent p-tags)
    Campaign.MC20a: ['r13167', 'r14859'],
    Campaign.MC20d: ['r13144', 'r14860'],
    Campaign.MC20e: ['r13145', 'r14861'],
    Campaign.MC21a: ['r13829'],
    Campaign.MC23a: ['r14622'],
    Campaign.MC23c: ['r14799'],
}

# Run-number window for each data-taking year; scanned by get_data_year().
_years_runNumbers = {
    # for each year (dict key), provide list of [min runNumber, max runNumber)
    # i.e. we check min runNumber <= runNumber < max runNumber
    # NOTE: no window covers 400000-409999 or anything >= 999999; for such
    # run numbers get_data_year() prints a warning and returns 0.
    2015: [0, 290000],
    2016: [290000, 320000],
    2017: [320000, 342000],
    2018: [342000, 400000],
    2022: [410000, 450000],
    2023: [450000, 999999]
}

# MC20 campaigns; presumably the Run 2 counterpart of _campaigns_R3.
# NOTE(review): not referenced anywhere else in this module.
_campaigns_R2 = [
    Campaign.MC20a, Campaign.MC20d, Campaign.MC20e
]

# MC campaigns for which isRun3() returns True on simulated samples.
# A campaign missing from this list is treated as not-Run-3 by isRun3().
_campaigns_R3 = [
    Campaign.MC21a, Campaign.MC23a, Campaign.MC23c,
]


def populate_config_flags(flags, metadata):
    """
    Register the additional ``Input.*`` entries on the configuration flags.

    ``metadata`` is only queried for its ``AMITag`` entry (empty string when
    absent); every other value is derived from ``flags`` itself.  Most of the
    new flags are registered with a function of this module rather than a
    value (presumably so the flags container can evaluate them on demand).
    """
    flags.addFlag('Input.AMITag', metadata.get('AMITag', ''))

    run_numbers = flags.Input.RunNumber
    if len(run_numbers) != 1:
        print('WARNING (metaConfig.populate_config_flags): FileMetaData reports RunNumber list '
              f'with not exactly 1 entry: {run_numbers}')
    # only the first entry is kept, whatever the length of the list
    flags.addFlag('Input.RunNumberAsInt', int(run_numbers[0]))

    flags.addFlag('Input.DataType', get_data_type)
    simulated = flags.Input.DataType is not DataType.Data
    # try a fallback solution to determine MC campaign
    # this is for samples, that don't include the MCCampaign entry in FileMetaData
    # this problem should be fixed in p58XX tags
    if simulated and flags.Input.MCCampaign == Campaign.Unknown:
        flags.Input.MCCampaign = get_campaign_fallback

    derived_flags = (
        ('Input.LHCPeriod', get_LHCgeometry),
        ('Input.isRun3', isRun3),
        ('Input.isPHYSLITE', isPhysLite),
    )
    for flag_name, resolver in derived_flags:
        flags.addFlag(flag_name, resolver)


def get_data_type(flags):
    """
    Classify the input as data, full simulation or fast simulation.

    Returns ``DataType.Data`` when ``flags.Input.isMC`` is false; otherwise the
    answer comes from ``flags.Sim.ISF.Simulator``.  Raises ``Exception`` when
    the simulator reports neither full simulation nor FastCaloSim.
    """
    if not flags.Input.isMC:
        return DataType.Data

    simulator = flags.Sim.ISF.Simulator
    if simulator.isFullSim():
        return DataType.FullSim
    if simulator.usesFastCaloSim():
        return DataType.FastSim

    raise Exception('Could not determine data_type, '
                    'perhaps SimulationFlavour metadata is missing')


def get_campaign_fallback(flags):
    """
    Guess the MC campaign from the AMI tag when FileMetaData does not store it.

    Returns the first campaign of ``_campaigns_AMITag`` that has one of its
    tags contained in ``flags.Input.AMITag``.  Raises ``Exception`` when no
    campaign matches; an empty AMITag additionally prints a warning first.
    """
    ami_tag = flags.Input.AMITag
    if ami_tag == '':
        print('WARNING (metaConfig.get_campaign): AMITag entry in FileMetaData '
              'appears to be empty or does not exist')

    for campaign, known_tags in _campaigns_AMITag.items():
        if any(r_tag in ami_tag for r_tag in known_tags):
            return campaign

    raise Exception(f'AMITag {ami_tag} in FileMetaData does not correspond to any implemented campaign')


def get_data_year(flags):
    """
    Map ``flags.Input.RunNumberAsInt`` to a data-taking year.

    Returns the first year of ``_years_runNumbers`` whose half-open window
    contains the run number, or 0 (after a warning) when none does.  A run
    number of 0 triggers an error message but is still looked up in the table.
    """
    run_number = flags.Input.RunNumberAsInt
    if run_number == 0:
        print('ERROR (metaConfig.get_data_year): runNumber == 0, we cannot determine data year reliably.')

    for year, (first_run, end_run) in _years_runNumbers.items():
        if first_run <= run_number < end_run:
            return year

    print(f'WARNING (metaConfig.get_data_year): runNumber {run_number} '
          'does not correspond to any of the defined years of data taking!')
    return 0

def isPhysLite(flags):
    """
    Tell whether the input was produced in the PHYSLITE derivation format.

    Returns True when 'StreamDAOD_PHYSLITE' is among
    ``flags.Input.ProcessingTags``.  When the tags are ``None`` a warning is
    printed and False is returned.
    """
    processing_tags = flags.Input.ProcessingTags
    if processing_tags is None:
        print('WARNING Could not find any information about the sample being PHYSLITE '
              'in the metadata. Will assume that it was regular PHYS.')
        return False
    return 'StreamDAOD_PHYSLITE' in processing_tags

def isRun3(flags):
    """
    Decide whether the input belongs to Run 3.

    Simulated samples are judged by membership of ``flags.Input.MCCampaign``
    in ``_campaigns_R3``; data samples by a data-taking year of 2022 or later.
    """
    if flags.Input.DataType is not DataType.Data:
        return flags.Input.MCCampaign in _campaigns_R3
    return get_data_year(flags) >= 2022


def get_LHCgeometry(flags):
    """
    Return ``LHCPeriod.Run3`` when isRun3() holds for ``flags``, else ``LHCPeriod.Run2``.
    """
    return LHCPeriod.Run3 if isRun3(flags) else LHCPeriod.Run2


def get_grl(flags):
    """
    Return the GRL entry registered for the data-taking year of ``flags``.

    The year comes from get_data_year(); the entry is read from a module-level
    ``_year_GRL`` mapping keyed by year.

    Raises ``Exception`` when no ``_year_GRL`` mapping exists in this module or
    when the mapping has no entry for the year.
    """
    year = get_data_year(flags)

    # `_year_GRL` is not defined anywhere in this module, so a bare reference
    # would escape as a NameError that the KeyError handler below never sees.
    # Resolve it through the module namespace and fail with a clear message.
    grl_by_year = globals().get('_year_GRL')
    if grl_by_year is None:
        raise Exception('No GRL table (_year_GRL) is defined in metaConfig, '
                        f'cannot look up a GRL for year {year}')

    try:
        return grl_by_year[year]
    except KeyError as err:
        raise Exception(f'Unrecognized year for GRL {year}') from err


def pretty_print(flags):
    """
    Print a banner followed by one line per ``flags.Input`` entry of interest.

    Covers both the values read from the metadata and the ones filled in by
    the fall-back options.  Returns nothing.
    """
    rule = "=" * 73
    banner_side = "=" * 19
    indent = " " * 2

    print(rule)
    print(banner_side, "xAODAnaHelpers FLAG CONFIGURATION", banner_side)
    print(rule)

    # (printed label, attribute of flags.Input) in display order
    rows = (
        ("DataType: ", "DataType"),
        ("LHCPeriod: ", "LHCPeriod"),
        ("RunNumber: ", "RunNumber"),
        ("MCChannelNumber:", "MCChannelNumber"),
        ("RunNumberAsInt: ", "RunNumberAsInt"),
        ("AMITag: ", "AMITag"),
        ("isRun3: ", "isRun3"),
        ("isPHYSLITE: ", "isPHYSLITE"),
        ("MCCampaign: ", "MCCampaign"),
        ("GeneratorInfo: ", "GeneratorsInfo"),
    )
    for label, attribute in rows:
        print(indent, label, getattr(flags.Input, attribute))

    print(rule)
38 changes: 33 additions & 5 deletions scripts/xAH_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@
import datetime
import time

# the following athena imports overwrite the root logger with Athenalogger
# we therefore save the original root logger here and afterwards set it to be root logger again
import logging, ROOT
xAH_root_logger = logging.root
ROOT.gROOT.SetBatch(True)

from AthenaConfiguration.AllConfigFlags import initConfigFlags
from AthenaConfiguration.AutoConfigFlags import GetFileMD

logging.Logger.root = xAH_root_logger
logging.Logger.manager = logging.Manager(xAH_root_logger)

try:
import configparser
except ImportError: # Python 2.x fallback
Expand Down Expand Up @@ -166,7 +178,6 @@ def __call__(self, parser, namespace, values, option_string=None):
# parse the arguments, throw errors if missing any
args = parser.parse_args()

import logging
xAH_logger = logging.getLogger("xAH.run")

# set verbosity for python printing
Expand Down Expand Up @@ -240,8 +251,7 @@ def __call__(self, parser, namespace, values, option_string=None):
xAH_logger.warning("--singleTask requires both --inputList and --inputRucio to have an effect")


# at this point, we should import ROOT and do stuff
import ROOT
# at this point, we should use ROOT and do stuff
# Set up the job for xAOD access:
ROOT.xAOD.Init("xAH_run").ignore()

Expand Down Expand Up @@ -403,7 +413,25 @@ def __call__(self, parser, namespace, values, option_string=None):
xAH_logger.info("reading all metadata in {0}".format(path_metadata))
ROOT.SH.readSusyMetaDir(sh_all,path_metadata)

# this is the basic description of our job
# Configuration flags built from the input-file metadata; stays None unless
# --autoFlags (args.auto_flags) was given.
flags = None
if (args.auto_flags):
    # try the xAODAnaHelpers package first, then the python/ package path
    try: import xAODAnaHelpers.metaConfig as metaConfig
    except ImportError: import python.metaConfig as metaConfig

    xAH_logger.warning("Auto configuration of flags has been enabled using --autoFlags. However, the flags will not be automatically applied unless explicitly passed to user code.")

    # collect the name of every file of every sample in the sample handler
    file_list = []
    for sample in sh_all:
        for i in range(sample.numFiles()):
            file_list.append(sample.fileName(i))

    # build the flags, attach the file list, then add the xAH-specific
    # Input.* entries from the file metadata
    flags = initConfigFlags()
    flags.Input.Files = file_list
    metadata = GetFileMD(file_list)
    metaConfig.populate_config_flags(flags, metadata)
    # lock before printing (presumably freezes the flags -- TODO confirm)
    flags.lock()
    metaConfig.pretty_print(flags)

xAH_logger.info("creating new job")
job = ROOT.EL.Job()
job.sampleHandler(sh_all)
Expand Down Expand Up @@ -454,7 +482,7 @@ def __call__(self, parser, namespace, values, option_string=None):
else:
# Executing the python
# (configGlobals and configLocals are used to pass vars
configGlobals, configLocals = {}, {'args': args}
configGlobals, configLocals = {'flags': flags}, {'args': args}
exec(open(args.config).read(), configGlobals, configLocals)

# execfile(args.config, configGlobals, configLocals)
Expand Down

0 comments on commit 99174eb

Please sign in to comment.