Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt at reading metadata flags from input #1665

Merged
merged 12 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ jobs:
- 24.2.23
- 24.2.24
- 24.2.25
- 24.2.26
- 24.2.29
- 24.2.30
- 24.2.31
- 24.2.32
- 24.2.33
- 24.2.34

steps:
- uses: actions/checkout@master
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ RUN export RELEASE_TYPE=$([ "$DOCKER_IMG" == "analysisbase" ] && echo "AnalysisB
&& source /release_setup.sh \
&& mkdir -p /workarea/build \
&& cd /workarea/build \
#-DATLAS_USE_CUSTOM_CPACK_INSTALL_SCRIPT=TRUE is needed for some AB releases.
&& time cmake ../src -DATLAS_USE_CUSTOM_CPACK_INSTALL_SCRIPT=TRUE \
&& time make -j2 \
&& cpack \
Expand Down
6 changes: 6 additions & 0 deletions python/cli_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,12 @@
"nargs": "+",
"default": [],
},
"autoFlags": {
"action": "store_true",
"help": "If enabled, meta flags will be auto read from input files (available 24.2.29 onwards).",
"dest": "auto_flags",
"default": False,
},
}

# These are handled by xAH_run.py at the top level instead of down by drivers
Expand Down
154 changes: 154 additions & 0 deletions python/metaConfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from Campaigns.Utils import Campaign
from AthenaConfiguration.Enums import LHCPeriod
from AnalysisAlgorithmsConfig.ConfigAccumulator import DataType

# Fallback lookup: MC campaign -> standard (pile-up) reconstruction r-tags.
# Samples produced with a non-standard reconstruction need their r-tags added
# here before they can be recognised. The recommended route is still to read
# mc_campaign from FileMetaData (which seems to require reasonably recent
# p-tags); this table is only consulted when that information is missing.
_campaigns_AMITag = {
    Campaign.MC20a: ['r13167', 'r14859'],
    Campaign.MC20d: ['r13144', 'r14860'],
    Campaign.MC20e: ['r13145', 'r14861'],
    Campaign.MC21a: ['r13829'],
    Campaign.MC23a: ['r14622'],
    Campaign.MC23c: ['r14799'],
}

_years_runNumbers = {
# for each year (dict key), provide list of [min runNumber, max runNumber)
# i.e. we check min runNumber <= runNumber < max runNumber
2015: [0, 290000],
2016: [290000, 320000],
2017: [320000, 342000],
2018: [342000, 400000],
2022: [410000, 450000],
2023: [450000, 999999]
}

# MC campaigns grouped by LHC run; isRun3() tests membership in _campaigns_R3.
_campaigns_R2 = [
    Campaign.MC20a,
    Campaign.MC20d,
    Campaign.MC20e,
]

_campaigns_R3 = [
    Campaign.MC21a,
    Campaign.MC23a,
    Campaign.MC23c,
]


def populate_config_flags(flags, metadata):
    """
    Populate additional information in the AllConfigFlags from FileMetaData.

    Adds the flags ``Input.AMITag``, ``Input.RunNumberAsInt``,
    ``Input.DataType``, ``Input.LHCPeriod``, ``Input.isRun3`` and
    ``Input.isPHYSLITE``. For non-data input whose ``Input.MCCampaign`` is
    ``Campaign.Unknown``, the campaign is replaced by the AMI-tag based
    fallback ``get_campaign_fallback``.

    Args:
        flags: flags container providing ``addFlag`` and the ``Input.RunNumber``
            and ``Input.MCCampaign`` entries.
        metadata: mapping-like object with a ``get`` method; only its
            'AMITag' entry is read here.
    """
    flags.addFlag('Input.AMITag', metadata.get('AMITag', ''))

    run_numbers = flags.Input.RunNumber
    if len(run_numbers) != 1:
        print('WARNING (metaConfig.populate_config_flags): FileMetaData reports RunNumber list '
              f'with not exactly 1 entry: {run_numbers}')
    # An empty list used to raise IndexError right after the warning above.
    # Fall back to 0 instead, which get_data_year() reports as unreliable.
    run_number = int(run_numbers[0]) if len(run_numbers) > 0 else 0
    flags.addFlag('Input.RunNumberAsInt', run_number)

    # The functions are passed uncalled; presumably the flags container
    # evaluates them on first access -- confirm against AthConfigFlags.
    flags.addFlag('Input.DataType', get_data_type)
    is_data = (flags.Input.DataType is DataType.Data)
    if not is_data:
        # try a fallback solution to determine MC campaign
        # this is for samples that don't include the MCCampaign entry in FileMetaData
        # this problem should be fixed in p58XX tags
        if flags.Input.MCCampaign == Campaign.Unknown:
            flags.Input.MCCampaign = get_campaign_fallback
    flags.addFlag('Input.LHCPeriod', get_LHCgeometry)
    flags.addFlag('Input.isRun3', isRun3)
    flags.addFlag('Input.isPHYSLITE', isPhysLite)


def get_data_type(flags):
    """
    Classify the input as data, full simulation or fast simulation.

    Returns ``DataType.Data`` when ``flags.Input.isMC`` is false; otherwise
    ``DataType.FullSim`` or ``DataType.FastSim`` depending on what
    ``flags.Sim.ISF.Simulator`` reports.

    Raises:
        Exception: for MC input whose simulator is neither full simulation
            nor fast calorimeter simulation.
    """
    if not flags.Input.isMC:
        return DataType.Data

    simulator = flags.Sim.ISF.Simulator
    if simulator.isFullSim():
        return DataType.FullSim
    if simulator.usesFastCaloSim():
        return DataType.FastSim
    raise Exception('Could not determine data_type, '
                    'perhaps SimulationFlavour metadata is missing')


def get_campaign_fallback(flags):
    """
    Derive the MC campaign from the AMI tag when FileMetaData does not store it.

    Returns the first campaign in ``_campaigns_AMITag`` that has one of its
    r-tags contained in ``flags.Input.AMITag``.

    Raises:
        Exception: when no known r-tag is contained in the AMI tag.
    """
    ami_tag = flags.Input.AMITag
    if ami_tag == '':
        print('WARNING (metaConfig.get_campaign): AMITag entry in FileMetaData '
              'appears to be empty or does not exist')

    for campaign, known_tags in _campaigns_AMITag.items():
        if any(known_tag in ami_tag for known_tag in known_tags):
            return campaign
    raise Exception(f'AMITag {ami_tag} in FileMetaData does not correspond to any implemented campaign')


def get_data_year(flags):
    """
    Look up the year of data-taking from ``flags.Input.RunNumberAsInt``.

    Returns the first year in ``_years_runNumbers`` whose half-open range
    contains the run number, or 0 (after a warning) when none does.
    """
    run_number = flags.Input.RunNumberAsInt
    if run_number == 0:
        # Only a message: run number 0 still falls into the first range of
        # the table below, so that year is returned afterwards.
        print('ERROR (metaConfig.get_data_year): runNumber == 0, we cannot determine data year reliably.')

    for year, (first_run, end_run) in _years_runNumbers.items():
        if first_run <= run_number < end_run:
            return year

    print(f'WARNING (metaConfig.get_data_year): runNumber {run_number} '
          'does not correspond to any of the defined years of data taking!')
    return 0

def isPhysLite(flags):
    """
    Tell whether the derivation format is PHYSLITE.

    Returns True when 'StreamDAOD_PHYSLITE' is among
    ``flags.Input.ProcessingTags``. When the processing tags are None, a
    warning is printed and False is returned.
    """
    processing_tags = flags.Input.ProcessingTags
    if processing_tags is None:
        print('WARNING Could not find any information about the sample being PHYSLITE '
              'in the metadata. Will assume that it was regular PHYS.')
        return False
    return 'StreamDAOD_PHYSLITE' in processing_tags

def isRun3(flags):
    """
    Tell whether the input belongs to Run 3.

    For MC this is membership of ``flags.Input.MCCampaign`` in
    ``_campaigns_R3``; for data it is a data-taking year of 2022 or later.
    """
    if flags.Input.DataType is not DataType.Data:
        return flags.Input.MCCampaign in _campaigns_R3
    return get_data_year(flags) >= 2022


def get_LHCgeometry(flags):
    """
    Return ``LHCPeriod.Run3`` when isRun3() holds for the flags, otherwise
    ``LHCPeriod.Run2``.
    """
    return LHCPeriod.Run3 if isRun3(flags) else LHCPeriod.Run2


# Data-taking year -> GRL entry returned by get_grl().
# The name was referenced by get_grl() but never defined in this module, so
# every call ended in a NameError that the KeyError handler could not catch.
# TODO: fill in the GRL for each supported year; until then get_grl() raises
# its "Unrecognized year" exception for every input.
_year_GRL = {}


def get_grl(flags):
    """
    Return the ``_year_GRL`` entry for the data-taking year of the input.

    The year is obtained from get_data_year().

    Raises:
        Exception: when ``_year_GRL`` has no entry for that year.
    """
    year = get_data_year(flags)
    try:
        return _year_GRL[year]
    except KeyError:
        raise Exception(f'Unrecognized year for GRL {year}')


def pretty_print(flags):
    """
    Print a framed summary of the relevant ``flags.Input`` entries, both those
    read from the metadata and those set by the fall-back options.
    """
    rule = "=" * 73
    side = "=" * 19
    indent = " " * 2

    print(rule)
    print(side, "xAODAnaHelpers FLAG CONFIGURATION", side)
    print(rule)

    # (label, attribute of flags.Input) pairs, printed in this order. The
    # attribute is looked up only when its row is printed.
    rows = (
        ("DataType: ", "DataType"),
        ("LHCPeriod: ", "LHCPeriod"),
        ("RunNumber: ", "RunNumber"),
        ("MCChannelNumber:", "MCChannelNumber"),
        ("RunNumberAsInt: ", "RunNumberAsInt"),
        ("AMITag: ", "AMITag"),
        ("isRun3: ", "isRun3"),
        ("isPHYSLITE: ", "isPHYSLITE"),
        ("MCCampaign: ", "MCCampaign"),
        ("GeneratorInfo: ", "GeneratorsInfo"),
    )
    for label, attribute in rows:
        print(indent, label, getattr(flags.Input, attribute))

    print(rule)
38 changes: 33 additions & 5 deletions scripts/xAH_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@
import datetime
import time

# The Athena imports below replace the root logger with the Athena logger,
# so we save the original root logger here and restore it after the imports.
import logging, ROOT
xAH_root_logger = logging.root
ROOT.gROOT.SetBatch(True)

from AthenaConfiguration.AllConfigFlags import initConfigFlags
from AthenaConfiguration.AutoConfigFlags import GetFileMD

logging.Logger.root = xAH_root_logger
logging.Logger.manager = logging.Manager(xAH_root_logger)

try:
import configparser
except ImportError: # Python 2.x fallback
Expand Down Expand Up @@ -166,7 +178,6 @@ def __call__(self, parser, namespace, values, option_string=None):
# parse the arguments, throw errors if missing any
args = parser.parse_args()

import logging
xAH_logger = logging.getLogger("xAH.run")

# set verbosity for python printing
Expand Down Expand Up @@ -240,8 +251,7 @@ def __call__(self, parser, namespace, values, option_string=None):
xAH_logger.warning("--singleTask requires both --inputList and --inputRucio to have an effect")


# at this point, we should import ROOT and do stuff
import ROOT
# at this point, we should use ROOT and do stuff
# Set up the job for xAOD access:
ROOT.xAOD.Init("xAH_run").ignore()

Expand Down Expand Up @@ -403,7 +413,25 @@ def __call__(self, parser, namespace, values, option_string=None):
xAH_logger.info("reading all metadata in {0}".format(path_metadata))
ROOT.SH.readSusyMetaDir(sh_all,path_metadata)

# this is the basic description of our job
flags = None
if (args.auto_flags):
try: import xAODAnaHelpers.metaConfig as metaConfig
except ImportError: import python.metaConfig as metaConfig

xAH_logger.warning("Auto configuration of flags has been enabled using --autoFlags. However, the flags will not be automatically applied unless explicitly passed to user code.")

file_list = []
for sample in sh_all:
for i in range(sample.numFiles()):
file_list.append(sample.fileName(i))

flags = initConfigFlags()
flags.Input.Files = file_list
metadata = GetFileMD(file_list)
metaConfig.populate_config_flags(flags, metadata)
flags.lock()
metaConfig.pretty_print(flags)

xAH_logger.info("creating new job")
job = ROOT.EL.Job()
job.sampleHandler(sh_all)
Expand Down Expand Up @@ -454,7 +482,7 @@ def __call__(self, parser, namespace, values, option_string=None):
else:
# Executing the python
# (configGlobals and configLocals are used to pass variables to the config script)
configGlobals, configLocals = {}, {'args': args}
configGlobals, configLocals = {'flags': flags}, {'args': args}
exec(open(args.config).read(), configGlobals, configLocals)

# execfile(args.config, configGlobals, configLocals)
Expand Down
Loading