Attempt at reading metadata flags from input (#1665)

* working attempt * more AB releases * these rels should work * maybe this fixes it * add back old rels * bring back master * i forgot cmake * remove 2 awful releases * add comment * fixes to logger and root * add warning --------- Co-authored-by: Sagar Addepalli <addepalli.sagar@cern.ch>
UCATLAS · Jan 25, 2024 · 99174eb · 99174eb
1 parent 54f6bf2
commit 99174eb
Show file tree

Hide file tree

Showing 5 changed files with 201 additions and 5 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -25,6 +25,13 @@ jobs:
           - 24.2.23
           - 24.2.24
           - 24.2.25
+          - 24.2.26
+          - 24.2.29
+          - 24.2.30
+          - 24.2.31
+          - 24.2.32
+          - 24.2.33
+          - 24.2.34
 
     steps:
     - uses: actions/checkout@master

diff --git a/Dockerfile b/Dockerfile
@@ -41,6 +41,7 @@ RUN export RELEASE_TYPE=$([ "$DOCKER_IMG" == "analysisbase" ] && echo "AnalysisB
     && source /release_setup.sh \
     && mkdir -p /workarea/build \
     && cd /workarea/build \
+    #-DATLAS_USE_CUSTOM_CPACK_INSTALL_SCRIPT=TRUE is needed for some AB releases.
     && time cmake ../src -DATLAS_USE_CUSTOM_CPACK_INSTALL_SCRIPT=TRUE \
     && time make -j2 \
     && cpack \

diff --git a/python/cli_options.py b/python/cli_options.py
@@ -127,6 +127,12 @@
         "nargs": "+",
         "default": [],
     },
+    "autoFlags": {
+        "action": "store_true",
+        "help": "If enabled, meta flags will be auto read from input files (available 24.2.29 onwards).",
+        "dest": "auto_flags",
+        "default": False,
+    },
 }
 
 # These are handled by xAH_run.py at the top level instead of down by drivers

diff --git a/python/metaConfig.py b/python/metaConfig.py
@@ -0,0 +1,154 @@
+from Campaigns.Utils import Campaign
+from AthenaConfiguration.Enums import LHCPeriod
+from AnalysisAlgorithmsConfig.ConfigAccumulator import DataType
+
+_campaigns_AMITag = {
+    # NOTE this is a "fallback" approach to read campaign based on standard r-tag with pile-up for
+    # MC campaigns. For non-standard reconstruction, extra r-tags have to be added here to be recognized.
+    # Recommended approach is to read mc_campaign from FileMetaData (seems to require reasonably recent p-tags)
+    Campaign.MC20a: ['r13167', 'r14859'],
+    Campaign.MC20d: ['r13144', 'r14860'],
+    Campaign.MC20e: ['r13145', 'r14861'],
+    Campaign.MC21a: ['r13829'],
+    Campaign.MC23a: ['r14622'],
+    Campaign.MC23c: ['r14799'],
+}
+
+_years_runNumbers = {
+    # for each year (dict key), provide list of [min runNumber, max runNumber)
+    # i.e. we check min runNumber <= runNumber < max runNumber 
+    2015: [0, 290000],
+    2016: [290000, 320000],
+    2017: [320000, 342000],
+    2018: [342000, 400000],
+    2022: [410000, 450000],
+    2023: [450000, 999999]
+}
+
+_campaigns_R2 = [
+    Campaign.MC20a, Campaign.MC20d, Campaign.MC20e
+]
+
+_campaigns_R3 = [
+    Campaign.MC21a, Campaign.MC23a, Campaign.MC23c,
+]
+
+
+def populate_config_flags(flags, metadata):
+    """
+    Populate additional information in the AllConfigFlags from FileMetaData
+    """
+    flags.addFlag('Input.AMITag', metadata.get('AMITag', ''))
+    if len(flags.Input.RunNumber) != 1:
+        print('WARNING (metaConfig.populate_config_flags): FileMetaData reports RunNumber list '
+              f'with not exactly 1 entry: {flags.Input.RunNumber}')
+    flags.addFlag('Input.RunNumberAsInt', int(flags.Input.RunNumber[0]))
+    flags.addFlag('Input.DataType', get_data_type)
+    is_data = (flags.Input.DataType is DataType.Data)
+    if not is_data:
+        # try a fallback solution to determine MC campaign
+        # this is for samples, that don't include the MCCampaign entry in FileMetaData
+        # this problem should be fixed in p58XX tags
+        if flags.Input.MCCampaign == Campaign.Unknown:
+            flags.Input.MCCampaign = get_campaign_fallback
+    flags.addFlag('Input.LHCPeriod', get_LHCgeometry)
+    flags.addFlag('Input.isRun3', isRun3)
+    flags.addFlag('Input.isPHYSLITE', isPhysLite)
+
+
+def get_data_type(flags):
+    if not flags.Input.isMC:
+        return DataType.Data
+    else:
+        if flags.Sim.ISF.Simulator.isFullSim():
+            return DataType.FullSim
+        elif flags.Sim.ISF.Simulator.usesFastCaloSim():
+            return DataType.FastSim
+        else:
+            raise Exception('Could not determine data_type, '
+                            'perhaps SimulationFlavour metadata is missing')
+
+
+def get_campaign_fallback(flags):
+    """
+    In case MC Campaign is not stored in FileMetaData, we can try to figure it out from AMI tag.
+    """
+    amiTags = flags.Input.AMITag
+    if amiTags == '':
+        print('WARNING (metaConfig.get_campaign): AMITag entry in FileMetaData '
+                'appears to be empty or does not exist')
+
+    for (cmp, tagsList) in _campaigns_AMITag.items():
+        for tag in tagsList:
+            if tag in amiTags:
+                return cmp
+    raise Exception(f'AMITag {amiTags} in FileMetaData does not correspond to any implemented campaign')
+
+
+def get_data_year(flags):
+    """
+    Try to determine the year of data-taking based on runNumber.
+    """
+    if flags.Input.RunNumberAsInt == 0:
+        print('ERROR (metaConfig.get_data_year): runNumber == 0, we cannot determine data year reliably.')
+    for (year, runRange) in _years_runNumbers.items():
+        if flags.Input.RunNumberAsInt >= runRange[0] and flags.Input.RunNumberAsInt < runRange[1]:
+            return year
+    print(f'WARNING (metaConfig.get_data_year): runNumber {flags.Input.RunNumberAsInt} '
+          'does not correspond to any of the defined years of data taking!')
+    return 0
+
+def isPhysLite(flags):
+    """
+    Check whether the derivation format is PHYSLITE.
+    """
+    if flags.Input.ProcessingTags is not None:
+        return 'StreamDAOD_PHYSLITE' in flags.Input.ProcessingTags
+    else:
+        print('WARNING Could not find any information about the sample being PHYSLITE '
+              'in the metadata. Will assume that it was regular PHYS.')
+    return False
+
+def isRun3(flags):
+    if flags.Input.DataType is DataType.Data:
+        year = get_data_year(flags)
+        return (year >= 2022)
+    else:
+        cmp = flags.Input.MCCampaign
+        return (cmp in _campaigns_R3)
+
+
+def get_LHCgeometry(flags):
+    if isRun3(flags):
+        return LHCPeriod.Run3
+    else:
+        return LHCPeriod.Run2
+
+
+def get_grl(flags):
+    year = get_data_year(flags)
+    try:
+        return _year_GRL[year]
+    except KeyError:
+        raise Exception(f'Unrecognized year for GRL {year}')
+
+
+def pretty_print(flags):
+    """
+    Print all the relevant flags we have set up, both from the
+    metadata and from our fall-back options.
+    """
+    print("="*73)
+    print("="*19, "xAODAnaHelpers FLAG CONFIGURATION", "="*19)
+    print("="*73)
+    print(" "*2, "DataType:       ", flags.Input.DataType)
+    print(" "*2, "LHCPeriod:      ", flags.Input.LHCPeriod)
+    print(" "*2, "RunNumber:      ", flags.Input.RunNumber)
+    print(" "*2, "MCChannelNumber:", flags.Input.MCChannelNumber)
+    print(" "*2, "RunNumberAsInt: ", flags.Input.RunNumberAsInt)
+    print(" "*2, "AMITag:         ", flags.Input.AMITag)
+    print(" "*2, "isRun3:         ", flags.Input.isRun3)
+    print(" "*2, "isPHYSLITE:     ", flags.Input.isPHYSLITE)
+    print(" "*2, "MCCampaign:     ", flags.Input.MCCampaign)
+    print(" "*2, "GeneratorInfo:  ", flags.Input.GeneratorsInfo)
+    print("="*73)
diff --git a/scripts/xAH_run.py b/scripts/xAH_run.py
@@ -23,6 +23,18 @@
 import datetime
 import time
 
+# the following athena imports overwrite the root logger with Athenalogger
+# we therefore save the original root logger here and afterwards set it to be root logger again
+import logging, ROOT
+xAH_root_logger = logging.root
+ROOT.gROOT.SetBatch(True)
+
+from AthenaConfiguration.AllConfigFlags import initConfigFlags
+from AthenaConfiguration.AutoConfigFlags import GetFileMD
+
+logging.Logger.root = xAH_root_logger
+logging.Logger.manager = logging.Manager(xAH_root_logger)
+
 try:
     import configparser
 except ImportError: # Python 2.x fallback
@@ -166,7 +178,6 @@ def __call__(self, parser, namespace, values, option_string=None):
   # parse the arguments, throw errors if missing any
   args = parser.parse_args()
 
-  import logging
   xAH_logger = logging.getLogger("xAH.run")
 
   # set verbosity for python printing
@@ -240,8 +251,7 @@ def __call__(self, parser, namespace, values, option_string=None):
       xAH_logger.warning("--singleTask requires both --inputList and --inputRucio to have an effect")
 
 
-    # at this point, we should import ROOT and do stuff
-    import ROOT
+    # at this point, we should use ROOT and do stuff
     # Set up the job for xAOD access:
     ROOT.xAOD.Init("xAH_run").ignore()
 
@@ -403,7 +413,25 @@ def __call__(self, parser, namespace, values, option_string=None):
     xAH_logger.info("reading all metadata in {0}".format(path_metadata))
     ROOT.SH.readSusyMetaDir(sh_all,path_metadata)
 
-    # this is the basic description of our job
+    flags = None
+    if (args.auto_flags):
+      try: import xAODAnaHelpers.metaConfig as metaConfig
+      except ImportError: import python.metaConfig as metaConfig
+
+      xAH_logger.warning("Auto configuration of flags has been enabled using --autoFlags. However, the flags will not be automatically applied unless explicitly passed to user code.")
+
+      file_list = []
+      for sample in sh_all:
+        for i in range(sample.numFiles()):
+          file_list.append(sample.fileName(i))
+
+      flags = initConfigFlags()
+      flags.Input.Files = file_list
+      metadata = GetFileMD(file_list)
+      metaConfig.populate_config_flags(flags, metadata)
+      flags.lock()
+      metaConfig.pretty_print(flags)
+
     xAH_logger.info("creating new job")
     job = ROOT.EL.Job()
     job.sampleHandler(sh_all)
@@ -454,7 +482,7 @@ def __call__(self, parser, namespace, values, option_string=None):
     else:
       #  Executing the python
       #   (configGlobals and configLocals are used to pass vars
-      configGlobals, configLocals = {}, {'args': args}
+      configGlobals, configLocals = {'flags': flags}, {'args': args}
       exec(open(args.config).read(), configGlobals, configLocals)
 
       # execfile(args.config, configGlobals, configLocals)