# Using the BTagging Tool

Getting a tool to work properly is a little tricky. We'll do that here, and then move it into the hints file.

In [1]:
import awkward as ak
import vector
from func_adl_servicex_xaodr25 import FuncADLQueryPHYSLITE
from func_adl_servicex_xaodr25.xAOD.jet_v1 import Jet_v1
from servicex_analysis_utils import to_awk
from servicex import deliver, ServiceXSpec, Sample, dataset
from hist import Hist
import matplotlib.pyplot as plt
import mplhep as hep
import numpy as np
from func_adl import ObjectStream, func_adl_callable
import ast
from typing import Tuple, TypeVar


Set up the base PHYSLITE query that everything else builds on.

In [2]:
# Base query object; the cells below chain MetaData/SelectMany calls onto it.
physlite = FuncADLQueryPHYSLITE()

Now configure the tool to run

- Taking examples from [use of the JetSelectionTool in the data extractor](https://github.com/gordonwatts/sx_training_fetch/blob/main/calratio_training_data/cpp_xaod_utils.py#L123)
- BTaggingSelectionTool info from the [BTagging Docs](https://ftag.docs.cern.ch/calibrations/cdi/interface/btaggingefficiencytool/).
- And from some [example code](https://gitlab.cern.ch/atlas/athena/-/blob/main/PhysicsAnalysis/JetTagging/JetTagPerformanceCalibration/xAODBTaggingEfficiency/util/BTaggingSelectionToolTester.cxx).
- And a better [working example](https://gitlab.cern.ch/atlas/athena/-/blob/main/PhysicsAnalysis/JetTagging/JetTagPerformanceCalibration/xAODBTaggingEfficiency/util/BTaggingToolsExample.cxx)

In [None]:
# Value passed to the tool's "OperatingPoint" property.
cut_name = "FixedCutBEff_77"
# Identifier used for the tool in the injected C++ (member name and tool name).
tool_name = f"btag_selection_tool_{cut_name}"

# Attach an "inject_code" metadata block to the base query. It declares the
# tool as a private member, constructs it, sets its properties and then
# initializes it.
query_base = physlite.MetaData(
    dict(
        metadata_type="inject_code",
        name=f"btag_tool_{tool_name}",
        header_includes=["xAODBTaggingEfficiency/BTaggingSelectionTool.h"],
        private_members=[f"IBTaggingSelectionTool *{tool_name};"],
        instance_initialization=[
            f'{tool_name}(new BTaggingSelectionTool("{tool_name}"))'
        ],
        # Property setters run first; initialize() is the final line.
        # Optional properties that are currently left unset:
        #   f'ANA_CHECK(asg::setProperty({tool_name}, "TaggerName", ""));'
        #   f'ANA_CHECK(asg::setProperty({tool_name}, "JetAuthor", "AntiKt4EMPFlowJets"));'  (default is PFlowJets)
        #   f'ANA_CHECK(asg::setProperty({tool_name}, "MinPt", 20000));'  (default is 20 GeV)
        initialize_lines=[
            f'ANA_CHECK(asg::setProperty({tool_name}, "FlvTagCutDefinitionsFileName", "13p6TeV/MC23_2025-06-17_GN2v01_v4.root"));',
            f'ANA_CHECK(asg::setProperty({tool_name}, "OperatingPoint", "{cut_name}"));',
            f"ANA_CHECK({tool_name}->initialize());",
        ],
        link_libraries=["xAODBTaggingEfficiency"],
    )
)

Some other info:

* `13p6TeV/MC23_2025-06-17_GN2v01_v4.root` for Run 3 (MC23 files)
* `13TeV/MC20_2025-06-17_GN2v01_v4.root` for Run 2 (MC20 files)

**TODO**: list the possible cut names (operating points) here and link to the reference that defines them.

Next, define the code that will actually tell us whether each jet is tagged.

In [4]:
T = TypeVar("T")


def tag_passed_callback(
    s: ObjectStream[T], a: ast.Call
) -> Tuple[ObjectStream[T], ast.Call]:
    """Register the C++ implementation of `jet_is_tagged` on the query stream.

    Adds an "add_cpp_function" metadata block that declares a C++ function
    named `jet_is_tagged`, taking one argument called `jet` and returning the
    boolean result of the b-tagging tool's `accept` call on that jet.

    The module-level `tool_name` is read when this callback runs, so it must
    match the member name used when the tool was injected into the query.

    Args:
        s: The stream the call appears in.
        a: The call node for `jet_is_tagged(...)`; returned unchanged.

    Returns:
        The stream with the metadata attached, and the original call node.
    """
    new_s = s.MetaData(
        {
            "metadata_type": "add_cpp_function",
            "name": "jet_is_tagged",
            # This is a C++ statement, so it needs its terminating `;` --
            # the same as every line in the tool's initialize_lines.
            "code": [
                f"bool result = static_cast<bool>({tool_name}->accept(*jet));"
            ],
            "result": "result",
            "include_files": [],
            "arguments": ["jet"],
            "return_type": "bool",
        }
    )
    return new_s, a


@func_adl_callable(tag_passed_callback)
def jet_is_tagged(jet: Jet_v1) -> bool:
    """Ask the b-tagging selection tool whether the jet passes its cut.

    * return true or false if the jet passes the selection cut.

    Args:
        jet (Jet_v1): The jet we are operating against

    NOTE: This is a dummy function that injects C++ into the object stream to do the
    actual work. The C++ is registered by `tag_passed_callback`, so this Python
    body is intentionally empty.

    Returns:
        bool: Is the jet b-tagged?
    """
    ...

Finally, we can actually execute the query!

In [5]:
# One output record per jet: flatten events into jets, keep jets with
# pt > 20000 and |eta| < 2, then record the tag decision and the kinematics.
query = (
    query_base.SelectMany(lambda e: e.Jets())
    .Where(lambda jet: jet.pt() > 20000 and abs(jet.eta()) < 2)
    .Select(
        lambda jet: {
            "is_tagged": jet_is_tagged(jet),
            "pt": jet.pt(),
            "eta": jet.eta(),
        }
    )
)

And now we can run on a data sample!

In [6]:
# Rucio dataset identifier of the sample to run over.
ds_name = "mc23_13p6TeV:mc23_13p6TeV.601237.PhPy8EG_A14_ttbar_hdamp258p75_allhad.deriv.DAOD_PHYSLITE.e8514_s4369_r16083_p6697"

# A single request: one file of the dataset, processed with `query`.
ttbar_sample = Sample(
    Name="ttbar_trijet",
    Dataset=dataset.Rucio(ds_name),
    NFiles=1,
    Query=query,  # type: ignore
)

# Submit the request to the named ServiceX endpoint.
delivered = deliver(
    ServiceXSpec(Sample=[ttbar_sample]),
    servicex_name="servicex-release-prod",
)

# The converted result is indexed by the sample name given above.
data = to_awk(delivered)
jets = data["ttbar_trijet"]

Output()

ValueError: ServiceX result path list for ttbar_trijet cannot be empty.