# Using the BTagging Tool

Getting a tool to work properly is a little tricky. We'll do that here, and then move it into the hints file.

In [8]:
from func_adl_servicex_xaodr25 import FuncADLQueryPHYSLITE
from func_adl_servicex_xaodr25.xAOD.jet_v1 import Jet_v1
from servicex_analysis_utils import to_awk
from servicex import ServiceXSpec, Sample, dataset, deliver
from func_adl import ObjectStream, func_adl_callable
import ast
from typing import Tuple, TypeVar
# from servicex_local import deliver, LocalXAODCodegen, DockerScienceImage, SXLocalAdaptor, WSL2ScienceImage

Set up the base PHYSLITE query that everything below builds on.

In [9]:
# Base query object; the tool declaration below is attached to it via `make_a_tool`.
physlite = FuncADLQueryPHYSLITE()

Now configure the tool to run

- Taking examples from [use of the JetSelectionTool in the data extractor](https://github.com/gordonwatts/sx_training_fetch/blob/main/calratio_training_data/cpp_xaod_utils.py#L123)
- BTaggingSelectionTool info from the [BTagging Docs](https://ftag.docs.cern.ch/calibrations/cdi/interface/btaggingefficiencytool/).
- And from some [example code](https://gitlab.cern.ch/atlas/athena/-/blob/main/PhysicsAnalysis/JetTagging/JetTagPerformanceCalibration/xAODBTaggingEfficiency/util/BTaggingSelectionToolTester.cxx).
- And a better [working example](https://gitlab.cern.ch/atlas/athena/-/blob/main/PhysicsAnalysis/JetTagging/JetTagPerformanceCalibration/xAODBTaggingEfficiency/util/BTaggingToolsExample.cxx)

In [10]:
from dataclasses import dataclass
from typing import Callable, Iterable, List, Optional

# Generic placeholder for the element type carried by an `ObjectStream`.
T = TypeVar("T")


@dataclass
class ToolInfo:
    """Handle describing a C++ tool that has been declared on a query."""

    # Name given to the tool; `make_a_tool` uses the same string for the C++
    # member variable, and accessor code substitutes it for `{tool_name}`.
    name: str


def make_a_tool(
    query: ObjectStream[T],
    tool_name: str,
    tool_type: str,
    include_files: Optional[List[str]],
    init_lines: Optional[List[str]] = None,
) -> Tuple[ObjectStream[T], ToolInfo]:
    """Declare a C++ tool instance on a query via `inject_code` metadata.

    The injected metadata declares a private pointer member called `tool_name`
    of type `tool_type`, constructs it with `new {tool_type}("{tool_name}")`,
    and runs `init_lines` as the initialization code.

    Args:
        query: The query to attach the tool declaration to.
        tool_name: Name of the C++ member variable (also passed to the tool's
            constructor as its name).
        tool_type: C++ class name of the tool.
        include_files: Header files to include; `None` means no extra headers.
        init_lines: C++ initialization statements. Each line is passed through
            `str.format(tool_name=...)`, so `{tool_name}` is replaced by the
            tool's name and any literal braces must be doubled. `None` (the
            default) means no initialization lines.

    Returns:
        A tuple of the query with the metadata attached and a `ToolInfo`
        carrying the tool's name for use by accessor functions.
    """
    # Normalize the optional lists: never share a mutable default between calls,
    # never forward `None` into the metadata, and never alias the caller's lists.
    header_includes = list(include_files) if include_files is not None else []
    raw_init_lines = list(init_lines) if init_lines is not None else []

    query_base = query.MetaData(
        {
            "metadata_type": "inject_code",
            "name": tool_name,
            "header_includes": header_includes,
            "private_members": [f"{tool_type} *{tool_name};"],
            "instance_initialization": [
                f'{tool_name}(new {tool_type} ("{tool_name}"))'
            ],
            "initialize_lines": [
                line.format(tool_name=tool_name) for line in raw_init_lines
            ],
            # NOTE: the link library is fixed to the b-tagging efficiency package,
            # so as written this helper only suits tools from that library.
            "link_libraries": ["xAODBTaggingEfficiencyLib"],
        }
    )

    return query_base, ToolInfo(name=tool_name)


def make_tool_accessor(
    t_info: ToolInfo,
    function_name: str,
    source_code: List[str],
    arguments: Iterable[Tuple[str, type]],
    return_type_cpp: str,
    return_type_python: str
):
    """Build a `func_adl`-callable Python function that runs C++ against a tool.

    Args:
        t_info: Handle of the tool, as returned by `make_a_tool`.
        function_name: Name of the generated function (used both for the C++
            function metadata and as the Python function's `__name__`).
        source_code: C++ statements that must assign to a variable called
            `result`. Each line is passed through `str.format(tool_name=...)`,
            so `{tool_name}` is replaced by the tool's name and literal braces
            must be doubled.
        arguments: `(name, python_type)` pairs for the function's arguments.
            Only the names are forwarded to the C++ metadata.
        return_type_cpp: C++ return type reported in the metadata.
        return_type_python: Python expression naming the return type (for
            example `"float"`); it is evaluated to build the return annotation.

    Returns:
        The dummy Python function wrapped by `func_adl_callable`, ready to be
        used inside a query lambda.
    """
    # Snapshot the argument names once. `arguments` is only promised to be an
    # iterable, and the callback below can run more than once; a one-shot
    # iterator would otherwise yield no arguments on the second invocation.
    argument_names = [arg[0] for arg in arguments]

    # Define the callback function that `func_adl` will use to inject the calling code.
    def tool_callback(
        s: ObjectStream[T], a: ast.Call
    ) -> Tuple[ObjectStream[T], ast.Call]:
        new_s = s.MetaData(
            {
                "metadata_type": "add_cpp_function",
                "name": function_name,
                "code": [
                    # NOTE: `result` is always declared as a double, independent of
                    # `return_type_cpp`.
                    "double result;",
                    *[line.format(tool_name=t_info.name) for line in source_code],
                ],
                "result": "result",
                "include_files": [],
                "arguments": argument_names,
                "return_type": return_type_cpp,
            }
        )
        return new_s, a

    # Build a stub whose name and return annotation tell `func_adl` what the
    # function looks like; the body is never meant to do real work.
    def tool_call(**arg_dict):
        """
        NOTE: This is a dummy function that injects C++ into the object stream to do the
        actual work.
        """
        ...
    tool_call.__name__ = function_name
    # NOTE(review): `eval` runs on a caller-supplied string. Only pass trusted,
    # hard-coded type names such as "float" or "bool" here.
    tool_call.__annotations__['return'] = eval(return_type_python)

    return func_adl_callable(tool_callback)(tool_call)

In [11]:
# Declare the b-tagging selection tool on top of the base query. `{tool_name}`
# in the init lines is filled in by `make_a_tool`.
query_base, t_info = make_a_tool(
    query=physlite,
    tool_name="btag_discriminator",
    tool_type="BTaggingSelectionTool",
    include_files=["xAODBTaggingEfficiency/BTaggingSelectionTool.h"],
    init_lines=[
        'ANA_CHECK(asg::setProperty({tool_name}, "OperatingPoint", "FixedCutBEff_77"));',
        "ANA_CHECK({tool_name}->initialize());",
    ],
)

# Accessor that returns the tagger weight the tool computes for a jet.
tag_weight = make_tool_accessor(
    t_info=t_info,
    function_name="tag_weight",
    source_code=["ANA_CHECK({tool_name}->getTaggerWeight(*jet, result, false));"],
    arguments=[("jet", Jet_v1)],
    return_type_cpp="double",
    return_type_python="float",
)

Some other info — candidate b-tagging files for each run period:

* `13p6TeV/MC23_2025-06-17_GN2v01_v4.root` for Run 3 (MC23 files)
* `13TeV/MC20_2025-06-17_GN2v01_v4.root` for Run 2 (MC20 files)

It seems that we should not specify the btagging file - use whatever is default in the release.

Next, define the function that tells us whether a given jet passes the b-tagging selection.

In [12]:

def tag_passed_callback(
    s: ObjectStream[T], a: ast.Call
) -> Tuple[ObjectStream[T], ast.Call]:
    """Attach the C++ implementation of `jet_is_tagged` to the query.

    Args:
        s: The query stream the call appears in.
        a: The call node for `jet_is_tagged`; it is returned unchanged.

    Returns:
        The stream with the `add_cpp_function` metadata attached, and the
        original call node.
    """
    new_s = s.MetaData(
        {
            "metadata_type": "add_cpp_function",
            "name": "jet_is_tagged",
            # Like the other injected snippets in this notebook, the statement
            # carries its own terminating ';'. The tool name is read from the
            # module-level `t_info` when the callback runs.
            "code": [f"bool result = static_cast<bool>({t_info.name}->accept(*jet));"],
            "result": "result",
            "include_files": [],
            "arguments": ["jet"],
            "return_type": "bool",
        }
    )

    return new_s, a

@func_adl_callable(tag_passed_callback)
def jet_is_tagged(jet: Jet_v1) -> bool:
    """Report whether a jet passes the b-tagging selection tool.

    Args:
        jet (Jet_v1): The jet we are operating against

    NOTE: This is a dummy function that injects C++ into the object stream to do the
    actual work.

    Returns:
        bool: Is the jet b-tagged?
    """
    ...

Finally, we can actually execute the query!

In [13]:
query = (
    # Flatten the events into a single stream of jets.
    query_base.SelectMany(lambda event: event.Jets())
    # Kinematic pre-selection (pt threshold presumably in MeV — TODO confirm units).
    .Where(lambda j: j.pt() > 20000 and abs(j.eta()) < 2)
    .Select(
        # One record per jet: both tool outputs plus the basic kinematics.
        lambda j: {
            "is_tagged": jet_is_tagged(j),
            "weight": tag_weight(j),
            "pt": j.pt(),
            "eta": j.eta(),
        }
    )
)

And now we can run the query on a sample (a single file of an MC23 ttbar dataset)!

In [14]:
ds_name = "mc23_13p6TeV:mc23_13p6TeV.601237.PhPy8EG_A14_ttbar_hdamp258p75_allhad.deriv.DAOD_PHYSLITE.e8514_s4369_r16083_p6697"

# A single-file sample keeps the turnaround short while the tool is being developed.
ttbar_sample = Sample(
    Name="ttbar_trijet",
    Dataset=dataset.Rucio(ds_name),
    NFiles=1,
    Query=query,  # type: ignore
)

# Run the request and convert what comes back; results are keyed by sample name.
data = to_awk(deliver(ServiceXSpec(Sample=[ttbar_sample])))
jets = data["ttbar_trijet"]

Output()

In [15]:
# Show the per-jet records (is_tagged, weight, pt, eta) returned for the sample.
jets