***
# Starting Kit - Black Swan HiggsML Course
***

In [1]:
# True when the repr of the running IPython shell mentions "google.colab".
COLAB = "google.colab" in str(get_ipython())

In [2]:
if COLAB:
    # Fetch a shallow copy (latest commit only) of the course repository.
    ! git clone --depth 1 https://github.com/blackSwanCS/Higgs_collaborations.git

    # NOTE(review): this runs before the `%cd` below, so it reports on the directory
    # the notebook started in rather than on the fresh clone — confirm the intent.
    ! git status
    %cd Higgs_collaborations

In [3]:
# HiggsML utility package should not be modified
# NOTE(review): no version is pinned, so re-runs may pick up a different release.
%pip install HiggsML

### Imports

In [4]:
from sys import path
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
from itertools import product
from numpy.random import RandomState
import warnings
import os
import sys

warnings.filterwarnings("ignore")

### Directories

In [5]:
# All paths are resolved relative to the directory the notebook was started from.
root_dir = os.getcwd()
print("Root directory is", root_dir)
submission_dir = os.path.join(root_dir, "sample_code_submission")

# The directory where results and other outputs from the participant's code will be written
output_dir = os.path.join(root_dir, "sample_result_submission")

# exist_ok avoids the check-then-create race and keeps the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

## Import Submission Model
We import a class named `Model` from the submission file (`model.py`). This `Model` class has the following methods:
- `init`: receives train set and systematics class as input
- `fit`: can be used for training
- `predict`: receives one test set and outputs a dictionary with the following keys
    - `mu_hat` : predicted mu $\hat{\mu}$
    - `delta_mu_hat`: $\Delta{\hat{\mu}}$ bound for $\mu$
    - `p16`: 16th percentile
    - `p84`: 84th percentile

In this example code, the `Model` class implements a basic model with 2 different classifiers trained to predict the class label.

* 1 XGBoost BDT ( [see](sample_code_submission/boosted_decision_tree.py) )
* 2 TensorFlow NN  ( [see](sample_code_submission/neural_network.py) )

The feature engineering module is where you can add derived quantities and decide which features should be used. ( [see](sample_code_submission/feature_engineering.py) )

The statistical analysis module is where you write the calculation that finds mu from the output of the classifier. ( [see](sample_code_submission/statistical_analysis.py) )

If running in Colab, click the folder icon in the left sidebar to open the file browser.


In [6]:
# Put the submission directory on the import path so `model.py` can be imported by name.
path.append(submission_dir)
from model import Model

## Data
### Available data sets
1. blackSwan_data
2. sample_data
3. neurips2024_data 

In [34]:
from HiggsML.datasets import download_dataset

data = download_dataset(
    "blackSwan_data"
)  # use another name from the "Available data sets" list to switch data set

### ⚠️ Note:
The data used here is a small subset of the full data. It is for demonstration only, to give a view of what the data looks like.

In [8]:
# load train set
data.load_train_set()
# Keep a handle on the training set; the cells below index it by column name.
data_set = data.get_train_set()

***
## Visualize the Data Set
***

In [9]:
from tabulate import tabulate

# Columns used throughout the summary: class label, per-event weight, process name.
target = data_set["labels"]
weights = data_set["weights"]
detailed_label = data_set["detailed_labels"]
keys = np.unique(detailed_label)

average_weights = {}
# Per-process event weights, keyed by detailed label.
weight_keys = {key: weights[detailed_label == key] for key in keys}

# One row per process: name, total weight, number of events, mean weight.
table_data = [
    [key, np.sum(process_weights), len(process_weights), np.mean(process_weights)]
    for key, process_weights in weight_keys.items()
]

# Two closing rows with the totals for the signal (1) and background (0) classes.
for row_name, class_id in (("Total Signal", 1), ("Total Background", 0)):
    class_weights = weights[target == class_id]
    table_data.append(
        [
            row_name,
            np.sum(class_weights),
            len(class_weights),
            np.mean(class_weights),
        ]
    )

summary_headers = [
    "Detailed Label",
    "Total Weight",
    "Number of events",
    "Average Weight",
]

print("[*] --- Detailed Label Summary")
print(tabulate(table_data, headers=summary_headers, tablefmt="grid"))

In [10]:
# Show the first rows of the training set with the notebook's rich table rendering.
print("\n[*] --- Examples of all features\n")
display(data_set.head())

In [11]:
# Show per-column summary statistics of the training set.
print("\n[*] --- Description of all features\n")
display(data_set.describe())

In [None]:
from utils import histogram_dataset

# Every primary (PRI_) and derived (DER_) column is treated as a feature.
feature_columns = [
    col for col in data_set.columns if col.startswith(("PRI_", "DER_"))
]

# Plot the features four at a time so that each figure stays readable.
start = 0
while start < len(feature_columns):
    subset = feature_columns[start:start + 4]
    histogram_dataset(data_set, data_set["labels"], data_set["weights"], columns=subset)
    start += 4

In [13]:
from utils import histogram_dataset

# this function is defined in utils.py in the sample_code_submission directory. feel free to modify it as needed

# Histogram four hand-picked features, passing the label and weight columns alongside.
histogram_dataset(
    data_set,
    target,
    weights,
    columns=["PRI_lep_phi", "PRI_met", "DER_mass_vis", "DER_deltaeta_jet_jet"],
)

In [14]:
import seaborn as sns

sns.set_theme(rc={"figure.figsize": (10, 10)}, style="whitegrid")

# Indexed by label value: 0 is background and 1 is signal (the summary table above
# counts `target == 1` as "Total Signal"), so caption[i] describes `target == i`.
caption = ["Background feature", "Signal feature"]

# The selected columns do not depend on the class, so the frame is built once.
dfplot = pd.DataFrame(
    data_set,
    columns=[
        "PRI_lep_phi",
        "PRI_met",
        "DER_pt_ratio_lep_had",
        "DER_deltaeta_jet_jet",
    ],
)

# One correlation heatmap per class.
for i in range(2):

    print(caption[i], " correlation matrix")
    corrMatrix = dfplot[target == i].corr()
    sns.heatmap(corrMatrix, annot=True)
    plt.title("Correlation matrix of features")
    plt.show()

del dfplot

In [15]:
from HiggsML.visualization import stacked_histogram

# Plot one feature, passing labels, weights and the detailed process labels alongside.
stacked_histogram(data_set, target, weights, detailed_label, "PRI_jet_subleading_pt")

In [16]:
from HiggsML.visualization import pair_plots

# Show data summary
# `sample_size=100` is passed to the helper — presumably the number of events drawn; verify.
pair_plots(
    data_set,
    target,
    sample_size=100,
    columns=[
        "PRI_lep_phi",
        "PRI_met",
        "DER_lep_eta_centrality",
        "DER_deltaeta_jet_jet",
    ],
)

In [17]:
import matplotlib.pyplot as plt
from HiggsML.datasets import download_dataset

data = download_dataset("blackSwan_data")
data.load_train_set()
train_set = data.get_train_set()

# Simulate a systematic bias: choose one kind
bias_type = "jes"         # options: "jes", "tes", "soft_met"
bias_magnitude = 0.01     # relative shift (sigma); may be set to +0.01 or -0.01
soft_met_shift = 20       # additive shift used by the "soft_met" example

# Build the biased version of the data (the original frame is left untouched)
biased_train = train_set.copy()

if bias_type == "jes":
    factor = 1.0 + bias_magnitude
    for feature in ["PRI_met", "PRI_jet_leading_pt", "PRI_jet_subleading_pt"]:
        if feature in biased_train.columns:
            biased_train[feature] *= factor

elif bias_type == "tes":
    # NOTE(review): later cells plot "PRI_had_pt" rather than "PRI_tau_pt"; if the
    # latter column is absent this branch silently does nothing — confirm the name.
    factor = 1.0 + bias_magnitude
    if "PRI_tau_pt" in biased_train.columns:
        biased_train["PRI_tau_pt"] *= factor

elif bias_type == "soft_met":
    if "PRI_met" in biased_train.columns:
        biased_train["PRI_met"] += soft_met_shift  # additive bias example

# The title must describe the shift that was really applied: a signed percentage
# for the multiplicative biases, an absolute offset for the additive one.
if bias_type == "soft_met":
    bias_description = f"+{soft_met_shift}"
else:
    bias_description = f"{bias_magnitude * 100:+.1f}%"

# Visualize the affected features
affected_features = ["PRI_met", "PRI_jet_leading_pt", "PRI_jet_subleading_pt", "PRI_tau_pt"]

labels = train_set["labels"]
weights = train_set["weights"]
sig = train_set[labels == 1]
bkg = train_set[labels == 0]
sig_biased = biased_train[labels == 1]
bkg_biased = biased_train[labels == 0]

for feature in affected_features:
    if feature not in train_set.columns:
        continue

    plt.figure(figsize=(8, 5))
    # All four histograms are normalised (density=True); without it the biased
    # curves would be raw weighted counts and could not be compared to the originals.
    plt.hist(sig[feature], bins=50, weights=weights[labels == 1], label="Signal (original)", color="red", alpha=0.4, density=True)
    plt.hist(sig_biased[feature], bins=50, weights=weights[labels == 1], label="Signal (biased)", color="red", histtype="step", linewidth=2, density=True)

    plt.hist(bkg[feature], bins=50, weights=weights[labels == 0], label="Background (original)", color="blue", alpha=0.4, density=True)
    plt.hist(bkg_biased[feature], bins=50, weights=weights[labels == 0], label="Background (biased)", color="blue", histtype="step", linewidth=2, density=True)

    plt.title(f"{feature}: Effect of {bias_type.upper()} Bias ({bias_description})")
    plt.xlabel(feature)
    plt.ylabel("Density")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [18]:
import numpy as np
import pandas as pd

def apply_systematics_full(
    df,
    alpha_tes=1.0,
    alpha_jes=1.0,
    alpha_soft_met=0.0,
    alpha_ttbar_scale=1.0,
    alpha_diboson_scale=1.0,
    alpha_bkg_scale=1.0,
    enforce_threshold=True,
    threshold_value=26,
    random_state=None,
):
    """Return a copy of ``df`` with simplified systematic shifts applied.

    The input frame is never modified. Columns that are absent are skipped silently.

    Parameters
    ----------
    df : pandas.DataFrame
        Events to bias.
    alpha_tes : float
        Multiplicative factor applied to ``PRI_tau_pt``.
    alpha_jes : float
        Multiplicative factor applied to ``PRI_jet_leading_pt``,
        ``PRI_jet_subleading_pt`` and ``PRI_jet_all_pt``.
    alpha_soft_met : float
        Standard deviation of the two Gaussian noise components whose magnitude is
        added to ``PRI_met``; nothing is added unless it is strictly positive.
    alpha_ttbar_scale, alpha_diboson_scale : float
        Factors applied to the weights of rows whose ``detailed_labels`` is
        ``"ttbar"`` / ``"diboson"``.
    alpha_bkg_scale : float
        Factor applied to the weights of every row with ``labels == 0``.
    enforce_threshold : bool
        When true, keep only rows whose ``PRI_tau_pt``, ``PRI_jet_leading_pt`` and
        ``PRI_jet_subleading_pt`` (those present) all exceed ``threshold_value``.
    threshold_value : float
        Cut applied when ``enforce_threshold`` is true.
    random_state : None, int, or object with a ``normal`` method
        Source of the soft-MET noise. ``None`` (default) keeps using the global
        ``np.random`` state; an int seeds a private ``RandomState`` so that the
        result is reproducible; an existing generator is used as is.

    Returns
    -------
    pandas.DataFrame
        The biased (and possibly filtered) copy.
    """
    df = df.copy()

    # TES: adjust tau pt
    # NOTE(review): other cells of this notebook plot "PRI_had_pt"; if the data has
    # no "PRI_tau_pt" column the TES shift is a no-op — confirm the column name.
    if "PRI_tau_pt" in df.columns:
        df["PRI_tau_pt"] *= alpha_tes

    # JES: adjust jets pt
    for feat in ("PRI_jet_leading_pt", "PRI_jet_subleading_pt", "PRI_jet_all_pt"):
        if feat in df.columns:
            df[feat] *= alpha_jes

    # Apply event threshold after JES/TES (to hide bias signature)
    if enforce_threshold:
        # Build one combined mask and take an explicit copy, so the assignments
        # below never write into a slice of another frame.
        keep = np.ones(len(df), dtype=bool)
        for feat in ("PRI_tau_pt", "PRI_jet_leading_pt", "PRI_jet_subleading_pt"):
            if feat in df.columns:
                keep &= (df[feat] > threshold_value).to_numpy()
        df = df[keep].copy()

    # Soft MET: add the magnitude of a 2D Gaussian noise vector to PRI_met
    if "PRI_met" in df.columns and alpha_soft_met > 0.0:
        if random_state is None:
            rng = np.random  # global state, as before
        elif hasattr(random_state, "normal"):
            rng = random_state
        else:
            rng = np.random.RandomState(random_state)
        soft_noise_px = rng.normal(0, alpha_soft_met, size=len(df))
        soft_noise_py = rng.normal(0, alpha_soft_met, size=len(df))
        df["PRI_met"] += np.sqrt(soft_noise_px**2 + soft_noise_py**2)

    # Weight biases (background only)
    if {"labels", "weights", "detailed_labels"}.issubset(df.columns):
        scaled_weights = df["weights"].copy()

        # Start with base background scaling
        scaled_weights.loc[df["labels"] == 0] *= alpha_bkg_scale

        # Process-specific scalings, applied on top of the base scaling
        scaled_weights.loc[df["detailed_labels"] == "ttbar"] *= alpha_ttbar_scale
        scaled_weights.loc[df["detailed_labels"] == "diboson"] *= alpha_diboson_scale

        df["weights"] = scaled_weights

    return df

import matplotlib.pyplot as plt

# Apply the biases (example: JES scaled up by 1%, Gaussian noise on the soft MET)
jes_shift = 1.01
soft_met_sigma = 15  # standard deviation of the Gaussian noise
biased_df = apply_systematics_full(
    train_set,
    alpha_jes=jes_shift,
    alpha_soft_met=soft_met_sigma,
    alpha_tes=1.0,
    alpha_ttbar_scale=1.0,
    alpha_diboson_scale=1.0,
    alpha_bkg_scale=1.0,
    enforce_threshold=False
)

# Main features affected by the shifts
features = ["PRI_jet_leading_pt", "PRI_jet_subleading_pt", "PRI_met", "PRI_tau_pt"]
labels = train_set["labels"]
weights = train_set["weights"]

biased_labels = biased_df["labels"]
biased_weights = biased_df["weights"]

# The signal is scaled up so that it is visible next to the background.
signal_display_scale = 100

for feat in features:
    if feat not in train_set.columns or feat not in biased_df.columns:
        continue

    plt.figure(figsize=(8, 5))
    # Original data (signal + background)
    plt.hist(train_set[feat][labels == 1], bins=50, weights=weights[labels == 1]*signal_display_scale,
             label=f"Signal x{signal_display_scale} (original)", color="red", alpha=0.3, density=False)
    plt.hist(train_set[feat][labels == 0], bins=50, weights=weights[labels == 0],
             label="Background (original)", color="blue", alpha=0.3, density=False)

    # Biased data (signal + background)
    plt.hist(biased_df[feat][biased_labels == 1], bins=50, weights=biased_weights[biased_labels == 1]*signal_display_scale,
             label=f"Signal x{signal_display_scale} (biased)", histtype="step", color="red", linewidth=1.5)
    plt.hist(biased_df[feat][biased_labels == 0], bins=50, weights=biased_weights[biased_labels == 0],
             label="Background (biased)", histtype="step", color="blue", linewidth=1.5)

    plt.title(f"{feat} — with JES={jes_shift}, SoftMET noise={soft_met_sigma}")
    plt.xlabel(feat)
    # These are weighted counts (density=False), not normalised densities.
    plt.ylabel("Weighted events")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

# Vectorised counts; the builtin sum() would iterate over the Series in Python.
print("Original signal samples:", (labels == 1).sum())
print("Original background samples:", (labels == 0).sum())
print("Biased signal samples:", (biased_labels == 1).sum())
print("Biased background samples:", (biased_labels == 0).sum())



In [19]:
# Total weight per class, using whichever `weights` / `labels` the last executed cell defined.
print("Signal weight sum:", weights[labels == 1].sum())
print("Background weight sum:", weights[labels == 0].sum())


In [37]:
# import numpy as np
# import matplotlib.pyplot as plt
# from systematics import systematics

# # ==== 加载原始数据集（假设你已经有 original_df） ====
# # 如果没有，请先运行：
# data.load_train_set()
# original_df = data.get_train_set()

# # ==== 应用系统偏差 ====
# biased_df = systematics(
#     {"data": original_df, "weights": np.ones(len(original_df))},
#     tes=1.01,
#     jes=1.01,
#     soft_met=15,
#     ttbar_scale=1.0,
#     diboson_scale=1.0,
#     bkg_scale=1.0,
#     dopostprocess=False
# )["data"]

# # ==== 可视化设置 ====
# features = ["PRI_tau_pt", "PRI_jet_leading_pt", "PRI_jet_subleading_pt", "PRI_met"]
# colors = {
#     "Signal (original)": "red",
#     "Signal (biased)": "darkred",
#     "Background (original)": "blue",
#     "Background (biased)": "darkblue"
# }

# labels = original_df["labels"]
# weights = original_df["weights"]

# biased_labels = biased_df["labels"]
# biased_weights = biased_df["weights"]

# # ==== 画图 ====
# for feat in features:
#     if feat not in original_df.columns or feat not in biased_df.columns:
#         continue

#     plt.figure(figsize=(8, 5))

#     # 原始数据
#     plt.hist(original_df[feat][labels == 1], bins=50, weights=weights[labels == 1],
#              label="Signal (original)", color=colors["Signal (original)"], alpha=0.25, density=True)
#     plt.hist(original_df[feat][labels == 0], bins=50, weights=weights[labels == 0],
#              label="Background (original)", color=colors["Background (original)"], alpha=0.25, density=True)

#     # 有偏数据
#     plt.hist(biased_df[feat][biased_labels == 1], bins=50, weights=biased_weights[biased_labels == 1],
#              label="Signal (biased)", histtype="step", color=colors["Signal (biased)"], linewidth=1.5, density=True)
#     plt.hist(biased_df[feat][biased_labels == 0], bins=50, weights=biased_weights[biased_labels == 0],
#              label="Background (biased)", histtype="step", color=colors["Background (biased)"], linewidth=1.5, density=True)

#     plt.title(f"{feat} — effect of JES+TES+SoftMET")
#     plt.xlabel(feat)
#     plt.ylabel("Density (normalized)")
#     plt.legend()
#     plt.grid(True)
#     plt.tight_layout()
#     plt.show()


import numpy as np
import matplotlib.pyplot as plt
from HiggsML.systematics import systematics

# ==== Load the original dataset original_df ====
data.load_train_set()
original_df = data.get_train_set()

# === Apply systematic biases and extract data and weights ===
# The real per-event weights are handed over (not unit weights): the biased
# histograms must be weighted like the original ones, otherwise the overlay mixes
# the effect of the systematics with a change of weighting.
biased = systematics(
    {"data": original_df, "weights": original_df["weights"].to_numpy(copy=True)},
    tes=1.01,
    jes=1.01,
    soft_met=15,
    ttbar_scale=1.0,
    diboson_scale=1.0,
    bkg_scale=1.0,
    dopostprocess=False
)

biased_df = biased["data"]
biased_weights = biased["weights"]

# === Visualize affected features ===
features = ["PRI_had_pt", "PRI_jet_leading_pt", "PRI_jet_subleading_pt", "PRI_met"]
labels = original_df["labels"]
weights = original_df["weights"]
biased_labels = biased_df["labels"]

for feat in features:
    # Skip features that are missing from either frame.
    if feat not in original_df.columns or feat not in biased_df.columns:
        continue

    plt.figure(figsize=(8, 5))

    # Original signal and background
    plt.hist(original_df[feat][labels == 1], bins=100, weights=weights[labels == 1],
             label="Signal (original)", color="red", alpha=0.25, density=True)
    plt.hist(original_df[feat][labels == 0], bins=100, weights=weights[labels == 0],
             label="Background (original)", color="blue", alpha=0.25, density=True)

    # Biased signal and background
    plt.hist(biased_df[feat][biased_labels == 1], bins=100, weights=biased_weights[biased_labels == 1],
             label="Signal (biased)", histtype="step", color="darkred", linewidth=1.5, density=True)
    plt.hist(biased_df[feat][biased_labels == 0], bins=100, weights=biased_weights[biased_labels == 0],
             label="Background (biased)", histtype="step", color="darkblue", linewidth=1.5, density=True)

    plt.title(f"{feat} — effect of JES+TES+SoftMET")
    plt.xlabel(feat)
    plt.ylabel("Density (normalized)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [21]:
# Compare the column sets of the original and the biased frame.
print("Columns in original_df:", original_df.columns.tolist())
print("Columns in biased_df:", biased_df.columns.tolist())

In [22]:
# Total number of signal and background events in the original set
print("Full original signal:", (original_df["labels"] == 1).sum())
print("Full original background:", (original_df["labels"] == 0).sum())

# Number of signal/background events currently used in the plots
# NOTE(review): `sig` and `bkg` are not defined in this cell; they are whatever an
# earlier cell left in the kernel — confirm they are the intended "filtered" frames.
print("Filtered signal used in plot:", sig.shape[0])
print("Filtered background used in plot:", bkg.shape[0])


In [39]:
import numpy as np
import matplotlib.pyplot as plt
from HiggsML.systematics import systematics

# ==== Load the original dataset ====
data.load_train_set()
original_df = data.get_train_set()

# === Apply systematic biases ===
# The real per-event weights are handed over (not unit weights): the biased
# histograms must be weighted like the original ones, otherwise the overlay mixes
# the effect of the systematics with a change of weighting.
biased = systematics(
    {"data": original_df, "weights": original_df["weights"].to_numpy(copy=True)},
    tes=1.01,
    jes=1.01,
    soft_met=15,
    ttbar_scale=1.0,
    diboson_scale=1.0,
    bkg_scale=1.0,
    dopostprocess=False
)

biased_df = biased["data"]
biased_weights = biased["weights"]

# === Define features to visualize ===
features = ["PRI_had_pt", "PRI_jet_leading_pt", "PRI_jet_subleading_pt", "PRI_met"]
labels = original_df["labels"]
weights = original_df["weights"]
biased_labels = biased_df["labels"]

for feat in features:
    # Skip features that are missing from either frame.
    if feat not in original_df.columns or feat not in biased_df.columns:
        continue

    # Combine all relevant values (original + biased) to compute robust x-axis range
    all_vals = pd.concat([original_df[feat], biased_df[feat]]).dropna()
    x_min, x_max = np.percentile(all_vals, [0.5, 99.5])  # Automatically clip extreme outliers
    # Bin inside the displayed window only. Without `range`, the 120 bins would be
    # spread over the full span (outliers included) and the four histograms would
    # not share bin edges.
    hist_range = (x_min, x_max)

    plt.figure(figsize=(8, 5))

    # Original signal and background
    plt.hist(original_df[feat][labels == 1], bins=120, range=hist_range, weights=weights[labels == 1],
             label="Signal (original)", color="red", alpha=0.25, density=True)
    plt.hist(original_df[feat][labels == 0], bins=120, range=hist_range, weights=weights[labels == 0],
             label="Background (original)", color="blue", alpha=0.25, density=True)

    # Biased signal and background
    plt.hist(biased_df[feat][biased_labels == 1], bins=120, range=hist_range, weights=biased_weights[biased_labels == 1],
             label="Signal (biased)", histtype="step", color="darkred", linewidth=1.5, density=True)
    plt.hist(biased_df[feat][biased_labels == 0], bins=120, range=hist_range, weights=biased_weights[biased_labels == 0],
             label="Background (biased)", histtype="step", color="darkblue", linewidth=1.5, density=True)

    # Automatically set x-axis limit based on feature's central range
    plt.xlim(x_min, x_max)

    plt.title(f"{feat} — effect of JES+TES+SoftMET")
    plt.xlabel(feat)
    plt.ylabel("Density (normalized)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [43]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from HiggsML.systematics import systematics

# === Load original dataset ===
data.load_train_set()
original_df = data.get_train_set()

# === Apply systematic shifts ===
# Only TES and soft MET are shifted here (jes is left at its nominal value of 1).
# The real per-event weights are handed over (not unit weights) so that the biased
# sample is weighted like the original one when the two are overlaid.
biased = systematics(
    {"data": original_df, "weights": original_df["weights"].to_numpy(copy=True)},
    tes=1.01,
    jes=1.,
    soft_met=15,
    ttbar_scale=1.0,
    diboson_scale=1.0,
    bkg_scale=1.0,
    dopostprocess=False
)

biased_df = biased["data"]
biased_weights = biased["weights"]

labels = original_df["labels"]
weights = original_df["weights"]
biased_labels = biased_df["labels"]

# === Features to visualize ===
features = ["PRI_had_pt", "PRI_jet_leading_pt", "PRI_jet_subleading_pt", "PRI_met"]

# === Helper: compute Freedman-Diaconis bin edges ===
def compute_fd_bins(data, max_bins=100):
    """Return histogram bin edges chosen with the Freedman-Diaconis rule.

    The bin width is ``2 * IQR / n**(1/3)``, computed on the finite values of
    ``data``; the number of bins is capped at ``max_bins``.

    Parameters
    ----------
    data : array-like
        Values to bin. NaN and infinite entries are ignored.
    max_bins : int
        Upper limit on the number of bins.

    Returns
    -------
    numpy.ndarray
        Strictly increasing bin edges. Degenerate inputs get a single bin:
        ``[0, 1]`` when no finite value is left, ``[v, v + 1]`` for one value,
        and ``[v - 0.5, v + 0.5]`` when all values equal ``v``. When the IQR is
        zero but the values are not all equal, 9 equal-width bins are used.
    """
    values = np.asarray(data)
    values = values[np.isfinite(values)]

    # Nothing to bin: return a placeholder bin instead of raising IndexError.
    if values.size == 0:
        return np.array([0.0, 1.0])
    if values.size < 2:
        return np.array([values[0], values[0] + 1])

    lowest, highest = values.min(), values.max()
    span = highest - lowest
    # All values identical: edges from linspace would have zero width.
    if span <= 0:
        return np.array([lowest - 0.5, lowest + 0.5])

    q25, q75 = np.percentile(values, [25, 75])
    bin_width = 2 * (q75 - q25) / np.cbrt(values.size)
    if bin_width <= 0:
        return np.linspace(lowest, highest, 10)

    # Cap before converting to int so that a tiny width cannot overflow, and keep
    # at least one bin.
    n_bins = max(int(min(np.ceil(span / bin_width), max_bins)), 1)
    return np.linspace(lowest, highest, n_bins + 1)

# === Visualization ===
for feat in features:
    # Skip features that are missing from either frame.
    if feat not in original_df.columns or feat not in biased_df.columns:
        continue

    # Combine all data for bin estimation, so the four histograms share bin edges
    data_all = pd.concat([
        original_df[feat][labels == 1],
        original_df[feat][labels == 0],
        biased_df[feat][biased_labels == 1],
        biased_df[feat][biased_labels == 0]
    ])
    bin_edges = compute_fd_bins(data_all)
    plt.figure(figsize=(8, 5))

    # Original signal and background
    plt.hist(original_df[feat][labels == 1], bins=bin_edges, weights=weights[labels == 1],
             label="Signal (original)", color="red", alpha=0.25, density=True)
    plt.hist(original_df[feat][labels == 0], bins=bin_edges, weights=weights[labels == 0],
             label="Background (original)", color="blue", alpha=0.25, density=True)

    # Biased signal and background
    plt.hist(biased_df[feat][biased_labels == 1], bins=bin_edges, weights=biased_weights[biased_labels == 1],
             label="Signal (biased)", histtype="step", color="darkred", linewidth=1.5, density=True)
    plt.hist(biased_df[feat][biased_labels == 0], bins=bin_edges, weights=biased_weights[biased_labels == 0],
             label="Background (biased)", histtype="step", color="darkblue", linewidth=1.5, density=True)

    # This cell applies tes=1.01 and soft_met=15 with jes left at 1, so JES is
    # not named in the title.
    plt.title(f"{feat} — effect of TES+SoftMET")
    plt.xlabel(feat)
    plt.ylabel("Density (normalized)")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


### Test Settings
The Test setting sets the test conditions in ingestion.
This includes which systematics you want and how many pseudo-experiments you want.

In [None]:
# Template of the test conditions; a copy of it is completed and written to disk below.
TEST_SETTINGS = {
    "systematics": {  # Systematics to use
        "tes": False,  # tau energy scale
        "jes": False,  # jet energy scale
        "soft_met": False,  # soft term in MET
        "ttbar_scale": False,  # ttbar scale factor
        "diboson_scale": False,  # Diboson scale factor
        "bkg_scale": False,  # Background scale factor
    },
    "num_pseudo_experiments": 25,  # Number of pseudo-experiments to run per set
    "num_of_sets": 25,  # Number of sets of pseudo-experiments to run
}

# Seed for the generator that draws the ground-truth mu values.
RANDOM_SEED = 42

In [7]:
# Copy the template so that TEST_SETTINGS itself is never modified. A plain
# `.copy()` is shallow and would still share the nested "systematics" dict, so
# that level is copied explicitly as well.
test_settings = {
    **TEST_SETTINGS,
    "systematics": dict(TEST_SETTINGS["systematics"]),
}

# Seeded generator: the same ground-truth mu values are drawn on every run.
random_state = np.random.RandomState(RANDOM_SEED)
test_settings["ground_truth_mus"] = (
    random_state.uniform(0.1, 3, test_settings["num_of_sets"])
).tolist()

# Persist the settings next to the results.
random_settings_file = os.path.join(output_dir, "test_settings.json")
with open(random_settings_file, "w") as f:
    json.dump(test_settings, f)

### Ingestion



In [18]:
from HiggsML.ingestion import Ingestion

# The ingestion object is built around the downloaded data set.
ingestion = Ingestion(data)

In [19]:
# initialize submission
# The `Model` class itself (not an instance) is handed to the ingestion object.
ingestion.init_submission(Model)

In [20]:
# fit submission
# NOTE(review): presumably this calls `Model.fit` on the training data — verify in HiggsML.
ingestion.fit_submission()

In [21]:
# load test set
# Must run before the prediction step below, which is given the test settings.
data.load_test_set()

In [22]:
# predict submission
# `test_settings` carries the systematics flags, the experiment counts and the ground-truth mus.
ingestion.predict_submission(test_settings)

In [23]:
# Post-process the prediction results held by the ingestion object before saving.
ingestion.process_results_dict()

In [24]:
# save result
# Written to the same directory as test_settings.json.
ingestion.save_result(output_dir)

In [None]:
import sys
import os
import importlib

# Make the current directory importable, without stacking a duplicate entry on
# sys.path every time the cell is re-run.
repo_root = os.getcwd()
if repo_root not in sys.path:
    sys.path.append(repo_root)
import sample_code_submission.feature_analysis as fa
# Reload so that edits to feature_analysis.py are picked up without a kernel restart.
importlib.reload(fa)

print("Top 10 minimal dependent features:", fa.minimal_dependent_features(data_set))

## Score
1. Compute Scores
2. Visualize Scores


In [25]:
from HiggsML.score import Scoring

In [26]:
# Initialize Score
# Created without arguments; results are loaded into it in the next cell.
score = Scoring()

In [27]:
# Predictions are read from, and scores written to, the same output directory.
print(output_dir)
score.load_ingestion_results(prediction_dir=output_dir, score_dir=output_dir)

In [28]:
# Compute Score
# The same `test_settings` used for prediction is passed to the scorer.
score.compute_scores(test_settings)

In [29]:
from HiggsML.visualization import visualize_scatter

# Visualize scatter plot of ground truth mu and predicted mu
# The ground-truth values are the ones drawn when `test_settings` was built.
visualize_scatter(
    ingestion_result_dict=ingestion.results_dict,
    ground_truth_mus=test_settings["ground_truth_mus"],
)

In [30]:
# Run the scoring module from the command line; `$output_dir` is filled in from the Python variable.
!python -m HiggsML.score --prediction $output_dir --output $output_dir