In [1]:
import pandas as pd
import numpy as np
import glob

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from xgboost import XGBRegressor

In [2]:
# Catalogue of detector raw IDs; its sorted unique values define the label
# columns (and their order) of the multi-hot target matrix built below.
det_raw_ids_df = pd.read_csv("/users/hep/eigen1907/Workspace/Workspace-DL/241215-track_det_raw_id/muon_system_det_raw_id.csv")
det_raw_ids = np.sort(det_raw_ids_df['det_raw_id'].unique())


def _parse_det_ids(cell):
    """Turn one whitespace-separated `det_raw_id` cell into a list of ints.

    `str.split()` with no argument ignores leading/trailing/repeated
    whitespace, so a trailing separator no longer has to be sliced off and a
    row without one does not lose its last ID. A missing cell yields an empty
    list (no matched detectors).
    """
    if pd.isna(cell):
        return []
    return [int(token) for token in cell.split()]


# glob returns paths in arbitrary order; sorting keeps the concatenated row
# order -- and therefore any seeded split made from it -- reproducible.
track_files = sorted(glob.glob("/users/hep/eigen1907/Workspace/Workspace-DL/241215-track_det_raw_id/TrackDetMatchesSample/output_*.csv"))
if not track_files:
    # Fail with an actionable message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError("No track CSV files matched the TrackDetMatchesSample/output_*.csv pattern")

tracks_df_list = [pd.read_csv(track_file) for track_file in track_files]
tracks_df = pd.concat(tracks_df_list, ignore_index=True)
tracks_df['det_raw_id'] = tracks_df['det_raw_id'].apply(_parse_det_ids)

# One indicator column per catalogue ID, one row per track.
mlb = MultiLabelBinarizer(classes=det_raw_ids)
track_det_matches = mlb.fit_transform(tracks_df['det_raw_id'])



In [None]:
# Features are the three track kinematic columns; targets are the per-track
# detector indicator matrix built in the loading cell.
X = tracks_df[['track_pt', 'track_eta', 'track_phi']].values
Y = track_det_matches

# Split BEFORE fitting the scaler: fitting on the full dataset would let the
# held-out rows influence the mean/variance used to transform the training data.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)   # statistics come from training rows only
X_test = scaler.transform(X_test_raw)         # same transform, no refit

# Kept for any later cell that expects the full standardized matrix; it uses
# the training-set statistics as well.
X_scaled = scaler.transform(X)

print(X_scaled.shape)
print(Y.shape)

print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)

In [None]:
# Train on the X_train / X_test / Y_train / Y_test produced by the
# preprocessing cell. Re-splitting the raw `X` here silently replaced the
# standardized features and left the fitted scaler unused, so the meaning of
# `X_train` depended on which cell had run last.
#
# NOTE(review): Y holds 0/1 indicator columns, so this regressor is fitting
# binary targets and its outputs are thresholded later -- confirm a
# classification objective is not what is actually wanted.
# NOTE(review): eval_set is the held-out split, so the logged metric is only
# for monitoring; do not tune hyperparameters against it.
model = XGBRegressor(n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42)
model.fit(X_train, Y_train, eval_set=[(X_test, Y_test)], verbose=True)

In [None]:
# Continuous model outputs for the held-out rows.
Y_pred_reg = model.predict(X_test)

# Binarize: any score strictly above 0.1 counts as a predicted match.
above_threshold = Y_pred_reg > 0.1
Y_pred = above_threshold.astype(int)

# Accuracy is computed independently for each label column, then averaged.
accuracies = [
    accuracy_score(Y_test[:, label_idx], Y_pred[:, label_idx])
    for label_idx in range(Y_test.shape[1])
]

mean_accuracy = np.mean(accuracies)
print(f"Mean accuracy across all labels: {mean_accuracy:.4f}")