In [4]:
import os
import glob
import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier


def _parse_det_raw_ids(cell):
    """Convert one ``det_raw_id`` CSV cell into a list of integer IDs.

    The cell is expected to hold integers separated by whitespace. Leading,
    trailing, or repeated whitespace is ignored, and a missing cell (which
    pandas reads as NaN) yields an empty list.
    """
    if pd.isna(cell):
        return []
    return [int(token) for token in str(cell).split()]


def load_data(file_paths, track_scaler, detid_encoder):
    """Read track CSV files and return model-ready features and labels.

    Parameters
    ----------
    file_paths : iterable of str
        Paths of CSV files. Each file must provide the columns ``track_pt``,
        ``track_eta``, ``track_phi`` and ``det_raw_id``.
    track_scaler : object with ``transform``
        Already-fitted transformer applied to the three track columns.
    detid_encoder : object with ``transform``
        Already-fitted encoder applied to the per-row lists of integer IDs.

    Returns
    -------
    tuple
        ``(X, Y)``: the outputs of ``track_scaler.transform`` and
        ``detid_encoder.transform`` respectively.

    Raises
    ------
    ValueError
        If ``file_paths`` is empty.
    """
    file_paths = list(file_paths)
    if not file_paths:
        raise ValueError('load_data() needs at least one CSV path; got an empty list')

    # Read det_raw_id as text so a column that happens to contain only single
    # IDs is not type-inferred as a number, which would break the parsing below.
    frames = [pd.read_csv(path, dtype={'det_raw_id': str}) for path in file_paths]
    df = pd.concat(frames, ignore_index=True)

    # Whitespace-tolerant parsing: the last ID is kept whether or not the cell
    # ends with a trailing separator.
    det_raw_ids = df['det_raw_id'].apply(_parse_det_raw_ids)

    X = track_scaler.transform(df[['track_pt', 'track_eta', 'track_phi']].values)
    Y = detid_encoder.transform(det_raw_ids)
    return X, Y


In [6]:
# --- Configuration ------------------------------------------------------------
# NOTE(review): these are absolute paths on one machine; adjust them before
# running the notebook anywhere else.
data_dir = '/users/hep/eigen1907/Workspace/Workspace-DL/241215-track_det_raw_id/TrackDetMatches'
detid_table_path = '/users/hep/eigen1907/Workspace/Workspace-DL/241215-track_det_raw_id/muon_system_det_raw_id.csv'
input_model_dir = '/users/hep/eigen1907/Workspace/Workspace-DL/241218-XGBoost/model/241218-init-file300-e100-d3'

# Number of CSV files, taken from the end of the sorted file list, that are
# used for evaluation.
n_eval_files = 3

# --- Trained artifacts --------------------------------------------------------
model = XGBClassifier()
model.load_model(os.path.join(input_model_dir, 'model.json'))
# NOTE: joblib.load unpickles arbitrary Python objects, so only load these
# files from a location you trust.
track_scaler = joblib.load(os.path.join(input_model_dir, 'scaler.pkl'))
detid_encoder = joblib.load(os.path.join(input_model_dir, 'encoder.pkl'))

# --- Evaluation data ----------------------------------------------------------
track_paths = sorted(glob.glob(os.path.join(data_dir, '*.csv')))
if not track_paths:
    # Fail here with a clear message rather than deep inside load_data().
    raise FileNotFoundError(f'No CSV files found in {data_dir!r}')
# NOTE(review): whether these files were excluded from training is not visible
# in this notebook -- confirm before treating the result as a held-out score.
eval_paths = track_paths[-n_eval_files:]

X_eval, Y_eval = load_data(eval_paths, track_scaler, detid_encoder)

preds = model.predict(X_eval)
# Element-wise agreement averaged over every cell of the label matrix. Cells
# that are 0 in both arrays count as correct, so this value can be close to 1
# even when individual rows contain several mismatches.
accuracy = (preds == Y_eval).mean()

In [36]:
# Show `accuracy`, i.e. the mean of the element-wise comparison `preds == Y_eval`.
print(accuracy)

0.9998972728937358


In [42]:
# Column indices equal to 1 in row 2: first for Y_eval, then for preds.
print(np.nonzero(Y_eval[2] == 1))
print(np.nonzero(preds[2] == 1))

(array([ 6408,  6415,  7038,  7045,  7416,  7423,  7794,  7801,  9902,
        9998, 10534, 10630, 11586, 11590, 11600, 11682, 11686, 11696]),)
(array([ 6422,  7052,  7430,  7808,  9120, 10111, 10115, 10798, 10827]),)


In [51]:
# Dimensions of the two label matrices.
print(np.shape(Y_eval))
print(np.shape(preds))

# Largest number of non-zero entries found in any single row of Y_eval.
print(np.max(np.sum(Y_eval != 0, axis=1)))
# Mean per-row count of cells where preds is smaller than Y_eval.
print(np.mean(np.sum(np.less(preds, Y_eval), axis=1)))
# Row 0 of the mask marking cells where preds is larger than Y_eval.
print(np.greater(preds, Y_eval)[0])
# Column indices equal to 1 in row 0 of Y_eval.
print(np.nonzero(Y_eval[0] == 1))
# Number of entries equal to 1 in each row of Y_eval.
print(np.sum(Y_eval == 1, axis=1))



(5866, 13168)
(5866, 13168)
25
0.605182407091715
[False False False ... False False False]

(array([ 5932,  6184,  6870,  7248,  7626, 11861, 11863, 11934, 11936]),)
[ 9  9 18 ... 10  8  9]



In [34]:
# Per-row number of entries equal to 1 in the reference labels and in the predictions.
matched_chamber_count = np.sum(Y_eval == 1, axis=1)
pred_matched_chamber_count = np.sum(preds == 1, axis=1)

# Per-row number of cells where the prediction differs from the reference.
wrong_count = np.sum(np.not_equal(preds, Y_eval), axis=1)

# One array per output line, in the same order as they were computed.
print(matched_chamber_count, pred_matched_chamber_count, wrong_count, sep='\n')

[ 9  9 18 ... 10  8  9]
[9 9 9 ... 8 9 9]
[ 0  0 27 ...  2  1  0]
