In [11]:
import pandas as pd
import numpy as np

# Comparing two models

This notebook reads in the indices of wrongly classified events for each of the two models and compares how many of these are overlapping events.

In [12]:
MODELS = ["resnet18", "XGBoost"]
df_resnet = pd.read_csv("../results/resnet_1687178683_SPH_9_BH_n2_M10_BH_n4_M8_BH_n4_M10_BH_n4_M12_BH_n6_M10_CUT_wrongly_predicted.csv", names=["EventType", "EventID"], skiprows=1)
df_xgboost = pd.read_csv("../results/XGBoost_multiclass-classification_CUT_high-level_wrongly_predicted.csv", names=["EventType", "EventID"], skiprows=1)

In [13]:
print(len(df_resnet), len(df_xgboost))

42056 8626


In [14]:
print(df_resnet["EventID"].max())

81954


In [22]:
labels = list(df_resnet["EventType"].unique())
for label in labels:
    resnet = df_resnet[df_resnet["EventType"]==label]
    xgboost = df_xgboost[df_xgboost["EventType"]==label]
    intersect = np.intersect1d(resnet["EventID"], xgboost["EventID"])
    print(f"Number of {label} events wrongly classified by resnet: {len(resnet)}")
    print(f"Number of {label} events wrongly classified by xgboost: {len(xgboost)}")
    print(f"Number of {label} events wrongly classified by both: {len(intersect)}")
    print("-------------------------------")


Number of SPH_9 events wrongly classified by resnet: 1564
Number of SPH_9 events wrongly classified by xgboost: 392
Number of SPH_9 events wrongly classified by both: 6
-------------------------------
Number of BH_n2_M10 events wrongly classified by resnet: 9561
Number of BH_n2_M10 events wrongly classified by xgboost: 1985
Number of BH_n2_M10 events wrongly classified by both: 808
-------------------------------
Number of BH_n4_M8 events wrongly classified by resnet: 4213
Number of BH_n4_M8 events wrongly classified by xgboost: 923
Number of BH_n4_M8 events wrongly classified by both: 62
-------------------------------
Number of BH_n4_M10 events wrongly classified by resnet: 14109
Number of BH_n4_M10 events wrongly classified by xgboost: 2363
Number of BH_n4_M10 events wrongly classified by both: 1383
-------------------------------
Number of BH_n4_M12 events wrongly classified by resnet: 1857
Number of BH_n4_M12 events wrongly classified by xgboost: 880
Number of BH_n4_M12 events wro

In [16]:
df_resnet_sph = df_resnet[df_resnet["EventType"]=="SPH_9TeV"]
df_resnet_bh = df_resnet[df_resnet["EventType"]=="BH_n4_M10"]
df_xgboost_sph = df_xgboost[df_xgboost["EventType"]=="SPH_9TeV"]
df_xgboost_bh = df_xgboost[df_xgboost["EventType"]=="BH_n4_M10"]

In [17]:
df_resnet_sph = df_resnet_sph.sort_values(by="EventID")
df_resnet_bh = df_resnet_bh.sort_values(by="EventID")
df_xgboost_sph = df_xgboost_sph.sort_values(by="EventID")
df_xgboost_bh = df_xgboost_bh.sort_values(by="EventID")

In [18]:
intersect_sph = np.intersect1d(df_resnet_sph["EventID"], df_xgboost_sph["EventID"])
len_sph = len(df_resnet_sph["EventID"])
print(len_sph, len(intersect_sph))
intersect_prc = 100*(len(intersect_sph)/len_sph)
print(intersect_prc)

0 0


ZeroDivisionError: division by zero

In [None]:
intersect_bh = np.intersect1d(df_resnet_bh["EventID"], df_xgboost_bh["EventID"])
len_bh = len(df_resnet_bh["EventID"])
print(len_bh, len(intersect_bh))
intersect_prc = 100*(len(intersect_bh)/len_bh)
print(f"Percent of wrongly classified events that are the same: {intersect_prc}")

407 0
Percent of wrongly classified events that are the same: 0.0
