In [1]:
import pandas as pd

In [38]:
# Global Variables
CHANNEL = "1E2Mu"
MASS_POINTS = [
    "MHc70_MA15", "MHc70_MA40", "MHc70_MA65",
    "MHc100_MA15", "MHc100_MA25", "MHc100_MA60", "MHc100_MA95",
    "MHc130_MA15", "MHc130_MA45", "MHc130_MA55", "MHc130_MA90", "MHc130_MA125",
    "MHc160_MA15", "MHc160_MA45", "MHc160_MA75", "MHc160_MA85", "MHc160_MA120", "MHc160_MA155"
]

In [39]:
best_estimators = dict()
best_estimators["mass_point"] = MASS_POINTS
best_estimators["lr_fake"] = []
best_estimators["n_hidden_fake"] = []
best_estimators["lr_ttX"] = []
best_estimators["n_hidden_ttX"] = []

# fakes
for mass_point in MASS_POINTS:
    df = pd.read_csv(f"Outputs/{CHANNEL}/{mass_point}/metrics_vs_fake.csv")
    df = df[['index', 'auc_train', 'auc_valid', 'auc_test', 'ksprob_scipy', 'ksprob_root']]
    df.set_index("index", inplace=True)
    
    auc_max = 0.
    index = None
    for idx in df.index:
        ksprob = df.loc[idx, 'ksprob_root']
        auc_valid = df.loc[idx, 'auc_valid']
        auc_test = df.loc[idx, 'auc_test']
        
        # check whether the model meets the criteria
        if ksprob > 0.3 and abs(auc_valid - auc_test)/auc_test < 0.01:
            if auc_max < auc_valid:
                auc_max = auc_valid
                index = idx
            else:
                pass
            
    # get learning rates and n_hidden from index
    tokens = index.split("_")
    lr = tokens[0].split("-")[1]
    n_hidden = tokens[1].split("-")[1]
    best_estimators["lr_fake"].append(lr)
    best_estimators["n_hidden_fake"].append(n_hidden)

# ttX    
for mass_point in MASS_POINTS:
    df = pd.read_csv(f"Outputs/{CHANNEL}/{mass_point}/metrics_vs_ttX.csv")
    df = df[['index', 'auc_train', 'auc_valid', 'auc_test', 'ksprob_scipy', 'ksprob_root']]
    df.set_index("index", inplace=True)
    
    auc_max = 0.
    index = None
    for idx in df.index:
        ksprob = df.loc[idx, 'ksprob_root']
        auc_valid = df.loc[idx, 'auc_valid']
        auc_test = df.loc[idx, 'auc_test']
        
        # check whether the model meets the criteria
        if ksprob > 0.3 and abs(auc_valid - auc_test)/auc_test < 0.01:
            if auc_max < auc_valid:
                auc_max = auc_valid
                index = idx
            else:
                pass
            
    # get learning rates and n_hidden from index
    tokens = index.split("_")
    lr = tokens[0].split("-")[1]
    n_hidden = tokens[1].split("-")[1]
    best_estimators["lr_ttX"].append(lr)
    best_estimators["n_hidden_ttX"].append(n_hidden)

In [40]:
df = pd.DataFrame(best_estimators)
df.set_index("mass_point", inplace=True)
df.to_csv(f"Outputs/{CHANNEL}/CSV/hyper_params.csv")

In [20]:
for key, value in best_estimators.items():
    tokens = value[0].split("_")
    lr = tokens[0].split("-")[1]
    n_hidden = tokens[1].split("-")[1]
    print(f"{key}: lr-{lr}, n_hidden-{n_hidden}")

MHc70_MA15: lr-0.5, n_hidden-64
MHc70_MA40: lr-0.6, n_hidden-256
MHc70_MA65: lr-0.1, n_hidden-256
MHc100_MA15: lr-0.5, n_hidden-192
MHc100_MA25: lr-0.5, n_hidden-256
MHc100_MA60: lr-0.6, n_hidden-256
MHc100_MA95: lr-0.1, n_hidden-192
MHc130_MA15: lr-0.4, n_hidden-192
MHc130_MA45: lr-0.6, n_hidden-192
MHc130_MA55: lr-0.6, n_hidden-128
MHc130_MA90: lr-0.6, n_hidden-256
MHc130_MA125: lr-0.6, n_hidden-64
MHc160_MA15: lr-0.6, n_hidden-128
MHc160_MA45: lr-0.5, n_hidden-256
MHc160_MA75: lr-0.5, n_hidden-192
MHc160_MA85: lr-0.6, n_hidden-256
MHc160_MA120: lr-0.6, n_hidden-64
MHc160_MA155: lr-0.6, n_hidden-64
