In [1]:
import os
import pickle
import pandas as pd
import torch
import numpy as np
import itertools
import json

In [2]:
# Root directory that is scanned for HPO experiment folders; each folder is
# expected to be named "<dataset>_<variant>_<model>".
PATH = "/home/andres/ray_results/hyperparams_optim/"
# Alternative selections kept for reference:
# ds_names = ["UCIHAR", "MHEALTH"]
# variants = ["corrcoef_all", "corrcoef_win"]

# Every (dataset, variant, model) combination of these lists is inspected.
ds_names = ["REALDISP"]
variants = ["corrcoef_win"]
models = ["graphconv"]

In [3]:
# Walk PATH/<dataset>_<variant>_<model>/<trainer_dir>/<trainer*>/ and collect,
# for every trial, the loss/accuracy stored in checkpoints/best_trial.pth and
# the hyper-parameters stored in params.json.
results = []   # rows of [dataset, variant, model, trial, loss, accuracy]
configs = {}   # trial id -> dict loaded from that trial's params.json
hpo_dirs = [f"{r[0]}_{r[1]}_{r[2]}" for r in itertools.product(ds_names, variants, models)]
missing_checkpoints = 0
# Iterate over the combinations themselves so dataset/variant/model never have
# to be recovered by splitting the folder name on "_" (which is ambiguous as
# soon as a name contains a different number of underscores than expected).
for (dataset, variant, model), hpodir in zip(itertools.product(ds_names, variants, models), hpo_dirs):
    hpo_path = os.path.join(PATH, hpodir)
    if not os.path.isdir(hpo_path):
        continue
    for trainer_dir in os.listdir(hpo_path):
        trainer_root = os.path.join(hpo_path, trainer_dir)
        if not os.path.isdir(trainer_root):
            # Plain files next to the trial folders would make os.listdir raise.
            continue
        for trainer in os.listdir(trainer_root):
            if "trainer" not in trainer:
                continue
            # The first three "_"-separated tokens of the folder name identify the trial.
            trial_id = "_".join(trainer.split('_')[0:3])
            trial_path = os.path.join(trainer_root, trainer)
            file_path = os.path.join(trial_path, "checkpoints", "best_trial.pth")
            try:
                checkpoint = torch.load(file_path, map_location=torch.device("cpu"))
                with open(os.path.join(trial_path, "params.json")) as conf_file:
                    conf = json.load(conf_file)
                row = [dataset,             # dataset
                       variant,             # variant
                       model,               # model
                       trial_id,            # trial
                       checkpoint["loss"],  # loss
                       checkpoint["acc"]]   # accuracy
            except Exception as err:
                # Best effort: skip unreadable trials, but do not swallow
                # KeyboardInterrupt/SystemExit like a bare ``except:`` would.
                missing_checkpoints += 1
                print(f"Checkpoint for {trial_id} corrupted or missing ({type(err).__name__}: {err})")
                continue
            # Register the row and its config together so that every trial in
            # ``results`` is guaranteed to have an entry in ``configs``.
            results.append(row)
            configs[trial_id] = conf

print(f"missing checkpoints: {missing_checkpoints}")

missing checkpoints: 0


In [4]:
# Build the table straight from the row list: routing it through np.asarray()
# coerces the mixed str/float rows to one string dtype, which turns "loss" and
# "accuracy" into text (sorted lexicographically) and fails on an empty list.
results = pd.DataFrame(
    results,
    columns=["dataset", "variant", "model", "trial", "loss", "accuracy"],
).astype({"loss": float, "accuracy": float})
results.head()

Unnamed: 0,dataset,variant,model,trial,loss,accuracy
0,REALDISP,corrcoef_all,graphconv,trainer_6dc111c8_284,0.1802812957697052,0.9474216380182002
1,REALDISP,corrcoef_all,graphconv,trainer_474d5fd2_439,0.2394479082707083,0.910010111223458
2,REALDISP,corrcoef_all,graphconv,trainer_e1807e33_137,0.455138195041362,0.8048533872598584
3,REALDISP,corrcoef_all,graphconv,trainer_953d0efd_467,0.2810693618637124,0.9150657229524772
4,REALDISP,corrcoef_all,graphconv,trainer_bfd956ac_375,0.6791088931610528,0.8230535894843276


In [5]:
# PAMAP2 / corrcoef_all_interpolate / graphconv trials, lowest loss first
results.loc[
    results["dataset"].eq("PAMAP2")
    & results["variant"].eq("corrcoef_all_interpolate")
    & results["model"].eq("graphconv")
].sort_values(by="loss")

Unnamed: 0,dataset,variant,model,trial,loss,accuracy


In [10]:
def get_best_trial_conf(ds_name, variant, model, data, trial_configs=None):
    """Build the training command line for the lowest-loss trial of one experiment.

    Parameters
    ----------
    ds_name, variant, model : str
        Values matched against the "dataset", "variant" and "model" columns
        of ``data``.
    data : pandas.DataFrame
        Table with at least the columns "dataset", "variant", "model",
        "trial" and "loss". "loss" may hold numbers or numeric strings.
    trial_configs : dict, optional
        Mapping of trial id -> hyper-parameter dict. When omitted, the
        notebook-level ``configs`` dictionary is used.

    Returns
    -------
    str
        The ``python3 activity_graph_classification.py ...`` command, or an
        empty string when no row of ``data`` matches the experiment.

    Raises
    ------
    KeyError
        If the selected trial has no entry in ``trial_configs``.
    """
    if trial_configs is None:
        trial_configs = configs

    candidates = data.loc[(data["dataset"] == ds_name) &
                          (data["variant"] == variant) &
                          (data["model"] == model)]
    if len(candidates) == 0:
        return ""

    # Compare losses numerically: when the column holds strings a plain sort
    # is lexicographic and would rank "10.2" before "9.5".
    losses = candidates["loss"].astype(float).to_numpy()
    best_trial = candidates["trial"].iloc[int(np.argsort(losses, kind="stable")[0])]

    if ds_name == "PAMAP2":
        # For PAMAP2 the last "_" token of the variant is passed as --fillnan.
        *variant_parts, fillnan = variant.split("_")
        variant_str = f"--ds_variant={'_'.join(variant_parts)} --fillnan={fillnan}"
    else:
        variant_str = f"--ds_variant={variant}"

    ret = f"python3 activity_graph_classification.py --epochs=500 --ds_name={ds_name} {variant_str} --model_name={model} "

    # These keys are either emitted explicitly above or handled as a bare flag below.
    skipped_keys = {"epochs", "batch_norm", "ds_name", "ds_variant", "model_name"}
    trial_conf = trial_configs[best_trial]
    for k, v in trial_conf.items():
        if k not in skipped_keys:
            ret += f"--{k}={v} "

    ret += "--patience_tr=100 "
    # NOTE(review): --log_wandb is only emitted together with --batch_norm;
    # kept as in the original, but confirm the coupling is intentional.
    if trial_conf.get("batch_norm", False):
        ret += "--batch_norm --log_wandb"

    return ret

In [11]:
# Print the training command of the best trial for every experiment
# combination that has at least one collected result.
params = list(itertools.product(ds_names, variants, models))
for ds_name, variant, model in params:
    command = get_best_trial_conf(ds_name, variant, model, results)
    if command:
        print(command)

python3 activity_graph_classification.py --epochs=500 --ds_name=REALDISP --ds_variant=corrcoef_all --model_name=graphconv --aggr=max --batch_size=64 --classifier_dropout=0.3 --conv_dropout=0.1 --global_pooling=max --hidden_dim=128 --input_dim=128 --lr=0.00417 --num_layers=3 --out_dim=10 --w_decay=0.000355 --patience_tr=100 --batch_norm --log_wandb


In [None]:
# Gather the evaluation metrics stored in every "*results*" pickle found under
# PATH/<dataset>/training/ into a single table.
SUMMARY_COLUMNS = ["Dataset",
                   "Conv_type", "graph_method", "graph_method_desc",
                   "balanced_accuracy", "accuracy", "f1_score"]

summary = []
for name in ds_names:
    path = os.path.join(PATH, name, "training")
    for file_name in os.listdir(path):
        if "results" not in file_name:
            continue

        # NOTE: pickle.load can execute arbitrary code; only read files
        # produced by trusted training runs.
        # The loaded dict gets its own name so the notebook-level ``results``
        # table built earlier is not overwritten.
        with open(os.path.join(path, file_name), "rb") as results_file:
            metrics = pickle.load(results_file)

        # The file name (without extension) is "_"-separated: token 1 goes to
        # "Conv_type", tokens 2-4 to "graph_method" and the rest to
        # "graph_method_desc".
        exp = file_name.split(".")[0].split("_")
        graph_method = "_".join(exp[2:5])
        graph_method_desc = "_".join(exp[5:])

        summary.append([
            name,
            exp[1],
            graph_method,
            graph_method_desc,
            metrics["balanced_accuracy"],
            metrics["accuracy"],
            metrics["f1_score"]
        ])

# Passing the columns to the constructor also works when no result file was
# found; assigning ``.columns`` afterwards raises on an empty frame.
summary = pd.DataFrame(summary, columns=SUMMARY_COLUMNS)
        

In [None]:
# Order the summary by dataset, then balanced accuracy, convolution type and
# graph method, all in descending order.
sorted_results = summary.sort_values(by=["Dataset", "balanced_accuracy", "Conv_type", "graph_method"], ascending=False)

In [None]:
# Ranked PAMAP2 rows whose convolution type is GraphConv
sorted_results.loc[
    sorted_results["Dataset"].eq("PAMAP2") & sorted_results["Conv_type"].eq("GraphConv")
]

In [None]:
def plot_results(dataset, idx):
    """Plot balanced accuracy and F1 score per convolution type for one dataset.

    The rows of the notebook-level ``summary`` table that belong to
    ``dataset`` are ordered by balanced accuracy and convolution type
    (descending), plotted against "Conv_type", and their index is printed.

    ``idx`` is accepted for compatibility with existing calls but is not used.
    """
    subset = summary.loc[summary["Dataset"] == dataset]
    ranked = subset.sort_values(by=["balanced_accuracy", "Conv_type"], ascending=False)
    metrics_view = ranked[["Conv_type", "balanced_accuracy", "f1_score"]]
    metrics_view.plot(x="Conv_type", figsize=(10, 4))
    print(ranked.index)

In [None]:
# Plot the UCIHAR results (the second argument is not used by plot_results).
plot_results("UCIHAR", [])

In [None]:
# Plot the MHEALTH results (the index list is not used by plot_results).
plot_results("MHEALTH", [19,40,33,35,31])

In [None]:
# Plot the PAMAP2 results (the index list is not used by plot_results).
plot_results("PAMAP2", [49, 65, 51, 44, 67, 58])

In [None]:
import itertools

# One list of candidate values per search axis; the grid is their full
# Cartesian product, one tuple per combination.
params = [
    [3],
    [32, 64, 128],
    [0.01, 0.05, 0.001, 0.005, 0.0001, 0.0005],
    [0.5],
    [False],
    ["add"],
    ["mean"],
    [0.5],
]
search_grid = [combo for combo in itertools.product(*params)]

In [None]:
# Display every combination in the search grid.
search_grid

In [None]:
# Scratch check: left-pad a numeric string with zeros to width 4 (prints 0001).
n = "1"
print(n.zfill(4))