In [1]:
import torch
import torch.optim as optim
import os
from resolve.utilities import utilities as utils
from resolve.helpers import DataGeneration
from resolve.helpers import Trainer, ModelsManager
from resolve.helpers import AsymmetricFocalWithFPPenalty, log_prob, bce_with_logits, recon_loss_mse, skip_loss
from torch.utils.tensorboard import SummaryWriter
import yaml
import json


In [2]:
# Set the path to the yaml settings file here
path_to_settings = "./binary-black-hole/"
with open(f"{path_to_settings}/settings.yaml", "r") as f:
    config_file = yaml.safe_load(f)

torch.manual_seed(0)
version = config_file["path_settings"]["version"]
path_out = f'{config_file["path_settings"]["path_out_model"]}/model-{version}'

In [3]:
model_name = config_file["model_settings"]["network"]["model_used"]
network_config = config_file["model_settings"]["network"]["models"][model_name]
network_config["d_y"] = utils.get_feature_and_label_size(config_file)[1]
network_config["d_theta"]  = len(config_file["simulation_settings"]["theta_labels"])
network_config["d_phi"] = len(config_file["simulation_settings"]["phi_labels"])

manager = ModelsManager(network_config)
model = manager.get_network(config_file["model_settings"]["network"]["model_used"])

encoder 15 32
encoder 32 64
encoder 64 48
encoder 48 32
decoder 46 48
decoder 48 64
decoder 64 48
decoder 48 32
decoder 32 2


In [4]:
# load data:
dataset_train = DataGeneration(mode = "train", 
                                config_file=config_file
                                )

dataset_train.set_dataset()

if config_file["model_settings"]["train"]["dataset"]["use_feature_normalization"] == "zscore":
    print("theta mean: ", dataset_train.dataset._normalizer._get_scaler("theta").mean_)
    print("phi mean: ", dataset_train.dataset._normalizer._get_scaler("phi").mean_)
elif config_file["model_settings"]["train"]["dataset"]["use_feature_normalization"] == "minmax":
    print("theta mean: ", dataset_train.dataset._normalizer._get_scaler("theta").data_range_)
    print("phi mean: ", dataset_train.dataset._normalizer._get_scaler("phi").data_range_)



positives ratio  tensor(0.0073)
applying mixup
theta mean:  [1.50423200e-02 4.99557795e+00 5.00162000e+02 4.98470000e+02]
phi mean:  [3.09912000e-02 1.38451402e+01 7.49881529e+01 3.83905393e+01
 1.93798800e-01 5.03367206e-01 1.08481965e+02 1.17347808e+01
 1.09972412e+01 9.90653600e-01]


In [5]:
os.system(f'mkdir -p {path_out}/model_{version}_tensorboard_logs')
os.system(f'rm {path_out}/model_{version}_tensorboard_logs/events*')
writer = SummaryWriter(log_dir=f'{path_out}/model_{version}_tensorboard_logs')

In [6]:
config_file["model_settings"]["train"]["dataset"]["positive_ratio_train"]

In [None]:
%%time
optimizer = None if model.__class__.__name__ == "IsolationForestWrapper" else optim.Adam(model.parameters(), lr=config_file["model_settings"]["train"]["learning_rate"])

# Instantiate the training wrapper for the first phase
trainer = Trainer(model, dataset_train)

trainer.epochs = config_file["model_settings"]["train"]["training_epochs"]

if utils.get_nested(config_file, ["model_settings","train","dataset","use_schedule"], False) == True:
        trainer.criterion = AsymmetricFocalWithFPPenalty(
                        alpha_pos=1.,
                        alpha_neg=1.,
                        gamma_pos=2.,
                        gamma_neg=2.,
                        lambda_fp=0.,
                        tau_fp=0.5,
                        lambda_tp=0.,
                        tau_tp=0.5,
                        reduction=utils.get_nested(config_file, ["model_settings","train","loss","reduction"], "mean"),
                        base_loss_fn=globals()[utils.get_nested(config_file, ["model_settings","train","loss","base_loss_fn"], "bce_with_logits")],
                )

        if utils.get_nested(config_file, ["model_settings","train","dataset","skip_warmup"], False) == True:
                print("loading warm up")
                model.load_state_dict(torch.load(f'{path_out}/model_{version}_warmup_model.pth'))
        else:
                # Train the model
                summary_train = trainer.warm_up(target_pos_frac = utils.get_nested(config_file, ["model_settings","train","dataset","positive_ratio_train"], None),
                        optimizer= optimizer,
                        writer=writer,
                        monitor = "pr_auc",
                        mode = "max",
                        save_best = True,
                        patience = 5,
                        max_epochs_per_phase = 2)

                torch.save(model.state_dict(), f'{path_out}/model_{version}_warmup_model.pth')


CPU times: user 643 ms, sys: 350 ms, total: 993 ms
Wall time: 345 ms


In [8]:
%%time

trainer.criterion = AsymmetricFocalWithFPPenalty(
                alpha_pos=utils.get_nested(config_file, ["model_settings","train","loss","alpha_pos"], 1.),
                alpha_neg=utils.get_nested(config_file, ["model_settings","train","loss","alpha_neg"], 1.),
                gamma_pos=utils.get_nested(config_file, ["model_settings","train","loss","gamma_pos"], 0.),
                gamma_neg=utils.get_nested(config_file, ["model_settings","train","loss","gamma_neg"], 0.),
                lambda_fp=utils.get_nested(config_file, ["model_settings","train","loss","lambda_fp"],0.),
                tau_fp=utils.get_nested(config_file, ["model_settings","train","loss","tau_fp"],0.5),
                lambda_tp=utils.get_nested(config_file, ["model_settings","train","loss","lambda_tp"],0.),
                tau_tp=utils.get_nested(config_file, ["model_settings","train","loss","tau_tp"],0.5),
                reduction=utils.get_nested(config_file, ["model_settings","train","loss","reduction"], "mean"),
                base_loss_fn=globals()[utils.get_nested(config_file, ["model_settings","train","loss","base_loss_fn"], "bce_with_logits")],
            )

# Train the model
summary_train = trainer.fit(optimizer=optimizer, writer=writer, ckpt_dir=f"{path_out}/checkpoints", ckpt_name=f"model_{version}_best.pt",
        monitor="pr_auc", mode="max")


train 1/1: 100%|██████████| 3000/3000 [00:35<00:00, 85.39it/s, loss=0.6129] 
validate 1: 100%|██████████| 1000/1000 [00:08<00:00, 114.57it/s, loss=0.6178]


CPU times: user 30 s, sys: 8.19 s, total: 38.2 s
Wall time: 48.2 s


In [9]:
summary_train

{'best_score': 0.16872863679610856,
 'monitor': 'pr_auc',
 'mode': 'max',
 'epochs_ran': 1}

In [10]:
trainer.metrics

{'train': {'accuracy': 0.8984036666666667,
  'precision': 0.8698227600386436,
  'recall': 0.9369077850686294,
  'f1': 0.902119818272847,
  'roc_auc': 0.9531509743808149,
  'roc_curve': [array([0.        , 0.        , 0.        , ..., 0.99741548, 0.99741682,
          1.        ]),
   array([0.00000000e+00, 6.67049553e-07, 3.86888741e-05, ...,
          1.00000000e+00, 1.00000000e+00, 1.00000000e+00])],
  'pr_auc': 0.9374615221414727,
  'precision_recall_curve': [array([0.499713  , 0.49971317, 0.49971333, ..., 1.        , 1.        ,
          1.        ]),
   array([1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
          1.33409911e-06, 6.67049553e-07, 0.00000000e+00]),
   0.499713],
  'loss': 0.6129497848947842},
 'validate': {'accuracy': 0.91071,
  'precision': 0.07238000020828778,
  'recall': 0.9694518063886176,
  'f1': 0.1347029750944859,
  'roc_auc': 0.9719936332930522,
  'roc_curve': [array([0.00000000e+00, 1.00722077e-06, 5.03610383e-06, ...,
          9.99696827e-01, 9.9

In [11]:
_ = trainer.evaluate(writer=writer, dataset_name="test")

test 0: 100%|██████████| 1000/1000 [00:08<00:00, 115.98it/s, loss=0.6179]


In [12]:
trainer.metrics["test"]

{'accuracy': 0.910488,
 'precision': 0.07380391587204416,
 'recall': 0.9663089254177422,
 'f1': 0.13713393356340012,
 'roc_auc': 0.9717284035104781,
 'roc_curve': [array([0.00000000e+00, 0.00000000e+00, 1.00741559e-06, ...,
         9.99215223e-01, 9.99217238e-01, 1.00000000e+00]),
  array([0.00000000e+00, 1.35851107e-04, 1.35851107e-04, ...,
         1.00000000e+00, 1.00000000e+00, 1.00000000e+00])],
 'pr_auc': 0.17286921614230094,
 'precision_recall_curve': [array([0.007361  , 0.00736101, 0.00736101, ..., 0.5       , 1.        ,
         1.        ]),
  array([1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
         1.35851107e-04, 1.35851107e-04, 0.00000000e+00]),
  0.007361],
 'loss': 0.6179399231672287}

In [13]:
torch.save(model.state_dict(), f'{path_out}/model_{version}_model.pth')
with open(f'{path_out}/model_{version}_settings.yaml', "w") as f:
    yaml.safe_dump(dataset_train.config_file, f)

In [14]:
safe_metrics = utils.make_json_safe(trainer.metrics)

with open(f'{path_out}/model_{version}_train_metrics.json', "w") as f:
    json.dump({model.__class__.__name__: safe_metrics}, f, indent=4)

In [15]:
dataset_train.dataset.close()
writer.close()
utils.cleanup_workspace({})

###  Start TensorBoard

Run this in terminal:

tensorboard --logdir=\<path to tensor board log dir\> --host=0.0.0.0 --port=7007

Open:

http://localhost:7007/

Check effiency performance & memory use

for memory:
    
    from memory_profiler import profile

    @profile
    def your_function():

> python -m memory_profiler your_script.py  > mem_profile.txt

> python -m cProfile -s tottime your_script.py
> sudo py-spy top -- python your_script.py
 
> sudo py-spy record -o loader.svg -- python your_script.py