In [1]:
import torch
import torch.optim as optim
import os
from resolve.utilities import utilities as utils
from resolve.helpers import DataGeneration
from resolve.helpers import Trainer, ModelsManager
from resolve.helpers import AsymmetricFocalWithFPPenalty, log_prob, bce_with_logits, recon_loss_mse, skip_loss
from torch.utils.tensorboard import SummaryWriter
import yaml
import json


In [2]:
# Set the path to the yaml settings file here
path_to_settings = "./binary-black-hole/"
with open(f"{path_to_settings}/settings.yaml", "r") as f:
    config_file = yaml.safe_load(f)

torch.manual_seed(0)
version = config_file["path_settings"]["version"]
path_out = f'{config_file["path_settings"]["path_out_model"]}/model-{version}'

In [3]:
model_name = config_file["model_settings"]["network"]["model_used"]
network_config = config_file["model_settings"]["network"]["models"][model_name]
network_config["d_y"] = utils.get_feature_and_label_size(config_file)[1]
network_config["d_theta"]  = len(config_file["simulation_settings"]["theta_labels"])
network_config["d_phi"] = len(config_file["simulation_settings"]["phi_labels"])

manager = ModelsManager(network_config)
model = manager.get_network(config_file["model_settings"]["network"]["model_used"])

In [4]:
# load data:
dataset_train = DataGeneration(mode = "train", 
                                config_file=config_file
                                )

dataset_train.set_dataset()

if config_file["model_settings"]["train"]["dataset"]["use_feature_normalization"] == "zscore":
    print("theta mean: ", dataset_train.dataset._normalizer._get_scaler("theta").mean_)
    print("phi mean: ", dataset_train.dataset._normalizer._get_scaler("phi").mean_)
elif config_file["model_settings"]["train"]["dataset"]["use_feature_normalization"] == "minmax":
    print("theta mean: ", dataset_train.dataset._normalizer._get_scaler("theta").data_range_)
    print("phi mean: ", dataset_train.dataset._normalizer._get_scaler("phi").data_range_)



positives ratio  tensor(0.0073)
66576 0
{'batch_size': 1000, 'num_batches': 2978, 'pos_frac': 0.0, 'num_epochs': 1}
theta mean:  [1.50423200e-02 4.99557795e+00 5.00162000e+02 4.98470000e+02]
phi mean:  [3.09912000e-02 1.38451402e+01 7.49881529e+01 3.83905393e+01
 1.93798800e-01 5.03367206e-01 1.08481965e+02 1.17347808e+01
 1.09972412e+01 9.90653600e-01]


In [5]:
os.system(f'mkdir -p {path_out}/model_{version}_tensorboard_logs')
os.system(f'rm {path_out}/model_{version}_tensorboard_logs/events*')
writer = SummaryWriter(log_dir=f'{path_out}/model_{version}_tensorboard_logs')

rm: ./binary-black-hole/out/model-v1.0.6/model_v1.0.6_tensorboard_logs/events*: No such file or directory


In [6]:
config_file["model_settings"]["train"]["dataset"]["positive_ratio_train"]

0

In [7]:
%%time
optimizer = None if model.__class__.__name__ == "IsolationForestWrapper" else optim.Adam(model.parameters(), lr=config_file["model_settings"]["train"]["learning_rate"])

# Instantiate the training wrapper for the first phase
trainer = Trainer(model, dataset_train)

trainer.epochs = config_file["model_settings"]["train"]["training_epochs"]

if utils.get_nested(config_file, ["model_settings","train","dataset","use_schedule"], False) == True:
        trainer.criterion = AsymmetricFocalWithFPPenalty(
                        alpha_pos=1.,
                        alpha_neg=1.,
                        gamma_pos=2.,
                        gamma_neg=2.,
                        lambda_fp=0.,
                        tau_fp=0.5,
                        lambda_tp=0.,
                        tau_tp=0.5,
                        reduction=utils.get_nested(config_file, ["model_settings","train","loss","reduction"], "mean"),
                        base_loss_fn=globals()[utils.get_nested(config_file, ["model_settings","train","loss","base_loss_fn"], "bce_with_logits")],
                )

        if utils.get_nested(config_file, ["model_settings","train","dataset","skip_warmup"], False) == True:
                print("loading warm up")
                model.load_state_dict(torch.load(f'{path_out}/model_{version}_warmup_model.pth'))
        else:
                # Train the model
                summary_train = trainer.warm_up(target_pos_frac = utils.get_nested(config_file, ["model_settings","train","dataset","positive_ratio_train"], None),
                        optimizer= optimizer,
                        writer=writer,
                        monitor = "pr_auc",
                        mode = "max",
                        save_best = True,
                        patience = 5,
                        max_epochs_per_phase = 2)

                torch.save(model.state_dict(), f'{path_out}/model_{version}_warmup_model.pth')


CPU times: user 1.18 s, sys: 332 ms, total: 1.51 s
Wall time: 226 ms


In [8]:
%%time

trainer.criterion = AsymmetricFocalWithFPPenalty(
                alpha_pos=utils.get_nested(config_file, ["model_settings","train","loss","alpha_pos"], 1.),
                alpha_neg=utils.get_nested(config_file, ["model_settings","train","loss","alpha_neg"], 1.),
                gamma_pos=utils.get_nested(config_file, ["model_settings","train","loss","gamma_pos"], 0.),
                gamma_neg=utils.get_nested(config_file, ["model_settings","train","loss","gamma_neg"], 0.),
                lambda_fp=utils.get_nested(config_file, ["model_settings","train","loss","lambda_fp"],0.),
                tau_fp=utils.get_nested(config_file, ["model_settings","train","loss","tau_fp"],0.5),
                lambda_tp=utils.get_nested(config_file, ["model_settings","train","loss","lambda_tp"],0.),
                tau_tp=utils.get_nested(config_file, ["model_settings","train","loss","tau_tp"],0.5),
                reduction=utils.get_nested(config_file, ["model_settings","train","loss","reduction"], "mean"),
                base_loss_fn=globals()[utils.get_nested(config_file, ["model_settings","train","loss","base_loss_fn"], "bce_with_logits")],
            )

# Train the model
summary_train = trainer.fit(optimizer=optimizer, patience = 5, writer=writer, ckpt_dir=f"{path_out}/checkpoints", ckpt_name=f"model_{version}_best.pt",
        monitor="pr_auc", mode="max")


train 1/200: 100%|██████████| 2978/2978 [00:32<00:00, 92.24it/s, loss=0.1314] 
validate 1: 100%|██████████| 1000/1000 [00:11<00:00, 90.56it/s, loss=0.0145]
train 2/200: 100%|██████████| 2978/2978 [00:32<00:00, 92.89it/s, loss=0.0124] 
validate 2: 100%|██████████| 1000/1000 [00:10<00:00, 92.30it/s, loss=0.0113]
train 3/200: 100%|██████████| 2978/2978 [00:32<00:00, 91.32it/s, loss=0.0107] 
validate 3: 100%|██████████| 1000/1000 [00:10<00:00, 96.26it/s, loss=0.0102]
train 4/200: 100%|██████████| 2978/2978 [00:32<00:00, 91.54it/s, loss=0.0096] 
validate 4: 100%|██████████| 1000/1000 [00:10<00:00, 91.35it/s, loss=0.0088]
train 5/200: 100%|██████████| 2978/2978 [00:31<00:00, 95.49it/s, loss=0.0086] 
validate 5: 100%|██████████| 1000/1000 [00:10<00:00, 95.38it/s, loss=0.0082]
train 6/200: 100%|██████████| 2978/2978 [00:31<00:00, 93.58it/s, loss=0.0062] 
validate 6: 100%|██████████| 1000/1000 [00:11<00:00, 90.27it/s, loss=0.0040]
train 7/200: 100%|██████████| 2978/2978 [00:31<00:00, 94.15it/s,

CPU times: user 4min 9s, sys: 1min 18s, total: 5min 28s
Wall time: 8min 11s


In [9]:
summary_train

{'best_score': 0.011914076884053717,
 'monitor': 'pr_auc',
 'mode': 'max',
 'epochs_ran': 11}

In [10]:
trainer.metrics

{'train': {'accuracy': 0.9999751495059453,
  'precision': 0.0,
  'recall': 0.0,
  'f1': 0.0,
  'roc_auc': nan,
  'pr_auc': nan,
  'loss': 0.0012195598782710837},
 'validate': {'accuracy': 0.992803,
  'precision': 0.0,
  'recall': 0.0,
  'f1': 0.0,
  'roc_auc': 0.2899512310968942,
  'roc_curve': [array([0.00000000e+00, 1.00722077e-06, 3.16468765e-03, ...,
          9.99996978e-01, 1.00000000e+00, 1.00000000e+00]),
   array([0.        , 0.        , 0.        , ..., 0.99986051, 0.99986051,
          1.        ])],
  'pr_auc': 0.004562684035529231,
  'precision_recall_curve': [array([0.007169  , 0.00716801, 0.00716801, ..., 0.        , 0.        ,
          1.        ]),
   array([1.        , 0.99986051, 0.99986051, ..., 0.        , 0.        ,
          0.        ]),
   0.007169],
  'loss': 0.0010856318596634083},
 'best_model': {'best_score': 0.011914076884053717,
  'monitor': 'pr_auc',
  'mode': 'max',
  'epochs_ran': 11}}

In [11]:
_ = trainer.evaluate(writer=writer, dataset_name="test")

test 0: 100%|██████████| 1000/1000 [00:10<00:00, 98.59it/s, loss=0.0011]


In [12]:
trainer.metrics["test"]

{'accuracy': 0.992602,
 'precision': 0.0,
 'recall': 0.0,
 'f1': 0.0,
 'roc_auc': 0.2901340761328927,
 'roc_curve': [array([0.00000000e+00, 1.00741559e-06, 9.64096716e-04, ...,
         9.99990933e-01, 1.00000000e+00, 1.00000000e+00]),
  array([0.       , 0.       , 0.       , ..., 0.9997283, 0.9997283,
         1.       ])],
 'pr_auc': 0.004682536098711478,
 'precision_recall_curve': [array([0.007361  , 0.00736001, 0.00735901, ..., 0.        , 0.        ,
         1.        ]),
  array([1.        , 0.99986415, 0.9997283 , ..., 0.        , 0.        ,
         0.        ]),
  0.007361],
 'loss': 0.0010863787995767779}

In [13]:
torch.save(model.state_dict(), f'{path_out}/model_{version}_model.pth')
with open(f'{path_out}/model_{version}_settings.yaml', "w") as f:
    yaml.safe_dump(dataset_train.config_file, f)

In [14]:
safe_metrics = utils.make_json_safe(trainer.metrics)

with open(f'{path_out}/model_{version}_train_metrics.json', "w") as f:
    json.dump({model.__class__.__name__: safe_metrics}, f, indent=4)

In [15]:
dataset_train.dataset.close()
writer.close()
utils.cleanup_workspace({})

###  Start TensorBoard

Run this in terminal:

tensorboard --logdir=\<path to tensor board log dir\> --host=0.0.0.0 --port=7007

Open:

http://localhost:7007/

Check effiency performance & memory use

for memory:
    
    from memory_profiler import profile

    @profile
    def your_function():

> python -m memory_profiler your_script.py  > mem_profile.txt

> python -m cProfile -s tottime your_script.py
> sudo py-spy top -- python your_script.py
 
> sudo py-spy record -o loader.svg -- python your_script.py