In [1]:
import torch
import torch.optim as optim
import os
from resolve.utilities import utilities as utils
from resolve.helpers import DataLoaderManager
from resolve.helpers import Trainer, ModelsManager
from resolve.helpers import AsymmetricFocalWithFPPenalty, log_prob, recon_loss_mse, skip_loss, bce_with_logits, brier
from torch.utils.tensorboard import SummaryWriter
import yaml
import json


In [2]:
# Directory that holds settings.yaml -- change this to point at another experiment.
path_to_settings = "./binary-black-hole/"
settings_path = f"{path_to_settings}/settings.yaml"
with open(settings_path, "r") as settings_stream:
    config_file = yaml.safe_load(settings_stream)

# Seed torch's RNG before the model and data loaders are created further down.
torch.manual_seed(0)

# Every artefact of this run is written below <path_out_model>/model-<version>.
path_settings = config_file["path_settings"]
version = path_settings["version"]
path_out = f'{path_settings["path_out_model"]}/model-{version}'

In [3]:
# Pick the configuration of the network named by "model_used".
network_settings = config_file["model_settings"]["network"]
model_name = network_settings["model_used"]
network_config = network_settings["models"][model_name]

# Inject the dimensions that are derived from the settings rather than
# hard-coded per network. d_y takes the second element returned by
# get_feature_and_label_size (presumably the label size -- TODO confirm).
simulation_settings = config_file["simulation_settings"]
network_config["d_y"] = utils.get_feature_and_label_size(config_file)[1]
network_config["d_theta"] = len(simulation_settings["theta_labels"])
network_config["d_phi"] = len(simulation_settings["phi_labels"])

# Instantiate the selected network through the models manager.
manager = ModelsManager(network_config)
model = manager.get_network(network_settings["model_used"])

In [4]:
# Bare expression: the notebook renders the network's repr (its module tree) as the cell output.
model

HCTargetAttnNP(
  (ctx_enc): ContextConditionalEncoder(
    (theta_enc): ThetaEncoder(
      (mlp): MLP(
        (net): Sequential(
          (0): Linear(in_features=4, out_features=64, bias=True)
        )
      )
    )
    (feature_layers): ModuleList(
      (0): Linear(in_features=11, out_features=128, bias=True)
      (1): Linear(in_features=128, out_features=128, bias=True)
    )
    (film_layers): ModuleList(
      (0-1): 2 x Linear(in_features=64, out_features=256, bias=True)
    )
    (final): Identity()
    (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  )
  (theta_enc_t): ThetaEncoder(
    (mlp): MLP(
      (net): Sequential(
        (0): Linear(in_features=4, out_features=128, bias=True)
        (1): ReLU()
        (2): Linear(in_features=128, out_features=128, bias=True)
        (3): ReLU()
        (4): Linear(in_features=128, out_features=128, bias=True)
      )
    )
  )
  (tquery): TargetQueryEncoder(
    (theta_enc): ThetaEncoder(
      (mlp): MLP(
     

In [5]:
# Load the training data.
dataset_train = DataLoaderManager(mode="train", config_file=config_file)

train_dataset_settings = config_file["model_settings"]["train"]["dataset"]
dataset_train.set_dataset(shuffle=train_dataset_settings["shuffle_dataset"])

# Sanity check: print the statistics held by the theta / phi scalers.
# NOTE(review): this reaches into private attributes (_normalizer, _get_scaler);
# a public accessor on the dataset would be more robust if one exists.
normalization = train_dataset_settings["use_feature_normalization"]
if normalization == "zscore":
    print("theta mean: ", dataset_train.dataset._normalizer._get_scaler("theta").mean_)
    print("phi mean: ", dataset_train.dataset._normalizer._get_scaler("phi").mean_)
elif normalization == "minmax":
    # data_range_ is a range, not a mean, so it is labelled as one. The labels
    # previously said "mean" here, which made the printout misleading.
    # (Presumably a scikit-learn MinMaxScaler, where data_range_ = max - min
    # per feature -- TODO confirm.)
    print("theta range: ", dataset_train.dataset._normalizer._get_scaler("theta").data_range_)
    print("phi range: ", dataset_train.dataset._normalizer._get_scaler("phi").data_range_)



theta mean:  [1.50423200e-02 4.99557795e+00 5.00162000e+02 4.98470000e+02]
phi mean:  [3.09912000e-02 1.38451402e+01 7.49881529e+01 3.83905393e+01
 1.93798800e-01 5.03367206e-01 1.08481965e+02 1.17347808e+01
 1.09972412e+01 9.90653600e-01]


In [6]:
# Prepare a clean TensorBoard log directory for this model version.
tensorboard_log_dir = f'{path_out}/model_{version}_tensorboard_logs'

# Pure-Python replacement for `mkdir -p`: no shell involved, so paths with
# spaces or shell metacharacters are safe, and failures raise instead of
# being silently ignored.
os.makedirs(tensorboard_log_dir, exist_ok=True)

# Remove event files left over from earlier runs (the old `rm .../events*`).
# Only regular files whose name starts with "events" are removed, and a
# first run with nothing to delete no longer prints an `rm` error.
for entry_name in os.listdir(tensorboard_log_dir):
    entry_path = os.path.join(tensorboard_log_dir, entry_name)
    if entry_name.startswith("events") and os.path.isfile(entry_path):
        os.remove(entry_path)

writer = SummaryWriter(log_dir=tensorboard_log_dir)

In [7]:
%%time
optimizer = None if model.__class__.__name__ == "IsolationForestWrapper" else optim.Adam(model.parameters(), lr=config_file["model_settings"]["train"]["learning_rate"])

# Instantiate the training wrapper for the first phase
trainer = Trainer(model, dataset_train)

trainer.epochs = config_file["model_settings"]["train"]["training_epochs"]

if isinstance(utils.get_nested(config_file, ["model_settings","train","dataset","positive_ratio_train"], False), list):
        trainer.criterion = AsymmetricFocalWithFPPenalty(
                        alpha_pos=1.,
                        alpha_neg=1.,
                        gamma_pos=0.,
                        gamma_neg=0.,
                        lambda_fp=0.,
                        tau_fp=0.5,
                        lambda_tp= 5.,
                        tau_tp=0.5,
                        reduction=utils.get_nested(config_file, ["model_settings","train","loss","reduction"], "mean"),
                        base_loss_fn=globals()[utils.get_nested(config_file, ["model_settings","train","loss","base_loss_fn"], "bce_with_logits")],
                )

        if utils.get_nested(config_file, ["model_settings","train","dataset","skip_warmup"], False) == True:
                print("loading warm up")
                model.load_state_dict(torch.load(f'{path_out}/model_{version}_warmup_model.pth'))
        else:
                # Train the model
                summary_train = trainer.warm_up(target_pos_frac = utils.get_nested(config_file, ["model_settings","train","dataset","positive_ratio_train"], None),
                        optimizer= optimizer,
                        writer=writer,
                        monitor = "pr_auc",
                        mode = "max",
                        save_best = True,
                        patience = 20,
                        num_data_pass_per_phase = utils.get_nested(config_file, ["model_settings","train","dataset","num_data_pass_per_phase"], 1.)
                )

                torch.save(model.state_dict(), f'{path_out}/model_{version}_warmup_model.pth')


CPU times: user 751 ms, sys: 402 ms, total: 1.15 s
Wall time: 272 ms


In [8]:
%%time

trainer.criterion = AsymmetricFocalWithFPPenalty(
                alpha_pos=utils.get_nested(config_file, ["model_settings","train","loss","alpha_pos"], 1.),
                alpha_neg=utils.get_nested(config_file, ["model_settings","train","loss","alpha_neg"], 1.),
                gamma_pos=utils.get_nested(config_file, ["model_settings","train","loss","gamma_pos"], 0.),
                gamma_neg=utils.get_nested(config_file, ["model_settings","train","loss","gamma_neg"], 0.),
                lambda_fp=utils.get_nested(config_file, ["model_settings","train","loss","lambda_fp"],0.),
                tau_fp=utils.get_nested(config_file, ["model_settings","train","loss","tau_fp"],0.5),
                lambda_tp=utils.get_nested(config_file, ["model_settings","train","loss","lambda_tp"],0.),
                tau_tp=utils.get_nested(config_file, ["model_settings","train","loss","tau_tp"],0.5),
                reduction=utils.get_nested(config_file, ["model_settings","train","loss","reduction"], "mean"),
                base_loss_fn=globals()[utils.get_nested(config_file, ["model_settings","train","loss","base_loss_fn"], "bce_with_logits")],
            )

# Train the model
summary_train = trainer.fit(optimizer=optimizer, patience = config_file["model_settings"]["train"]["patience"], writer=writer, ckpt_dir=f"{path_out}/checkpoints", ckpt_name=f"model_{version}_best.pt",
        monitor="pr_auc", mode="max")


train 1/1: 100%|██████████| 3000/3000 [00:58<00:00, 50.93it/s, loss=0.0008]
validate 1: 100%|██████████| 1000/1000 [00:12<00:00, 78.78it/s, loss=0.0004]


CPU times: user 55.6 s, sys: 10.9 s, total: 1min 6s
Wall time: 1min 16s


In [9]:
# Display the summary returned by trainer.fit (best score, monitored metric, mode, epochs run).
summary_train

{'best_score': 0.1474059969934624,
 'monitor': 'pr_auc',
 'mode': 'max',
 'epochs_ran': 1}

In [10]:
# Evaluate on the trainer's own "test" dataset, passing the TensorBoard writer.
# The return value is discarded; the metrics are read from trainer.metrics["test"] in the next cell.
_ = trainer.evaluate(writer=writer, dataset_name="test")

test 0: 100%|██████████| 1000/1000 [00:12<00:00, 78.67it/s, loss=0.0004]


In [11]:
# Display the metrics stored under the "test" key by the evaluation above.
# NOTE(review): this prints the full ROC / PR curve arrays; consider showing only the scalar metrics.
trainer.metrics["test"]


{'accuracy': 0.992684,
 'precision': 0.0,
 'recall': 0.0,
 'f1': 0.0,
 'roc_auc': 0.9583133457519003,
 'roc_curve': [array([0.00000000e+00, 0.00000000e+00, 1.00736992e-06, ...,
         9.99887175e-01, 9.99889189e-01, 1.00000000e+00]),
  array([0.00000000e+00, 1.36686714e-04, 1.36686714e-04, ...,
         1.00000000e+00, 1.00000000e+00, 1.00000000e+00])],
 'pr_auc': 0.14638637376226382,
 'precision_recall_curve': [array([0.007316  , 0.00731601, 0.00731601, ..., 0.5       , 1.        ,
         1.        ]),
  array([1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,
         1.36686714e-04, 1.36686714e-04, 0.00000000e+00]),
  0.007316],
 'loss': 0.0003755245533052314}

In [12]:
# Reuse the normalizer attached to the training dataset so the held-out data
# is scaled with it rather than with a new one.
normalizer_train = dataset_train.dataset._normalizer

# Build the loader for the "test"-mode data.
dataset_test = DataLoaderManager(config_file=config_file, mode="test")
dataset_test.set_dataset(
    shuffle=config_file["model_settings"]["train"]["dataset"]["shuffle_dataset"],
    normalizer=normalizer_train,
)

# A second Trainer serves purely as an evaluation harness: same model, same
# loss as the training run, a single epoch.
tester = Trainer(model, dataset_test, epochs=1)
tester._report = 1
tester.criterion = trainer.criterion

# Evaluate the model on this dataset.
summary_test = tester.evaluate(
    writer=writer,
    monitor="pr_auc",
    epoch=1,
    dataset_name="test",
)



test 1: 100%|██████████| 1000/1000 [00:13<00:00, 74.12it/s, loss=0.0004]


In [13]:
# Re-key the held-out results as 'test_2' so that merging them does not
# overwrite the 'test' entry already stored in trainer.metrics.
held_out_metrics = tester.metrics.pop('test')
tester.metrics['test_2'] = held_out_metrics

# Fold the tester's metrics into the trainer's so everything is exported together.
trainer.metrics |= tester.metrics

In [14]:
trainer.metrics['test_2']

{'accuracy': 0.992304,
 'precision': 0.0,
 'recall': 0.0,
 'f1': 0.0,
 'roc_auc': 0.9592290550299638,
 'roc_curve': [array([0.00000000e+00, 0.00000000e+00, 1.00775569e-06, ...,
         9.99933488e-01, 9.99935504e-01, 1.00000000e+00]),
  array([0.0000000e+00, 1.2993763e-04, 1.2993763e-04, ..., 1.0000000e+00,
         1.0000000e+00, 1.0000000e+00])],
 'pr_auc': 0.162258674858202,
 'precision_recall_curve': [array([0.007696  , 0.00769601, 0.00769602, ..., 0.5       , 1.        ,
         1.        ]),
  array([1.0000000e+00, 1.0000000e+00, 1.0000000e+00, ..., 1.2993763e-04,
         1.2993763e-04, 0.0000000e+00]),
  0.007696],
 'loss': 0.00041274439470354187}

In [15]:
# Persist the model weights as they stand at this point in the notebook.
weights_path = f'{path_out}/model_{version}_model.pth'
torch.save(model.state_dict(), weights_path)

# Store the configuration held by the training data manager next to the
# weights, so the run can be traced back to its settings.
settings_copy_path = f'{path_out}/model_{version}_settings.yaml'
with open(settings_copy_path, "w") as settings_out:
    yaml.safe_dump(dataset_train.config_file, settings_out)

In [16]:
# Convert the collected metrics into JSON-serialisable values first.
safe_metrics = utils.make_json_safe(trainer.metrics)

# Write them to disk, keyed by the model's class name.
metrics_path = f'{path_out}/model_{version}_train_metrics.json'
with open(metrics_path, "w") as metrics_out:
    metrics_report = {model.__class__.__name__: safe_metrics}
    json.dump(metrics_report, metrics_out, indent=4)

In [17]:
# Release resources at the end of the run.
# Close the training dataset.
# NOTE(review): dataset_test.dataset is never closed here -- confirm whether it holds a handle that also needs closing.
dataset_train.dataset.close()
# Close the TensorBoard writer.
writer.close()
# Project helper called with an empty dict; its exact effect is not visible here -- TODO confirm what it cleans up.
utils.cleanup_workspace({})

###  Start TensorBoard

Run this in a terminal:

tensorboard --logdir=\<path to TensorBoard log dir\> --host=0.0.0.0 --port=7007

Open:

http://localhost:7007/

Check efficiency: performance & memory use

For memory:
    
    from memory_profiler import profile

    @profile
    def your_function():

> python -m memory_profiler your_script.py  > mem_profile.txt

> python -m cProfile -s tottime your_script.py
> sudo py-spy top -- python your_script.py
 
> sudo py-spy record -o loader.svg -- python your_script.py