# Topology Estimation Model

This notebook contains all the steps of topology model development.

In this notebook, the model can be:
- trained and tested
- loaded and run for new data

In [10]:
import sys
import os
import torch

# Make the parent of the current working directory importable so that the
# project packages imported by later cells can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard the append so re-running this cell does not pile up duplicate
# entries on sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [11]:
# Emit a marker so the cell output shows where the notebook run begins.
startup_message = "Starting run from main script..."
print(startup_message)

Starting run from main script...


## 1. Load dataset

In [12]:
from data.config import DataConfig
from data.load import load_spring_particle_data

# Build the data configuration and switch it to the training dataset.
data_config = DataConfig()
data_config.set_train_dataset()

# Resolve the node and edge dataset paths the loaders will read from.
dataset_paths = data_config.get_dataset_paths()
node_ds_paths, edge_ds_paths = dataset_paths

# Create the train / validation / test dataloaders.
loaders = load_spring_particle_data(node_ds_paths, edge_ds_paths)
train_loader, valid_loader, test_loader = loaders

#### Read the number of nodes, timesteps and dimensions from the node data

In [13]:
# Pull a single batch from the training loader; only its shape is inspected.
dataiter = iter(train_loader)
data = next(dataiter)

# The first element of the batch is the node data; axes 1, 2 and 3 are read
# as the node, timestep and dimension counts respectively.
node_shape = data[0].shape
n_nodes, n_timesteps, n_dims = node_shape[1], node_shape[2], node_shape[3]

print(
    f"Number of nodes: {n_nodes}",
    f"Number of timesteps: {n_timesteps}",
    f"Number of dimensions: {n_dims}",
    sep="\n",
)

Number of nodes: 5
Number of timesteps: 49
Number of dimensions: 4


### Prepare the relation matrix for encoder input

##### Generate off-diagonal fully connected graph

In [14]:

from graph_structures import FullyConnectedGraph

# Build the fully connected graph for the current node count and batch size,
# then fetch its receiver and sender relation matrices.
fc_graph = FullyConnectedGraph(n_nodes=n_nodes, batch_size=train_loader.batch_size)
rec_rel, send_rel = fc_graph.get_relation_matrices()

print(f"Receiver relation matrix shape: {rec_rel.shape}")

Receiver relation matrix shape: torch.Size([10, 20, 5])


## 2. Load and prepare the topology estimator model blocks

### Encoder

In [24]:
from topology_estimation.config import TopologyEstimatorConfig
from topology_estimation.encoder_blocks import Encoder
from torchinfo import summary

# Create the topology-estimator configuration and load the encoder settings.
tp_config = TopologyEstimatorConfig()
tp_config.set_encoder_params()

# Instantiate the encoder from the batch dimensions and the configured
# pipeline / embedding settings.
encoder = Encoder(n_timesteps=n_timesteps, 
                  n_dims=n_dims,
                  pipeline=tp_config.encoder_pipeline, 
                  n_edge_types=tp_config.n_edge_types, 
                  is_residual_connection=tp_config.is_residual_connection,
                  edge_emd_configs=tp_config.edge_emb_configs_enc, 
                  node_emd_configs=tp_config.node_emb_configs_enc, 
                  drop_out_prob=tp_config.dropout_prob_enc,
                  batch_norm=tp_config.batch_norm_enc, 
                  attention_output_size=tp_config.attention_output_size)

# NOTE(review): the input graph is not set on the encoder here. In the
# recorded run the torchinfo summary below failed, while the NRI summary
# (which calls set_input_graph first) succeeded - confirm whether the encoder
# needs this call before it can run a forward pass.
# encoder.set_input_graph(rec_rel, send_rel)

# Move the encoder to the selected device. The result used to be bound to the
# misspelled name `enocder`, leaving a stray variable in the namespace.
encoder = encoder.to(device)

print(summary(encoder, (train_loader.batch_size, n_nodes, n_timesteps, n_dims)))
print(encoder)

# List the individual entries of the encoder pipeline.
pipeline_steps = encoder.pipeline
print("Encoder pipeline:")
for pipeline_step in pipeline_steps:
    print(pipeline_step)

RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: []

### Decoder

In [25]:
from topology_estimation.decoder_blocks import Decoder

# Load the decoder settings into the shared configuration object.
tp_config.set_decoder_params()

decoder = Decoder(n_dim=n_dims,
                  msg_out_size=tp_config.msg_out_size,
                  n_edge_types=tp_config.n_edge_types,
                  skip_first=tp_config.skip_first_edge_type,
                  edge_mlp_config=tp_config.edge_mlp_config_dec,
                  recurrent_emd_type=tp_config.recurrent_emd_type,
                  out_mlp_config=tp_config.out_mlp_config_dec,
                  do_prob=tp_config.dropout_prob_dec,
                  is_batch_norm=tp_config.is_batch_norm_dec)


# Generate a random edge matrix. Its size follows the relation matrix and the
# configured number of edge types instead of the previously hard-coded
# (20, 2), so it stays consistent when the node count or config changes.
# NOTE(review): assumes axis 1 of rec_rel is the edge axis (recorded shape
# [10, 20, 5]) - confirm against FullyConnectedGraph.
n_edges = rec_rel.shape[1]
edge_matrix = torch.rand((train_loader.batch_size, n_edges, tp_config.n_edge_types))
edge_matrix = edge_matrix.to(device)

decoder.set_input_graph(rec_rel, send_rel)
# The random edge matrix is currently not handed to the decoder.
# decoder.set_edge_matrix(edge_matrix)
decoder.set_run_params()

decoder = decoder.to(device)

print(summary(decoder, (train_loader.batch_size, n_nodes, n_timesteps, n_dims)))
# print(decoder)
# print(decoder)

RuntimeError: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: []

### NRI model (Combine Encoder and Decoder blocks)

In [26]:
from topology_estimation.nri import NRI

# Combine the encoder and decoder into one NRI model, then set its run
# parameters and input graph.
nri_model = NRI(encoder, decoder)
nri_model.set_run_params()
nri_model.set_input_graph(rec_rel, send_rel)

# Summarise the combined model for an input of one batch of node data.
nri_input_shape = (train_loader.batch_size, n_nodes, n_timesteps, n_dims)
nri_summary = summary(nri_model, nri_input_shape)
print(nri_summary)
# print(nri_model)
# print(nri_model)

Layer (type:depth-idx)                        Output Shape              Param #
NRI                                           [10, 20, 2]               --
├─Encoder: 1-1                                [10, 20, 2]               --
│    └─ModuleDict: 2-1                        --                        --
│    │    └─MLP: 3-1                          [10, 5, 8]                15,352
│    │    └─MLP: 3-2                          [10, 5, 8]                3,320
│    │    └─MLP: 3-3                          [10, 20, 8]               3,320
│    │    └─MLP: 3-4                          [10, 5, 8]                3,320
│    │    └─MLP: 3-5                          [10, 5, 8]                3,320
│    │    └─MLP: 3-6                          [10, 20, 8]               3,832
│    │    └─MLP: 3-7                          [10, 20, 8]               3,832
│    └─Linear: 2-2                            [10, 20, 2]               18
├─Decoder: 1-2                                [10, 5, 48, 4]            1

## 3. Training topology estimator model

In [None]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import RichProgressBar

# Load the training settings into the config and the model.
tp_config.set_training_params()
nri_model.set_training_params()

# `version` was referenced without being defined anywhere in this notebook,
# which raises a NameError on a fresh kernel. With None, TensorBoardLogger
# assigns the next free version folder itself; set an int or str to pin one.
version = None
logger = TensorBoardLogger('logs/trials', name='nri', version=version)

# Train for the configured number of epochs, logging every step.
trainer = Trainer(
    max_epochs=tp_config.max_epochs,
    logger=logger,
    enable_progress_bar=True,
    log_every_n_steps=1,)

trainer.fit(model=nri_model, train_dataloaders=train_loader)

#### Plot the training loss and upload the figure to TensorBoard

In [None]:
from tensorboard.backend.event_processing import event_accumulator
import os

# Directory holding the TensorBoard event files to read.
log_parts = ('model_logs', 'trials', 'nri_model_trial3', 'version_0')
event_dir = os.path.join(*log_parts)
print(event_dir)

# Load the event files from disk.
ea = event_accumulator.EventAccumulator(event_dir)
ea.Reload()

# List all tags
# print(ea.Tags())

# Split the logged 'train_loss' scalar events into parallel step / value lists.
loss_events = ea.Scalars('train_loss')
steps, losses = [], []
for event in loss_events:
    steps.append(event.step)
    losses.append(event.value)

print("Steps:", steps)
print("Losses:", losses)



In [None]:
import os
import matplotlib
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
import io
from pytorch_lightning.loggers import TensorBoardLogger
from PIL import Image
import numpy as np

# Plot the training loss against the logged steps.
fig, ax = plt.subplots()
ax.plot(steps, losses, label='Train Loss')
ax.set_xlabel('Steps')
ax.set_ylabel('Loss')
ax.legend()

# Render the figure into an in-memory PNG and decode it into an array.
# np.array() fully loads the pixel data, so the buffer can be closed as soon
# as the with-block ends.
with io.BytesIO() as buf:
    fig.savefig(buf, format='png', dpi=1000)
    buf.seek(0)
    image = Image.open(buf)
    image_np = np.array(image)
plt.close(fig)

# Build the log directory with os.path.join rather than a hard-coded
# backslash string so the path is also valid on non-Windows systems.
writer_dir = os.path.join("model_logs", "trials", "nri_model_trial3")

# Use the writer as a context manager so it is flushed and closed; it was
# previously left open, which can leave the image unwritten.
with SummaryWriter(writer_dir) as writer:
    # add_image expects channels first by default, hence the transpose from
    # (H, W, C) to (C, H, W).
    writer.add_image("test_plot", image_np.transpose(2, 0, 1), global_step=0)

### Experimenting with the configuration-based workflow

In [1]:
import sys
import os
import torch

# Make the parent of the current working directory importable so that the
# project packages imported below can be resolved. The guard keeps re-runs of
# this cell from adding duplicate sys.path entries.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
from topology_estimation.config import TopologyEstimatorConfig, SelectTopologyEstimatorModel
from data.config import DataConfig
from data.load import load_spring_particle_data

# Create the model and data configuration objects once. DataConfig() used to
# be instantiated a second time further down, which was redundant.
tp_config = TopologyEstimatorConfig()
data_config = DataConfig()

# Load only the parameter groups for the components enabled in the config.
if tp_config.is_nri:
    tp_config.set_encoder_params()
    tp_config.set_decoder_params()

if tp_config.is_sparsifier:
    tp_config.set_sparsifier_params()

# load data
data_config.set_train_dataset()

# get node and edge dataset path from which data will be loaded
node_ds_paths, edge_ds_paths = data_config.get_dataset_paths()

# load dataloaders
train_loader, valid_loader, test_loader = load_spring_particle_data(node_ds_paths, edge_ds_paths)

# Take one batch and read the node, timestep and dimension counts from
# axes 1, 2 and 3 of its first element.
dataiter = iter(train_loader)
data = next(dataiter)

n_nodes = data[0].shape[1]
n_timesteps = data[0].shape[2]
n_dims = data[0].shape[3]

In [None]:
import os

# Have the config derive its log path from the data config and the number of
# timesteps; the result is read back from tp_config.log_path.
tp_config.get_log_path(data_config, n_timesteps)
log_path = tp_config.log_path
print(log_path)

# Create the log directory; an already existing one is not an error.
os.makedirs(log_path, exist_ok=True)

logs\spring_particles\P005\scenario_1\directed_graph\enc=mlp_1_dec=gru\dp=49\healthy\H1_[OG]\sparsif_knn\(sparsif)_no_fex\(nri)_no_fex\v1


In [6]:
# NOTE(review): judging by the recorded output, this reports when the current
# version folder already exists in the log path. Whether it also moves
# tp_config.log_path on to a new version is not visible in this notebook -
# confirm in topology_estimation.config.
tp_config.check_if_version_exists()

Version 1 already exists in the log path 'logs\spring_particles\P005\scenario_1\directed_graph\enc=mlp_1_dec=gru\dp=49\healthy\H1_[OG]\sparsif_knn\(sparsif)_no_fex\(nri)_no_fex\v1'.


In [8]:
# Show the log path currently stored on the config, then make sure the
# directory exists.
log_path = tp_config.log_path
print(log_path)
os.makedirs(log_path, exist_ok=True)

logs\spring_particles\P005\scenario_1\directed_graph\enc=mlp_1_dec=gru\dp=49\healthy\H1_[OG]\sparsif_knn\(sparsif)_no_fex\(nri)_no_fex\v2


In [9]:
from topology_estimation.config import SelectTopologyEstimatorModel

# Criteria handed to the model selector.
selector_kwargs = {
    "application": 'spring_particles',
    "machine": 'P005',
    "scenario": 'scenario_1',
    "framework": 'directed_graph',
}
model_selector = SelectTopologyEstimatorModel(**selector_kwargs)

# NOTE(review): the recorded run of this call ended in
# "ValueError: invalid literal for int() with base 10: ''" after listing the
# available version paths - presumably an empty answer to a selection prompt;
# confirm in select_and_get_ckpt.
ckpt = model_selector.select_and_get_ckpt()
print(f"Selected checkpoint: {ckpt}")


Available version paths:
0: logs/spring_particles\P005\scenario_1\directed_graph\enc=mlp1_dec=gru\dp=50\healthy\H1_[A1+A2+G]_+_H2_[G+A1]\_no_sparsif\(nri)-fex_1\v1
1: logs/spring_particles\P005\scenario_1\directed_graph\enc=mlp1_dec=gru\dp=50\healthy\H1_[A1+A2+G]_+_H2_[G+A1]\_no_sparsif\(nri)_no_fex\v1
2: logs/spring_particles\P005\scenario_1\directed_graph\enc=mlp1_dec=gru\dp=50\healthy\H1_[A1+A2+G]_+_H2_[G+A1]\_no_sparsif\(nri)_no_fex\v28
3: logs/spring_particles\P005\scenario_1\directed_graph\enc=mlp1_dec=gru\dp=50\healthy\H1_[A1+A2+G]_+_H2_[G+A1]\sparsif_1\(sparsif)_no_fex\(nri)_no_fex\v1
4: logs/spring_particles\P005\scenario_1\directed_graph\enc=mlp1_dec=gru\dp=50\healthy\H1_[A1+A2+G]_+_H2_[G+A1]\sparsif_1\(sparsif)_no_fex\(nri)_no_fex\v28
5: logs/spring_particles\P005\scenario_1\directed_graph\enc=mlp_1_dec=gru\dp=49\healthy\H1_[OG]\_no_sparsif\(nri)_no_fex\v1
6: logs/spring_particles\P005\scenario_1\directed_graph\enc=mlp_1_dec=gru\dp=49\healthy\H1_[OG]\sparsif_knn\(sparsif)_n

ValueError: invalid literal for int() with base 10: ''